author    Linus Torvalds <torvalds@linux-foundation.org>  2018-01-31 17:31:10 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-01-31 17:31:10 -0500
commit    b2fe5fa68642860e7de76167c3111623aa0d5de1 (patch)
tree      b7f9b89b7039ecefbc35fe3c8e73a6ff972641dd /drivers/vhost
parent    a103950e0dd2058df5e8a8d4a915707bdcf205f0 (diff)
parent    a54667f6728c2714a400f3c884727da74b6d1717 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Significantly shrink the core networking routing structures. Result of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf
 2) Add netdevsim driver for testing various offloads, from Jakub Kicinski.
 3) Support cross-chip FDB operations in DSA, from Vivien Didelot.
 4) Add a 2nd listener hash table for TCP, similar to what was done for UDP. From Martin KaFai Lau.
 5) Add eBPF based queue selection to tun, from Jason Wang.
 6) Lockless qdisc support, from John Fastabend.
 7) SCTP stream interleave support, from Xin Long.
 8) Smoother TCP receive autotuning, from Eric Dumazet.
 9) Lots of erspan tunneling enhancements, from William Tu.
10) Add true function call support to BPF, from Alexei Starovoitov.
11) Add explicit support for GRO HW offloading, from Michael Chan.
12) Support extack generation in more netlink subsystems. From Alexander Aring, Quentin Monnet, and Jakub Kicinski.
13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From Russell King.
14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso.
15) Many improvements and simplifications to the NFP driver bpf JIT, from Jakub Kicinski.
16) Support for ipv6 non-equal cost multipath routing, from Ido Schimmel.
17) Add resource abstraction to devlink, from Arkadi Sharshevsky.
18) Packet scheduler classifier shared filter block support, from Jiri Pirko.
19) Avoid locking in act_csum, from Davide Caratti.
20) devinet_ioctl() simplifications from Al Viro.
21) More TCP bpf improvements from Lawrence Brakmo.
22) Add support for onlink ipv6 route flag, similar to ipv4, from David Ahern.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits)
  tls: Add support for encryption using async offload accelerator
  ip6mr: fix stale iterator
  net/sched: kconfig: Remove blank help texts
  openvswitch: meter: Use 64-bit arithmetic instead of 32-bit
  tcp_nv: fix potential integer overflow in tcpnv_acked
  r8169: fix RTL8168EP take too long to complete driver initialization.
  qmi_wwan: Add support for Quectel EP06
  rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK
  ipmr: Fix ptrdiff_t print formatting
  ibmvnic: Wait for device response when changing MAC
  qlcnic: fix deadlock bug
  tcp: release sk_frag.page in tcp_disconnect
  ipv4: Get the address of interface correctly.
  net_sched: gen_estimator: fix lockdep splat
  net: macb: Handle HRESP error
  net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring
  ipv6: addrconf: break critical section in addrconf_verify_rtnl()
  ipv6: change route cache aging logic
  i40e/i40evf: Update DESC_NEEDED value to reflect larger value
  bnxt_en: cleanup DIM work on device shutdown
  ...
Diffstat (limited to 'drivers/vhost')
 -rw-r--r--  drivers/vhost/net.c | 68
 1 file changed, 44 insertions, 24 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9524ee16878a..9c3f8160ef24 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -89,7 +89,7 @@ struct vhost_net_ubuf_ref {
 
 #define VHOST_RX_BATCH 64
 struct vhost_net_buf {
-        struct sk_buff **queue;
+        void **queue;
         int tail;
         int head;
 };
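A note on this hunk: widening the per-queue batch cache from struct sk_buff ** to void ** is what lets the same array hold either socket buffers or XDP buffers pulled out of the backend ring; the demultiplexing arrives in a later hunk. A minimal standalone sketch of the cache shape, with simplified helpers that only mirror (and are not) the kernel's vhost_net_buf accessors:

#include <stddef.h>

#define RX_BATCH 64

struct rx_cache {
        void *queue[RX_BATCH]; /* generic entries: skb or XDP buffer */
        int tail;              /* one past the last valid entry */
        int head;              /* next entry to hand out */
};

static int rx_cache_size(const struct rx_cache *c)     { return c->tail - c->head; }
static int rx_cache_is_empty(const struct rx_cache *c) { return c->head == c->tail; }

static void *rx_cache_consume(struct rx_cache *c)
{
        return rx_cache_is_empty(c) ? NULL : c->queue[c->head++];
}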
@@ -108,7 +108,7 @@ struct vhost_net_virtqueue {
         /* Reference counting for outstanding ubufs.
          * Protected by vq mutex. Writers must also take device mutex. */
         struct vhost_net_ubuf_ref *ubufs;
-        struct skb_array *rx_array;
+        struct ptr_ring *rx_ring;
         struct vhost_net_buf rxq;
 };
 
@@ -158,7 +158,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
         struct vhost_net_buf *rxq = &nvq->rxq;
 
         rxq->head = 0;
-        rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
+        rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
                                              VHOST_RX_BATCH);
         return rxq->tail;
 }
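ptr_ring_consume_batched() refills the whole cache in one call instead of dequeuing entries one at a time, amortizing the ring's locking over up to VHOST_RX_BATCH entries. A rough single-threaded model of that refill step; the plain array below is only a stand-in for struct ptr_ring, which is lock-protected in the kernel:

/* Toy stand-in for ptr_ring_consume_batched(): copy up to n entries
 * from the ring into dst, returning how many were actually moved. */
static int ring_consume_batched(void *const *src, int avail, void **dst, int n)
{
        int i;

        for (i = 0; i < n && i < avail; i++)
                dst[i] = src[i];
        return i;
}

/* vhost_net_buf_produce() then resets head to 0 and records the count
 * returned here as the new tail. */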
@@ -167,13 +167,25 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
 {
         struct vhost_net_buf *rxq = &nvq->rxq;
 
-        if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) {
-                skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head,
-                                    vhost_net_buf_get_size(rxq));
+        if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
+                ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
+                                   vhost_net_buf_get_size(rxq),
+                                   __skb_array_destroy_skb);
                 rxq->head = rxq->tail = 0;
         }
 }
 
+static int vhost_net_buf_peek_len(void *ptr)
+{
+        if (tun_is_xdp_buff(ptr)) {
+                struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+                return xdp->data_end - xdp->data;
+        }
+
+        return __skb_array_len_with_tag(ptr);
+}
+
 static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
 {
         struct vhost_net_buf *rxq = &nvq->rxq;
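The new vhost_net_buf_peek_len() is what makes the mixed ring workable: tun tags the pointers it enqueues so a consumer can tell an XDP buffer from an sk_buff, and for XDP the payload length is simply data_end - data. The tagging helpers (tun_is_xdp_buff(), tun_ptr_to_xdp()) live in tun; the sketch below shows the general low-bit tagging idea under the assumption of bit 0 as the tag, which is illustrative rather than a statement about tun's exact encoding:

#include <stdint.h>

#define XDP_TAG 0x1UL /* assumed tag bit; relies on pointer alignment */

static void *xdp_to_ptr(void *xdp) /* tag an XDP buffer before enqueue */
{
        return (void *)((uintptr_t)xdp | XDP_TAG);
}

static int is_xdp_ptr(void *ptr) /* does this ring entry carry XDP? */
{
        return ((uintptr_t)ptr & XDP_TAG) != 0;
}

static void *ptr_to_xdp(void *ptr) /* strip the tag to recover the buffer */
{
        return (void *)((uintptr_t)ptr & ~XDP_TAG);
}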
@@ -185,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
                 return 0;
 
 out:
-        return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
+        return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
 }
 
 static void vhost_net_buf_init(struct vhost_net_buf *rxq)
@@ -583,7 +595,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
         int len = 0;
         unsigned long flags;
 
-        if (rvq->rx_array)
+        if (rvq->rx_ring)
                 return vhost_net_buf_peek(rvq);
 
         spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
@@ -744,7 +756,7 @@ static void handle_rx(struct vhost_net *net)
         };
         size_t total_len = 0;
         int err, mergeable;
-        s16 headcount;
+        s16 headcount, nheads = 0;
         size_t vhost_hlen, sock_hlen;
         size_t vhost_len, sock_len;
         struct socket *sock;
@@ -772,7 +784,7 @@ static void handle_rx(struct vhost_net *net)
         while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
                 sock_len += sock_hlen;
                 vhost_len = sock_len + vhost_hlen;
-                headcount = get_rx_bufs(vq, vq->heads, vhost_len,
+                headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len,
                                         &in, vq_log, &log,
                                         likely(mergeable) ? UIO_MAXIOV : 1);
                 /* On error, stop handling until the next kick. */
@@ -790,7 +802,7 @@ static void handle_rx(struct vhost_net *net)
                          * they refilled. */
                         goto out;
                 }
-                if (nvq->rx_array)
+                if (nvq->rx_ring)
                         msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
                 /* On overrun, truncate and discard */
                 if (unlikely(headcount > UIO_MAXIOV)) {
@@ -844,8 +856,12 @@ static void handle_rx(struct vhost_net *net)
                         vhost_discard_vq_desc(vq, headcount);
                         goto out;
                 }
-                vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
-                                            headcount);
+                nheads += headcount;
+                if (nheads > VHOST_RX_BATCH) {
+                        vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
+                                                    nheads);
+                        nheads = 0;
+                }
                 if (unlikely(vq_log))
                         vhost_log_write(vq, vq_log, log, vhost_len);
                 total_len += vhost_len;
@@ -856,6 +872,9 @@ static void handle_rx(struct vhost_net *net)
         }
         vhost_net_enable_vq(net, vq);
 out:
+        if (nheads)
+                vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
+                                            nheads);
         mutex_unlock(&vq->mutex);
 }
 
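Taken together, the nheads changes batch the used-ring updates: each loop iteration parks its completed heads at vq->heads + nheads rather than position 0, the guest is signalled only once more than VHOST_RX_BATCH heads have accumulated, and whatever is left over is flushed at the out: label. A condensed standalone model of the accumulate-and-flush pattern, where signal() and the fixed-size heads array are stand-ins for vhost_add_used_and_signal_n() and vq->heads:

#define RX_BATCH 64

struct used_batch {
        int heads[1024]; /* stand-in for vq->heads */
        int nheads;      /* heads accumulated but not yet signalled */
};

static void flush_used(struct used_batch *b, void (*signal)(int *, int))
{
        if (b->nheads) {
                signal(b->heads, b->nheads); /* one notification per batch */
                b->nheads = 0;
        }
}

static void add_used(struct used_batch *b, int headcount,
                     void (*signal)(int *, int))
{
        /* the new heads were written at heads[nheads..nheads+headcount) */
        b->nheads += headcount;
        if (b->nheads > RX_BATCH)
                flush_used(b, signal);
}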
@@ -896,7 +915,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
         struct vhost_net *n;
         struct vhost_dev *dev;
         struct vhost_virtqueue **vqs;
-        struct sk_buff **queue;
+        void **queue;
         int i;
 
         n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
@@ -908,7 +927,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
                 return -ENOMEM;
         }
 
-        queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
+        queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *),
                               GFP_KERNEL);
         if (!queue) {
                 kfree(vqs);
@@ -1046,23 +1065,23 @@ err:
         return ERR_PTR(r);
 }
 
-static struct skb_array *get_tap_skb_array(int fd)
+static struct ptr_ring *get_tap_ptr_ring(int fd)
 {
-        struct skb_array *array;
+        struct ptr_ring *ring;
         struct file *file = fget(fd);
 
         if (!file)
                 return NULL;
-        array = tun_get_skb_array(file);
-        if (!IS_ERR(array))
+        ring = tun_get_tx_ring(file);
+        if (!IS_ERR(ring))
                 goto out;
-        array = tap_get_skb_array(file);
-        if (!IS_ERR(array))
+        ring = tap_get_ptr_ring(file);
+        if (!IS_ERR(ring))
                 goto out;
-        array = NULL;
+        ring = NULL;
 out:
         fput(file);
-        return array;
+        return ring;
 }
 
 static struct socket *get_tap_socket(int fd)
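get_tap_ptr_ring() keeps the old probe order: ask the tun layer first, then tap, and fall back to NULL so the ring-based RX fast path is simply disabled for other socket backends. A small sketch of that try-each-provider shape; the function-pointer table is a stand-in, and the kernel helpers report mismatch via ERR_PTR() codes rather than NULL:

#include <stddef.h>

typedef void *(*ring_probe_t)(void *file);

/* Probe providers in order; the first one that recognizes the file wins. */
static void *probe_ring(void *file, ring_probe_t *probes, int n)
{
        for (int i = 0; i < n; i++) {
                void *ring = probes[i](file);
                if (ring) /* kernel code tests !IS_ERR(ring) instead */
                        return ring;
        }
        return NULL; /* not a tun/tap fd: fast path stays disabled */
}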
@@ -1143,7 +1162,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
                 vq->private_data = sock;
                 vhost_net_buf_unproduce(nvq);
                 if (index == VHOST_NET_VQ_RX)
-                        nvq->rx_array = get_tap_skb_array(fd);
+                        nvq->rx_ring = get_tap_ptr_ring(fd);
                 r = vhost_vq_init_access(vq);
                 if (r)
                         goto err_used;
@@ -1208,6 +1227,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
         }
         vhost_net_stop(n, &tx_sock, &rx_sock);
         vhost_net_flush(n);
+        vhost_dev_stop(&n->dev);
         vhost_dev_reset_owner(&n->dev, umem);
         vhost_net_vq_reset(n);
 done: