diff options
author | Jason Wang <jasowang@redhat.com> | 2013-04-10 16:50:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-04-11 16:16:22 -0400 |
commit | 70181d51209cbcdf9ce2171eac3f3458281d2947 (patch) | |
tree | 5f7994cf14c33cc97bd3447ed09d2ae911e26d74 | |
parent | 16e3d9648add0b3839587fd77f510537baf88c3a (diff) |
vhost_net: remove tx polling state
After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
errors when setting backend), we in fact track the polling state through
poll->wqh, so there's no need to duplicate the work with an extra
vhost_net_poll_state. So this patch removes it and makes the code simpler.
This patch also removes all of the tx poll starting/stopping code in the tx path,
according to Michael's suggestion.
Netperf tests show almost the same results in the stream test, but improvements
in the TCP_RR tests (both zerocopy and copy), especially in low-load cases.
Tested between a multiqueue KVM guest and an external host with two directly
connected 82599s.
zerocopy disabled:
sessions|transaction rates|normalize|
before/after/+improvements
1 | 9510.24/11727.29/+23.3% | 693.54/887.68/+28.0% |
25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
50| 277634.64/291905.76/+5% | 3118.36/3230.11/+3.6% |
zerocopy enabled:
sessions|transaction rates|normalize|
before/after/+improvements
1 | 7318.33/11929.76/+63.0% | 521.86/843.30/+61.6% |
25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
50| 272181.02/294347.04/+8.1% | 3071.56/3257.85/+6.1% |
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/vhost/net.c | 74 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 3 |
2 files changed, 9 insertions, 68 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ec6fb3fa59bb..87c216c1e54e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -64,20 +64,10 @@ enum { | |||
64 | VHOST_NET_VQ_MAX = 2, | 64 | VHOST_NET_VQ_MAX = 2, |
65 | }; | 65 | }; |
66 | 66 | ||
67 | enum vhost_net_poll_state { | ||
68 | VHOST_NET_POLL_DISABLED = 0, | ||
69 | VHOST_NET_POLL_STARTED = 1, | ||
70 | VHOST_NET_POLL_STOPPED = 2, | ||
71 | }; | ||
72 | |||
73 | struct vhost_net { | 67 | struct vhost_net { |
74 | struct vhost_dev dev; | 68 | struct vhost_dev dev; |
75 | struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; | 69 | struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; |
76 | struct vhost_poll poll[VHOST_NET_VQ_MAX]; | 70 | struct vhost_poll poll[VHOST_NET_VQ_MAX]; |
77 | /* Tells us whether we are polling a socket for TX. | ||
78 | * We only do this when socket buffer fills up. | ||
79 | * Protected by tx vq lock. */ | ||
80 | enum vhost_net_poll_state tx_poll_state; | ||
81 | /* Number of TX recently submitted. | 71 | /* Number of TX recently submitted. |
82 | * Protected by tx vq lock. */ | 72 | * Protected by tx vq lock. */ |
83 | unsigned tx_packets; | 73 | unsigned tx_packets; |
@@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, | |||
155 | } | 145 | } |
156 | } | 146 | } |
157 | 147 | ||
158 | /* Caller must have TX VQ lock */ | ||
159 | static void tx_poll_stop(struct vhost_net *net) | ||
160 | { | ||
161 | if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) | ||
162 | return; | ||
163 | vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); | ||
164 | net->tx_poll_state = VHOST_NET_POLL_STOPPED; | ||
165 | } | ||
166 | |||
167 | /* Caller must have TX VQ lock */ | ||
168 | static int tx_poll_start(struct vhost_net *net, struct socket *sock) | ||
169 | { | ||
170 | int ret; | ||
171 | |||
172 | if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) | ||
173 | return 0; | ||
174 | ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); | ||
175 | if (!ret) | ||
176 | net->tx_poll_state = VHOST_NET_POLL_STARTED; | ||
177 | return ret; | ||
178 | } | ||
179 | |||
180 | /* In case of DMA done not in order in lower device driver for some reason. | 148 | /* In case of DMA done not in order in lower device driver for some reason. |
181 | * upend_idx is used to track end of used idx, done_idx is used to track head | 149 | * upend_idx is used to track end of used idx, done_idx is used to track head |
182 | * of used idx. Once lower device DMA done contiguously, we will signal KVM | 150 | * of used idx. Once lower device DMA done contiguously, we will signal KVM |
@@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net) | |||
242 | .msg_flags = MSG_DONTWAIT, | 210 | .msg_flags = MSG_DONTWAIT, |
243 | }; | 211 | }; |
244 | size_t len, total_len = 0; | 212 | size_t len, total_len = 0; |
245 | int err, wmem; | 213 | int err; |
246 | size_t hdr_size; | 214 | size_t hdr_size; |
247 | struct socket *sock; | 215 | struct socket *sock; |
248 | struct vhost_ubuf_ref *uninitialized_var(ubufs); | 216 | struct vhost_ubuf_ref *uninitialized_var(ubufs); |
@@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net) | |||
253 | if (!sock) | 221 | if (!sock) |
254 | return; | 222 | return; |
255 | 223 | ||
256 | wmem = atomic_read(&sock->sk->sk_wmem_alloc); | ||
257 | if (wmem >= sock->sk->sk_sndbuf) { | ||
258 | mutex_lock(&vq->mutex); | ||
259 | tx_poll_start(net, sock); | ||
260 | mutex_unlock(&vq->mutex); | ||
261 | return; | ||
262 | } | ||
263 | |||
264 | mutex_lock(&vq->mutex); | 224 | mutex_lock(&vq->mutex); |
265 | vhost_disable_notify(&net->dev, vq); | 225 | vhost_disable_notify(&net->dev, vq); |
266 | 226 | ||
267 | if (wmem < sock->sk->sk_sndbuf / 2) | ||
268 | tx_poll_stop(net); | ||
269 | hdr_size = vq->vhost_hlen; | 227 | hdr_size = vq->vhost_hlen; |
270 | zcopy = vq->ubufs; | 228 | zcopy = vq->ubufs; |
271 | 229 | ||
@@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net) | |||
285 | if (head == vq->num) { | 243 | if (head == vq->num) { |
286 | int num_pends; | 244 | int num_pends; |
287 | 245 | ||
288 | wmem = atomic_read(&sock->sk->sk_wmem_alloc); | ||
289 | if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { | ||
290 | tx_poll_start(net, sock); | ||
291 | set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
292 | break; | ||
293 | } | ||
294 | /* If more outstanding DMAs, queue the work. | 246 | /* If more outstanding DMAs, queue the work. |
295 | * Handle upend_idx wrap around | 247 | * Handle upend_idx wrap around |
296 | */ | 248 | */ |
297 | num_pends = likely(vq->upend_idx >= vq->done_idx) ? | 249 | num_pends = likely(vq->upend_idx >= vq->done_idx) ? |
298 | (vq->upend_idx - vq->done_idx) : | 250 | (vq->upend_idx - vq->done_idx) : |
299 | (vq->upend_idx + UIO_MAXIOV - vq->done_idx); | 251 | (vq->upend_idx + UIO_MAXIOV - vq->done_idx); |
300 | if (unlikely(num_pends > VHOST_MAX_PEND)) { | 252 | if (unlikely(num_pends > VHOST_MAX_PEND)) |
301 | tx_poll_start(net, sock); | ||
302 | set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
303 | break; | 253 | break; |
304 | } | ||
305 | if (unlikely(vhost_enable_notify(&net->dev, vq))) { | 254 | if (unlikely(vhost_enable_notify(&net->dev, vq))) { |
306 | vhost_disable_notify(&net->dev, vq); | 255 | vhost_disable_notify(&net->dev, vq); |
307 | continue; | 256 | continue; |
@@ -364,8 +313,6 @@ static void handle_tx(struct vhost_net *net) | |||
364 | UIO_MAXIOV; | 313 | UIO_MAXIOV; |
365 | } | 314 | } |
366 | vhost_discard_vq_desc(vq, 1); | 315 | vhost_discard_vq_desc(vq, 1); |
367 | if (err == -EAGAIN || err == -ENOBUFS) | ||
368 | tx_poll_start(net, sock); | ||
369 | break; | 316 | break; |
370 | } | 317 | } |
371 | if (err != len) | 318 | if (err != len) |
@@ -628,7 +575,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) | |||
628 | 575 | ||
629 | vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); | 576 | vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); |
630 | vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); | 577 | vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); |
631 | n->tx_poll_state = VHOST_NET_POLL_DISABLED; | ||
632 | 578 | ||
633 | f->private_data = n; | 579 | f->private_data = n; |
634 | 580 | ||
@@ -638,32 +584,24 @@ static int vhost_net_open(struct inode *inode, struct file *f) | |||
638 | static void vhost_net_disable_vq(struct vhost_net *n, | 584 | static void vhost_net_disable_vq(struct vhost_net *n, |
639 | struct vhost_virtqueue *vq) | 585 | struct vhost_virtqueue *vq) |
640 | { | 586 | { |
587 | struct vhost_poll *poll = n->poll + (vq - n->vqs); | ||
641 | if (!vq->private_data) | 588 | if (!vq->private_data) |
642 | return; | 589 | return; |
643 | if (vq == n->vqs + VHOST_NET_VQ_TX) { | 590 | vhost_poll_stop(poll); |
644 | tx_poll_stop(n); | ||
645 | n->tx_poll_state = VHOST_NET_POLL_DISABLED; | ||
646 | } else | ||
647 | vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); | ||
648 | } | 591 | } |
649 | 592 | ||
650 | static int vhost_net_enable_vq(struct vhost_net *n, | 593 | static int vhost_net_enable_vq(struct vhost_net *n, |
651 | struct vhost_virtqueue *vq) | 594 | struct vhost_virtqueue *vq) |
652 | { | 595 | { |
596 | struct vhost_poll *poll = n->poll + (vq - n->vqs); | ||
653 | struct socket *sock; | 597 | struct socket *sock; |
654 | int ret; | ||
655 | 598 | ||
656 | sock = rcu_dereference_protected(vq->private_data, | 599 | sock = rcu_dereference_protected(vq->private_data, |
657 | lockdep_is_held(&vq->mutex)); | 600 | lockdep_is_held(&vq->mutex)); |
658 | if (!sock) | 601 | if (!sock) |
659 | return 0; | 602 | return 0; |
660 | if (vq == n->vqs + VHOST_NET_VQ_TX) { | ||
661 | n->tx_poll_state = VHOST_NET_POLL_STOPPED; | ||
662 | ret = tx_poll_start(n, sock); | ||
663 | } else | ||
664 | ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); | ||
665 | 603 | ||
666 | return ret; | 604 | return vhost_poll_start(poll, sock->file); |
667 | } | 605 | } |
668 | 606 | ||
669 | static struct socket *vhost_net_stop_vq(struct vhost_net *n, | 607 | static struct socket *vhost_net_stop_vq(struct vhost_net *n, |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9759249e6d90..4eecdb867d53 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
@@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) | |||
89 | unsigned long mask; | 89 | unsigned long mask; |
90 | int ret = 0; | 90 | int ret = 0; |
91 | 91 | ||
92 | if (poll->wqh) | ||
93 | return 0; | ||
94 | |||
92 | mask = file->f_op->poll(file, &poll->table); | 95 | mask = file->f_op->poll(file, &poll->table); |
93 | if (mask) | 96 | if (mask) |
94 | vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); | 97 | vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); |