aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jason Wang <jasowang@redhat.com> 2013-04-10 16:50:48 -0400
committer David S. Miller <davem@davemloft.net> 2013-04-11 16:16:22 -0400
commit 70181d51209cbcdf9ce2171eac3f3458281d2947 (patch)
tree 5f7994cf14c33cc97bd3447ed09d2ae911e26d74
parent 16e3d9648add0b3839587fd77f510537baf88c3a (diff)
vhost_net: remove tx polling state
After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling errors when setting backend), we in fact track the polling state through poll->wqh, so there's no need to duplicate the work with an extra vhost_net_polling_state. So this patch removes it and makes the code simpler.

This patch also removes all the tx starting/stopping code in the tx path, according to Michael's suggestion.

Netperf tests show almost the same result in the stream test, but improvements in the TCP_RR tests (both zerocopy and copy), especially in low-load cases.

Tested between a multiqueue kvm guest and an external host with two directly connected 82599s.

zerocopy disabled:
sessions|transaction rates|normalize|
before/after/+improvements
1 | 9510.24/11727.29/+23.3% | 693.54/887.68/+28.0% |
25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
50| 277634.64/291905.76/+5% | 3118.36/3230.11/+3.6% |

zerocopy enabled:
sessions|transaction rates|normalize|
before/after/+improvements
1 | 7318.33/11929.76/+63.0% | 521.86/843.30/+61.6% |
25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
50| 272181.02/294347.04/+8.1% | 3071.56/3257.85/+6.1% |

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/vhost/net.c 74
-rw-r--r-- drivers/vhost/vhost.c 3
2 files changed, 9 insertions, 68 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ec6fb3fa59bb..87c216c1e54e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,10 @@ enum {
64 VHOST_NET_VQ_MAX = 2, 64 VHOST_NET_VQ_MAX = 2,
65}; 65};
66 66
67enum vhost_net_poll_state {
68 VHOST_NET_POLL_DISABLED = 0,
69 VHOST_NET_POLL_STARTED = 1,
70 VHOST_NET_POLL_STOPPED = 2,
71};
72
73struct vhost_net { 67struct vhost_net {
74 struct vhost_dev dev; 68 struct vhost_dev dev;
75 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; 69 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
76 struct vhost_poll poll[VHOST_NET_VQ_MAX]; 70 struct vhost_poll poll[VHOST_NET_VQ_MAX];
77 /* Tells us whether we are polling a socket for TX.
78 * We only do this when socket buffer fills up.
79 * Protected by tx vq lock. */
80 enum vhost_net_poll_state tx_poll_state;
81 /* Number of TX recently submitted. 71 /* Number of TX recently submitted.
82 * Protected by tx vq lock. */ 72 * Protected by tx vq lock. */
83 unsigned tx_packets; 73 unsigned tx_packets;
@@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
155 } 145 }
156} 146}
157 147
158/* Caller must have TX VQ lock */
159static void tx_poll_stop(struct vhost_net *net)
160{
161 if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
162 return;
163 vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
164 net->tx_poll_state = VHOST_NET_POLL_STOPPED;
165}
166
167/* Caller must have TX VQ lock */
168static int tx_poll_start(struct vhost_net *net, struct socket *sock)
169{
170 int ret;
171
172 if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
173 return 0;
174 ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
175 if (!ret)
176 net->tx_poll_state = VHOST_NET_POLL_STARTED;
177 return ret;
178}
179
180/* In case of DMA done not in order in lower device driver for some reason. 148/* In case of DMA done not in order in lower device driver for some reason.
181 * upend_idx is used to track end of used idx, done_idx is used to track head 149 * upend_idx is used to track end of used idx, done_idx is used to track head
182 * of used idx. Once lower device DMA done contiguously, we will signal KVM 150 * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net)
242 .msg_flags = MSG_DONTWAIT, 210 .msg_flags = MSG_DONTWAIT,
243 }; 211 };
244 size_t len, total_len = 0; 212 size_t len, total_len = 0;
245 int err, wmem; 213 int err;
246 size_t hdr_size; 214 size_t hdr_size;
247 struct socket *sock; 215 struct socket *sock;
248 struct vhost_ubuf_ref *uninitialized_var(ubufs); 216 struct vhost_ubuf_ref *uninitialized_var(ubufs);
@@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net)
253 if (!sock) 221 if (!sock)
254 return; 222 return;
255 223
256 wmem = atomic_read(&sock->sk->sk_wmem_alloc);
257 if (wmem >= sock->sk->sk_sndbuf) {
258 mutex_lock(&vq->mutex);
259 tx_poll_start(net, sock);
260 mutex_unlock(&vq->mutex);
261 return;
262 }
263
264 mutex_lock(&vq->mutex); 224 mutex_lock(&vq->mutex);
265 vhost_disable_notify(&net->dev, vq); 225 vhost_disable_notify(&net->dev, vq);
266 226
267 if (wmem < sock->sk->sk_sndbuf / 2)
268 tx_poll_stop(net);
269 hdr_size = vq->vhost_hlen; 227 hdr_size = vq->vhost_hlen;
270 zcopy = vq->ubufs; 228 zcopy = vq->ubufs;
271 229
@@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net)
285 if (head == vq->num) { 243 if (head == vq->num) {
286 int num_pends; 244 int num_pends;
287 245
288 wmem = atomic_read(&sock->sk->sk_wmem_alloc);
289 if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
290 tx_poll_start(net, sock);
291 set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
292 break;
293 }
294 /* If more outstanding DMAs, queue the work. 246 /* If more outstanding DMAs, queue the work.
295 * Handle upend_idx wrap around 247 * Handle upend_idx wrap around
296 */ 248 */
297 num_pends = likely(vq->upend_idx >= vq->done_idx) ? 249 num_pends = likely(vq->upend_idx >= vq->done_idx) ?
298 (vq->upend_idx - vq->done_idx) : 250 (vq->upend_idx - vq->done_idx) :
299 (vq->upend_idx + UIO_MAXIOV - vq->done_idx); 251 (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
300 if (unlikely(num_pends > VHOST_MAX_PEND)) { 252 if (unlikely(num_pends > VHOST_MAX_PEND))
301 tx_poll_start(net, sock);
302 set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
303 break; 253 break;
304 }
305 if (unlikely(vhost_enable_notify(&net->dev, vq))) { 254 if (unlikely(vhost_enable_notify(&net->dev, vq))) {
306 vhost_disable_notify(&net->dev, vq); 255 vhost_disable_notify(&net->dev, vq);
307 continue; 256 continue;
@@ -364,8 +313,6 @@ static void handle_tx(struct vhost_net *net)
364 UIO_MAXIOV; 313 UIO_MAXIOV;
365 } 314 }
366 vhost_discard_vq_desc(vq, 1); 315 vhost_discard_vq_desc(vq, 1);
367 if (err == -EAGAIN || err == -ENOBUFS)
368 tx_poll_start(net, sock);
369 break; 316 break;
370 } 317 }
371 if (err != len) 318 if (err != len)
@@ -628,7 +575,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
628 575
629 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); 576 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
630 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); 577 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
631 n->tx_poll_state = VHOST_NET_POLL_DISABLED;
632 578
633 f->private_data = n; 579 f->private_data = n;
634 580
@@ -638,32 +584,24 @@ static int vhost_net_open(struct inode *inode, struct file *f)
638static void vhost_net_disable_vq(struct vhost_net *n, 584static void vhost_net_disable_vq(struct vhost_net *n,
639 struct vhost_virtqueue *vq) 585 struct vhost_virtqueue *vq)
640{ 586{
587 struct vhost_poll *poll = n->poll + (vq - n->vqs);
641 if (!vq->private_data) 588 if (!vq->private_data)
642 return; 589 return;
643 if (vq == n->vqs + VHOST_NET_VQ_TX) { 590 vhost_poll_stop(poll);
644 tx_poll_stop(n);
645 n->tx_poll_state = VHOST_NET_POLL_DISABLED;
646 } else
647 vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
648} 591}
649 592
650static int vhost_net_enable_vq(struct vhost_net *n, 593static int vhost_net_enable_vq(struct vhost_net *n,
651 struct vhost_virtqueue *vq) 594 struct vhost_virtqueue *vq)
652{ 595{
596 struct vhost_poll *poll = n->poll + (vq - n->vqs);
653 struct socket *sock; 597 struct socket *sock;
654 int ret;
655 598
656 sock = rcu_dereference_protected(vq->private_data, 599 sock = rcu_dereference_protected(vq->private_data,
657 lockdep_is_held(&vq->mutex)); 600 lockdep_is_held(&vq->mutex));
658 if (!sock) 601 if (!sock)
659 return 0; 602 return 0;
660 if (vq == n->vqs + VHOST_NET_VQ_TX) {
661 n->tx_poll_state = VHOST_NET_POLL_STOPPED;
662 ret = tx_poll_start(n, sock);
663 } else
664 ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
665 603
666 return ret; 604 return vhost_poll_start(poll, sock->file);
667} 605}
668 606
669static struct socket *vhost_net_stop_vq(struct vhost_net *n, 607static struct socket *vhost_net_stop_vq(struct vhost_net *n,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9759249e6d90..4eecdb867d53 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
89 unsigned long mask; 89 unsigned long mask;
90 int ret = 0; 90 int ret = 0;
91 91
92 if (poll->wqh)
93 return 0;
94
92 mask = file->f_op->poll(file, &poll->table); 95 mask = file->f_op->poll(file, &poll->table);
93 if (mask) 96 if (mask)
94 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); 97 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);