author		Michael S. Tsirkin <mst@redhat.com>	2012-11-01 05:16:51 -0400
committer	David S. Miller <davem@davemloft.net>	2012-11-02 21:29:58 -0400
commit		eaae8132ef6032ffe612e789e8ff145dcf5bc2bb (patch)
tree		f2c8d0d2ae6ef1caa474abff4430eaedb189f193 /drivers/vhost
parent		b211616d712551874db3ce0fb44196f6faad2c34 (diff)
vhost-net: select tx zero copy dynamically
Even when vhost-net is in zero-copy transmit mode, net core might still decide to copy the skb later, which is somewhat slower than a copy in user context: data copy overhead is added to the cost of page pin/unpin.

The result is that enabling the tx zero copy option leads to higher CPU utilization for guest-to-guest and guest-to-host traffic.

To fix this, suppress zero copy tx after a given number of packets trigger a late data copy. Re-enable it periodically to detect workload changes.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
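In outline, the patch keeps two per-device counters and decides per packet whether zerocopy is still worth attempting. The sketch below condenses that logic outside of vhost for illustration; the struct and helper names here (tx_zcopy_stats, tx_select_zcopy, tx_account_packet) are made up for the example, while the actual counters and functions added to drivers/vhost/net.c appear in the diff that follows.

/*
 * Condensed sketch of the heuristic: zerocopy stays selected only while
 * fewer than roughly 1 in 64 recent packets forced a late data copy, and
 * both counters reset every 1024 packets so zerocopy is retried when the
 * workload changes.
 */
struct tx_zcopy_stats {
	unsigned tx_packets;   /* TX recently submitted */
	unsigned tx_zcopy_err; /* recent late-copy (zerocopy failure) events */
};

static bool tx_select_zcopy(struct tx_zcopy_stats *s)
{
	/* allow zerocopy while the failure rate stays below ~1/64 */
	return s->tx_packets / 64 >= s->tx_zcopy_err;
}

static void tx_account_packet(struct tx_zcopy_stats *s)
{
	if (++s->tx_packets < 1024)
		return;
	/* periodic reset: give zerocopy another chance */
	s->tx_packets = 0;
	s->tx_zcopy_err = 0;
}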
Diffstat (limited to 'drivers/vhost')
-rw-r--r--	drivers/vhost/net.c	| 61
1 file changed, 53 insertions(+), 8 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 532fc8830c42..93f2d6741f34 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -42,6 +42,21 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Experimental Zero Copy TX");
 #define VHOST_MAX_PEND 128
 #define VHOST_GOODCOPY_LEN 256
 
+/*
+ * For transmit, used buffer len is unused; we override it to track buffer
+ * status internally; used for zerocopy tx only.
+ */
+/* Lower device DMA failed */
+#define VHOST_DMA_FAILED_LEN 3
+/* Lower device DMA done */
+#define VHOST_DMA_DONE_LEN 2
+/* Lower device DMA in progress */
+#define VHOST_DMA_IN_PROGRESS 1
+/* Buffer unused */
+#define VHOST_DMA_CLEAR_LEN 0
+
+#define VHOST_DMA_IS_DONE(len) ((len) >= VHOST_DMA_DONE_LEN)
+
 enum {
 	VHOST_NET_VQ_RX = 0,
 	VHOST_NET_VQ_TX = 1,
@@ -62,8 +77,33 @@ struct vhost_net {
 	 * We only do this when socket buffer fills up.
 	 * Protected by tx vq lock. */
 	enum vhost_net_poll_state tx_poll_state;
+	/* Number of TX recently submitted.
+	 * Protected by tx vq lock. */
+	unsigned tx_packets;
+	/* Number of times zerocopy TX recently failed.
+	 * Protected by tx vq lock. */
+	unsigned tx_zcopy_err;
 };
 
+static void vhost_net_tx_packet(struct vhost_net *net)
+{
+	++net->tx_packets;
+	if (net->tx_packets < 1024)
+		return;
+	net->tx_packets = 0;
+	net->tx_zcopy_err = 0;
+}
+
+static void vhost_net_tx_err(struct vhost_net *net)
+{
+	++net->tx_zcopy_err;
+}
+
+static bool vhost_net_tx_select_zcopy(struct vhost_net *net)
+{
+	return net->tx_packets / 64 >= net->tx_zcopy_err;
+}
+
 static bool vhost_sock_zcopy(struct socket *sock)
 {
 	return unlikely(experimental_zcopytx) &&
@@ -131,12 +171,15 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock)
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
  * guest used idx.
  */
-int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
+static int vhost_zerocopy_signal_used(struct vhost_net *net,
+				      struct vhost_virtqueue *vq)
 {
 	int i;
 	int j = 0;
 
 	for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+		if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
+			vhost_net_tx_err(net);
 		if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
 			vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
 			vhost_add_used_and_signal(vq->dev, vq,
@@ -150,15 +193,15 @@ int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
 	return j;
 }
 
-static void vhost_zerocopy_callback(struct ubuf_info *ubuf, int status)
+static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 {
 	struct vhost_ubuf_ref *ubufs = ubuf->ctx;
 	struct vhost_virtqueue *vq = ubufs->vq;
 
 	vhost_poll_queue(&vq->poll);
 	/* set len to mark this desc buffers done DMA */
-	vq->heads[ubuf->desc].len = status ?
-		VHOST_DMA_FAILED_LEN : VHOST_DMA_DONE_LEN;
+	vq->heads[ubuf->desc].len = success ?
+		VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
 	vhost_ubuf_put(ubufs);
 }
 
@@ -208,7 +251,7 @@ static void handle_tx(struct vhost_net *net)
 	for (;;) {
 		/* Release DMAs done buffers first */
 		if (zcopy)
-			vhost_zerocopy_signal_used(vq);
+			vhost_zerocopy_signal_used(net, vq);
 
 		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
@@ -263,7 +306,8 @@ static void handle_tx(struct vhost_net *net)
 		/* use msg_control to pass vhost zerocopy ubuf info to skb */
 		if (zcopy) {
 			vq->heads[vq->upend_idx].id = head;
-			if (len < VHOST_GOODCOPY_LEN) {
+			if (!vhost_net_tx_select_zcopy(net) ||
+			    len < VHOST_GOODCOPY_LEN) {
 				/* copy don't need to wait for DMA done */
 				vq->heads[vq->upend_idx].len =
 					VHOST_DMA_DONE_LEN;
@@ -305,8 +349,9 @@ static void handle_tx(struct vhost_net *net)
 		if (!zcopy)
 			vhost_add_used_and_signal(&net->dev, vq, head, 0);
 		else
-			vhost_zerocopy_signal_used(vq);
+			vhost_zerocopy_signal_used(net, vq);
 		total_len += len;
+		vhost_net_tx_packet(net);
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
 			vhost_poll_queue(&vq->poll);
 			break;
@@ -774,7 +819,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	if (oldubufs) {
 		vhost_ubuf_put_and_wait(oldubufs);
 		mutex_lock(&vq->mutex);
-		vhost_zerocopy_signal_used(vq);
+		vhost_zerocopy_signal_used(n, vq);
 		mutex_unlock(&vq->mutex);
 	}
 