summaryrefslogtreecommitdiffstats
path: root/drivers/net/macvtap.c
diff options
context:
space:
mode:
authorJason Wang <jasowang@redhat.com>2016-07-15 03:46:31 -0400
committerDavid S. Miller <davem@davemloft.net>2016-07-15 17:40:39 -0400
commit362899b8725b35e32802882c67f99cbf42bce2af (patch)
tree3b7ca9c6bbd449af799a6cd569c14e1666239519 /drivers/net/macvtap.c
parent1b16bf42d154c8fbbab2cccc419e2ba47d700849 (diff)
macvtap: switch to use skb array
This patch switches to using skb array instead of sk_receive_queue to avoid spinlock contention. Tests show about a 21% improvement in guest rx pps: Before: 1472731 pkts/s After: 1786289 pkts/s Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/macvtap.c')
-rw-r--r--drivers/net/macvtap.c82
1 files changed, 71 insertions, 11 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 2476923b424d..9204d19fb30c 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -21,6 +21,7 @@
21#include <net/rtnetlink.h> 21#include <net/rtnetlink.h>
22#include <net/sock.h> 22#include <net/sock.h>
23#include <linux/virtio_net.h> 23#include <linux/virtio_net.h>
24#include <linux/skb_array.h>
24 25
25/* 26/*
26 * A macvtap queue is the central object of this driver, it connects 27 * A macvtap queue is the central object of this driver, it connects
@@ -43,6 +44,7 @@ struct macvtap_queue {
43 u16 queue_index; 44 u16 queue_index;
44 bool enabled; 45 bool enabled;
45 struct list_head next; 46 struct list_head next;
47 struct skb_array skb_array;
46}; 48};
47 49
48#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) 50#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
@@ -273,6 +275,7 @@ static void macvtap_put_queue(struct macvtap_queue *q)
273 rtnl_unlock(); 275 rtnl_unlock();
274 276
275 synchronize_rcu(); 277 synchronize_rcu();
278 skb_array_cleanup(&q->skb_array);
276 sock_put(&q->sk); 279 sock_put(&q->sk);
277} 280}
278 281
@@ -366,7 +369,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
366 if (!q) 369 if (!q)
367 return RX_HANDLER_PASS; 370 return RX_HANDLER_PASS;
368 371
369 if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len) 372 if (__skb_array_full(&q->skb_array))
370 goto drop; 373 goto drop;
371 374
372 skb_push(skb, ETH_HLEN); 375 skb_push(skb, ETH_HLEN);
@@ -384,7 +387,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
384 goto drop; 387 goto drop;
385 388
386 if (!segs) { 389 if (!segs) {
387 skb_queue_tail(&q->sk.sk_receive_queue, skb); 390 if (skb_array_produce(&q->skb_array, skb))
391 goto drop;
388 goto wake_up; 392 goto wake_up;
389 } 393 }
390 394
@@ -393,7 +397,11 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
393 struct sk_buff *nskb = segs->next; 397 struct sk_buff *nskb = segs->next;
394 398
395 segs->next = NULL; 399 segs->next = NULL;
396 skb_queue_tail(&q->sk.sk_receive_queue, segs); 400 if (skb_array_produce(&q->skb_array, segs)) {
401 kfree_skb(segs);
402 kfree_skb_list(nskb);
403 break;
404 }
397 segs = nskb; 405 segs = nskb;
398 } 406 }
399 } else { 407 } else {
@@ -406,7 +414,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
406 !(features & NETIF_F_CSUM_MASK) && 414 !(features & NETIF_F_CSUM_MASK) &&
407 skb_checksum_help(skb)) 415 skb_checksum_help(skb))
408 goto drop; 416 goto drop;
409 skb_queue_tail(&q->sk.sk_receive_queue, skb); 417 if (skb_array_produce(&q->skb_array, skb))
418 goto drop;
410 } 419 }
411 420
412wake_up: 421wake_up:
@@ -523,7 +532,11 @@ static void macvtap_sock_write_space(struct sock *sk)
523 532
524static void macvtap_sock_destruct(struct sock *sk) 533static void macvtap_sock_destruct(struct sock *sk)
525{ 534{
526 skb_queue_purge(&sk->sk_receive_queue); 535 struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
536 struct sk_buff *skb;
537
538 while ((skb = skb_array_consume(&q->skb_array)) != NULL)
539 kfree(skb);
527} 540}
528 541
529static int macvtap_open(struct inode *inode, struct file *file) 542static int macvtap_open(struct inode *inode, struct file *file)
@@ -536,13 +549,13 @@ static int macvtap_open(struct inode *inode, struct file *file)
536 rtnl_lock(); 549 rtnl_lock();
537 dev = dev_get_by_macvtap_minor(iminor(inode)); 550 dev = dev_get_by_macvtap_minor(iminor(inode));
538 if (!dev) 551 if (!dev)
539 goto out; 552 goto err;
540 553
541 err = -ENOMEM; 554 err = -ENOMEM;
542 q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, 555 q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
543 &macvtap_proto, 0); 556 &macvtap_proto, 0);
544 if (!q) 557 if (!q)
545 goto out; 558 goto err;
546 559
547 RCU_INIT_POINTER(q->sock.wq, &q->wq); 560 RCU_INIT_POINTER(q->sock.wq, &q->wq);
548 init_waitqueue_head(&q->wq.wait); 561 init_waitqueue_head(&q->wq.wait);
@@ -566,11 +579,24 @@ static int macvtap_open(struct inode *inode, struct file *file)
566 if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) 579 if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
567 sock_set_flag(&q->sk, SOCK_ZEROCOPY); 580 sock_set_flag(&q->sk, SOCK_ZEROCOPY);
568 581
582 err = -ENOMEM;
583 if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
584 goto err_array;
585
569 err = macvtap_set_queue(dev, file, q); 586 err = macvtap_set_queue(dev, file, q);
570 if (err) 587 if (err)
571 sock_put(&q->sk); 588 goto err_queue;
572 589
573out: 590 dev_put(dev);
591
592 rtnl_unlock();
593 return err;
594
595err_queue:
596 skb_array_cleanup(&q->skb_array);
597err_array:
598 sock_put(&q->sk);
599err:
574 if (dev) 600 if (dev)
575 dev_put(dev); 601 dev_put(dev);
576 602
@@ -596,7 +622,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
596 mask = 0; 622 mask = 0;
597 poll_wait(file, &q->wq.wait, wait); 623 poll_wait(file, &q->wq.wait, wait);
598 624
599 if (!skb_queue_empty(&q->sk.sk_receive_queue)) 625 if (!skb_array_empty(&q->skb_array))
600 mask |= POLLIN | POLLRDNORM; 626 mask |= POLLIN | POLLRDNORM;
601 627
602 if (sock_writeable(&q->sk) || 628 if (sock_writeable(&q->sk) ||
@@ -856,7 +882,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q,
856 TASK_INTERRUPTIBLE); 882 TASK_INTERRUPTIBLE);
857 883
858 /* Read frames from the queue */ 884 /* Read frames from the queue */
859 skb = skb_dequeue(&q->sk.sk_receive_queue); 885 skb = skb_array_consume(&q->skb_array);
860 if (skb) 886 if (skb)
861 break; 887 break;
862 if (noblock) { 888 if (noblock) {
@@ -1180,10 +1206,18 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m,
1180 return ret; 1206 return ret;
1181} 1207}
1182 1208
1209static int macvtap_peek_len(struct socket *sock)
1210{
1211 struct macvtap_queue *q = container_of(sock, struct macvtap_queue,
1212 sock);
1213 return skb_array_peek_len(&q->skb_array);
1214}
1215
1183/* Ops structure to mimic raw sockets with tun */ 1216/* Ops structure to mimic raw sockets with tun */
1184static const struct proto_ops macvtap_socket_ops = { 1217static const struct proto_ops macvtap_socket_ops = {
1185 .sendmsg = macvtap_sendmsg, 1218 .sendmsg = macvtap_sendmsg,
1186 .recvmsg = macvtap_recvmsg, 1219 .recvmsg = macvtap_recvmsg,
1220 .peek_len = macvtap_peek_len,
1187}; 1221};
1188 1222
1189/* Get an underlying socket object from tun file. Returns error unless file is 1223/* Get an underlying socket object from tun file. Returns error unless file is
@@ -1202,6 +1236,28 @@ struct socket *macvtap_get_socket(struct file *file)
1202} 1236}
1203EXPORT_SYMBOL_GPL(macvtap_get_socket); 1237EXPORT_SYMBOL_GPL(macvtap_get_socket);
1204 1238
1239static int macvtap_queue_resize(struct macvlan_dev *vlan)
1240{
1241 struct net_device *dev = vlan->dev;
1242 struct macvtap_queue *q;
1243 struct skb_array **arrays;
1244 int n = vlan->numqueues;
1245 int ret, i = 0;
1246
1247 arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
1248 if (!arrays)
1249 return -ENOMEM;
1250
1251 list_for_each_entry(q, &vlan->queue_list, next)
1252 arrays[i++] = &q->skb_array;
1253
1254 ret = skb_array_resize_multiple(arrays, n,
1255 dev->tx_queue_len, GFP_KERNEL);
1256
1257 kfree(arrays);
1258 return ret;
1259}
1260
1205static int macvtap_device_event(struct notifier_block *unused, 1261static int macvtap_device_event(struct notifier_block *unused,
1206 unsigned long event, void *ptr) 1262 unsigned long event, void *ptr)
1207{ 1263{
@@ -1249,6 +1305,10 @@ static int macvtap_device_event(struct notifier_block *unused,
1249 device_destroy(&macvtap_class, devt); 1305 device_destroy(&macvtap_class, devt);
1250 macvtap_free_minor(vlan); 1306 macvtap_free_minor(vlan);
1251 break; 1307 break;
1308 case NETDEV_CHANGE_TX_QUEUE_LEN:
1309 if (macvtap_queue_resize(vlan))
1310 return NOTIFY_BAD;
1311 break;
1252 } 1312 }
1253 1313
1254 return NOTIFY_DONE; 1314 return NOTIFY_DONE;