diff options
author | Jason Wang <jasowang@redhat.com> | 2016-07-15 03:46:31 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-07-15 17:40:39 -0400 |
commit | 362899b8725b35e32802882c67f99cbf42bce2af (patch) | |
tree | 3b7ca9c6bbd449af799a6cd569c14e1666239519 /drivers/net/macvtap.c | |
parent | 1b16bf42d154c8fbbab2cccc419e2ba47d700849 (diff) |
macvtap: switch to use skb array
This patch switches to using an skb array instead of sk_receive_queue to
avoid spinlock contention. Tests show about a 21% improvement in
guest rx pps:
Before: 1472731 pkts/s
After: 1786289 pkts/s
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/macvtap.c')
-rw-r--r-- | drivers/net/macvtap.c | 82 |
1 files changed, 71 insertions, 11 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 2476923b424d..9204d19fb30c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <net/rtnetlink.h> | 21 | #include <net/rtnetlink.h> |
22 | #include <net/sock.h> | 22 | #include <net/sock.h> |
23 | #include <linux/virtio_net.h> | 23 | #include <linux/virtio_net.h> |
24 | #include <linux/skb_array.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * A macvtap queue is the central object of this driver, it connects | 27 | * A macvtap queue is the central object of this driver, it connects |
@@ -43,6 +44,7 @@ struct macvtap_queue { | |||
43 | u16 queue_index; | 44 | u16 queue_index; |
44 | bool enabled; | 45 | bool enabled; |
45 | struct list_head next; | 46 | struct list_head next; |
47 | struct skb_array skb_array; | ||
46 | }; | 48 | }; |
47 | 49 | ||
48 | #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) | 50 | #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) |
@@ -273,6 +275,7 @@ static void macvtap_put_queue(struct macvtap_queue *q) | |||
273 | rtnl_unlock(); | 275 | rtnl_unlock(); |
274 | 276 | ||
275 | synchronize_rcu(); | 277 | synchronize_rcu(); |
278 | skb_array_cleanup(&q->skb_array); | ||
276 | sock_put(&q->sk); | 279 | sock_put(&q->sk); |
277 | } | 280 | } |
278 | 281 | ||
@@ -366,7 +369,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) | |||
366 | if (!q) | 369 | if (!q) |
367 | return RX_HANDLER_PASS; | 370 | return RX_HANDLER_PASS; |
368 | 371 | ||
369 | if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len) | 372 | if (__skb_array_full(&q->skb_array)) |
370 | goto drop; | 373 | goto drop; |
371 | 374 | ||
372 | skb_push(skb, ETH_HLEN); | 375 | skb_push(skb, ETH_HLEN); |
@@ -384,7 +387,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) | |||
384 | goto drop; | 387 | goto drop; |
385 | 388 | ||
386 | if (!segs) { | 389 | if (!segs) { |
387 | skb_queue_tail(&q->sk.sk_receive_queue, skb); | 390 | if (skb_array_produce(&q->skb_array, skb)) |
391 | goto drop; | ||
388 | goto wake_up; | 392 | goto wake_up; |
389 | } | 393 | } |
390 | 394 | ||
@@ -393,7 +397,11 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) | |||
393 | struct sk_buff *nskb = segs->next; | 397 | struct sk_buff *nskb = segs->next; |
394 | 398 | ||
395 | segs->next = NULL; | 399 | segs->next = NULL; |
396 | skb_queue_tail(&q->sk.sk_receive_queue, segs); | 400 | if (skb_array_produce(&q->skb_array, segs)) { |
401 | kfree_skb(segs); | ||
402 | kfree_skb_list(nskb); | ||
403 | break; | ||
404 | } | ||
397 | segs = nskb; | 405 | segs = nskb; |
398 | } | 406 | } |
399 | } else { | 407 | } else { |
@@ -406,7 +414,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) | |||
406 | !(features & NETIF_F_CSUM_MASK) && | 414 | !(features & NETIF_F_CSUM_MASK) && |
407 | skb_checksum_help(skb)) | 415 | skb_checksum_help(skb)) |
408 | goto drop; | 416 | goto drop; |
409 | skb_queue_tail(&q->sk.sk_receive_queue, skb); | 417 | if (skb_array_produce(&q->skb_array, skb)) |
418 | goto drop; | ||
410 | } | 419 | } |
411 | 420 | ||
412 | wake_up: | 421 | wake_up: |
@@ -523,7 +532,11 @@ static void macvtap_sock_write_space(struct sock *sk) | |||
523 | 532 | ||
524 | static void macvtap_sock_destruct(struct sock *sk) | 533 | static void macvtap_sock_destruct(struct sock *sk) |
525 | { | 534 | { |
526 | skb_queue_purge(&sk->sk_receive_queue); | 535 | struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk); |
536 | struct sk_buff *skb; | ||
537 | |||
538 | while ((skb = skb_array_consume(&q->skb_array)) != NULL) | ||
539 | kfree(skb); | ||
527 | } | 540 | } |
528 | 541 | ||
529 | static int macvtap_open(struct inode *inode, struct file *file) | 542 | static int macvtap_open(struct inode *inode, struct file *file) |
@@ -536,13 +549,13 @@ static int macvtap_open(struct inode *inode, struct file *file) | |||
536 | rtnl_lock(); | 549 | rtnl_lock(); |
537 | dev = dev_get_by_macvtap_minor(iminor(inode)); | 550 | dev = dev_get_by_macvtap_minor(iminor(inode)); |
538 | if (!dev) | 551 | if (!dev) |
539 | goto out; | 552 | goto err; |
540 | 553 | ||
541 | err = -ENOMEM; | 554 | err = -ENOMEM; |
542 | q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, | 555 | q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, |
543 | &macvtap_proto, 0); | 556 | &macvtap_proto, 0); |
544 | if (!q) | 557 | if (!q) |
545 | goto out; | 558 | goto err; |
546 | 559 | ||
547 | RCU_INIT_POINTER(q->sock.wq, &q->wq); | 560 | RCU_INIT_POINTER(q->sock.wq, &q->wq); |
548 | init_waitqueue_head(&q->wq.wait); | 561 | init_waitqueue_head(&q->wq.wait); |
@@ -566,11 +579,24 @@ static int macvtap_open(struct inode *inode, struct file *file) | |||
566 | if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) | 579 | if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) |
567 | sock_set_flag(&q->sk, SOCK_ZEROCOPY); | 580 | sock_set_flag(&q->sk, SOCK_ZEROCOPY); |
568 | 581 | ||
582 | err = -ENOMEM; | ||
583 | if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) | ||
584 | goto err_array; | ||
585 | |||
569 | err = macvtap_set_queue(dev, file, q); | 586 | err = macvtap_set_queue(dev, file, q); |
570 | if (err) | 587 | if (err) |
571 | sock_put(&q->sk); | 588 | goto err_queue; |
572 | 589 | ||
573 | out: | 590 | dev_put(dev); |
591 | |||
592 | rtnl_unlock(); | ||
593 | return err; | ||
594 | |||
595 | err_queue: | ||
596 | skb_array_cleanup(&q->skb_array); | ||
597 | err_array: | ||
598 | sock_put(&q->sk); | ||
599 | err: | ||
574 | if (dev) | 600 | if (dev) |
575 | dev_put(dev); | 601 | dev_put(dev); |
576 | 602 | ||
@@ -596,7 +622,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) | |||
596 | mask = 0; | 622 | mask = 0; |
597 | poll_wait(file, &q->wq.wait, wait); | 623 | poll_wait(file, &q->wq.wait, wait); |
598 | 624 | ||
599 | if (!skb_queue_empty(&q->sk.sk_receive_queue)) | 625 | if (!skb_array_empty(&q->skb_array)) |
600 | mask |= POLLIN | POLLRDNORM; | 626 | mask |= POLLIN | POLLRDNORM; |
601 | 627 | ||
602 | if (sock_writeable(&q->sk) || | 628 | if (sock_writeable(&q->sk) || |
@@ -856,7 +882,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, | |||
856 | TASK_INTERRUPTIBLE); | 882 | TASK_INTERRUPTIBLE); |
857 | 883 | ||
858 | /* Read frames from the queue */ | 884 | /* Read frames from the queue */ |
859 | skb = skb_dequeue(&q->sk.sk_receive_queue); | 885 | skb = skb_array_consume(&q->skb_array); |
860 | if (skb) | 886 | if (skb) |
861 | break; | 887 | break; |
862 | if (noblock) { | 888 | if (noblock) { |
@@ -1180,10 +1206,18 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, | |||
1180 | return ret; | 1206 | return ret; |
1181 | } | 1207 | } |
1182 | 1208 | ||
1209 | static int macvtap_peek_len(struct socket *sock) | ||
1210 | { | ||
1211 | struct macvtap_queue *q = container_of(sock, struct macvtap_queue, | ||
1212 | sock); | ||
1213 | return skb_array_peek_len(&q->skb_array); | ||
1214 | } | ||
1215 | |||
1183 | /* Ops structure to mimic raw sockets with tun */ | 1216 | /* Ops structure to mimic raw sockets with tun */ |
1184 | static const struct proto_ops macvtap_socket_ops = { | 1217 | static const struct proto_ops macvtap_socket_ops = { |
1185 | .sendmsg = macvtap_sendmsg, | 1218 | .sendmsg = macvtap_sendmsg, |
1186 | .recvmsg = macvtap_recvmsg, | 1219 | .recvmsg = macvtap_recvmsg, |
1220 | .peek_len = macvtap_peek_len, | ||
1187 | }; | 1221 | }; |
1188 | 1222 | ||
1189 | /* Get an underlying socket object from tun file. Returns error unless file is | 1223 | /* Get an underlying socket object from tun file. Returns error unless file is |
@@ -1202,6 +1236,28 @@ struct socket *macvtap_get_socket(struct file *file) | |||
1202 | } | 1236 | } |
1203 | EXPORT_SYMBOL_GPL(macvtap_get_socket); | 1237 | EXPORT_SYMBOL_GPL(macvtap_get_socket); |
1204 | 1238 | ||
1239 | static int macvtap_queue_resize(struct macvlan_dev *vlan) | ||
1240 | { | ||
1241 | struct net_device *dev = vlan->dev; | ||
1242 | struct macvtap_queue *q; | ||
1243 | struct skb_array **arrays; | ||
1244 | int n = vlan->numqueues; | ||
1245 | int ret, i = 0; | ||
1246 | |||
1247 | arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL); | ||
1248 | if (!arrays) | ||
1249 | return -ENOMEM; | ||
1250 | |||
1251 | list_for_each_entry(q, &vlan->queue_list, next) | ||
1252 | arrays[i++] = &q->skb_array; | ||
1253 | |||
1254 | ret = skb_array_resize_multiple(arrays, n, | ||
1255 | dev->tx_queue_len, GFP_KERNEL); | ||
1256 | |||
1257 | kfree(arrays); | ||
1258 | return ret; | ||
1259 | } | ||
1260 | |||
1205 | static int macvtap_device_event(struct notifier_block *unused, | 1261 | static int macvtap_device_event(struct notifier_block *unused, |
1206 | unsigned long event, void *ptr) | 1262 | unsigned long event, void *ptr) |
1207 | { | 1263 | { |
@@ -1249,6 +1305,10 @@ static int macvtap_device_event(struct notifier_block *unused, | |||
1249 | device_destroy(&macvtap_class, devt); | 1305 | device_destroy(&macvtap_class, devt); |
1250 | macvtap_free_minor(vlan); | 1306 | macvtap_free_minor(vlan); |
1251 | break; | 1307 | break; |
1308 | case NETDEV_CHANGE_TX_QUEUE_LEN: | ||
1309 | if (macvtap_queue_resize(vlan)) | ||
1310 | return NOTIFY_BAD; | ||
1311 | break; | ||
1252 | } | 1312 | } |
1253 | 1313 | ||
1254 | return NOTIFY_DONE; | 1314 | return NOTIFY_DONE; |