diff options
author | Jason Wang <jasowang@redhat.com> | 2012-12-07 02:04:56 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-12-09 00:30:55 -0500 |
commit | 986a4f4d452dec004697f667439d27c3fda9c928 (patch) | |
tree | 7c6d697cb47a069e0dc52d99fe2199e27f003c43 /drivers/net/virtio_net.c | |
parent | e9d7417b97f420fa70e3e198f2603a0375fb80a7 (diff) |
virtio_net: multiqueue support
This patch adds multiqueue (VIRTIO_NET_F_MQ) support to the virtio_net
driver. A VIRTIO_NET_F_MQ capable device lets the driver transmit and
receive packets through multiple queue pairs, and performs packet steering
to get better performance. By default, only one queue pair is used; the user
can change the number of queue pairs with ethtool in the next patch.
When multiple queue pairs are used and the number of queue pairs is equal to
the number of vcpus, the driver does the following optimizations to implement
per-cpu virt queue pairs:
- select the txq based on the smp processor id.
- smp affinity hint to the cpu that owns the queue pairs.
This could be used with the flow steering support of the device to guarantee
that the packets of a single flow are handled by the same cpu.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 473 |
1 files changed, 375 insertions, 98 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 02a71021565e..c0830488a390 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c | |||
@@ -58,6 +58,9 @@ struct send_queue { | |||
58 | 58 | ||
59 | /* TX: fragments + linear part + virtio header */ | 59 | /* TX: fragments + linear part + virtio header */ |
60 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; | 60 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; |
61 | |||
62 | /* Name of the send queue: output.$index */ | ||
63 | char name[40]; | ||
61 | }; | 64 | }; |
62 | 65 | ||
63 | /* Internal representation of a receive virtqueue */ | 66 | /* Internal representation of a receive virtqueue */ |
@@ -75,22 +78,34 @@ struct receive_queue { | |||
75 | 78 | ||
76 | /* RX: fragments + linear part + virtio header */ | 79 | /* RX: fragments + linear part + virtio header */ |
77 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; | 80 | struct scatterlist sg[MAX_SKB_FRAGS + 2]; |
81 | |||
82 | /* Name of this receive queue: input.$index */ | ||
83 | char name[40]; | ||
78 | }; | 84 | }; |
79 | 85 | ||
80 | struct virtnet_info { | 86 | struct virtnet_info { |
81 | struct virtio_device *vdev; | 87 | struct virtio_device *vdev; |
82 | struct virtqueue *cvq; | 88 | struct virtqueue *cvq; |
83 | struct net_device *dev; | 89 | struct net_device *dev; |
84 | struct send_queue sq; | 90 | struct send_queue *sq; |
85 | struct receive_queue rq; | 91 | struct receive_queue *rq; |
86 | unsigned int status; | 92 | unsigned int status; |
87 | 93 | ||
94 | /* Max # of queue pairs supported by the device */ | ||
95 | u16 max_queue_pairs; | ||
96 | |||
97 | /* # of queue pairs currently used by the driver */ | ||
98 | u16 curr_queue_pairs; | ||
99 | |||
88 | /* I like... big packets and I cannot lie! */ | 100 | /* I like... big packets and I cannot lie! */ |
89 | bool big_packets; | 101 | bool big_packets; |
90 | 102 | ||
91 | /* Host will merge rx buffers for big packets (shake it! shake it!) */ | 103 | /* Host will merge rx buffers for big packets (shake it! shake it!) */ |
92 | bool mergeable_rx_bufs; | 104 | bool mergeable_rx_bufs; |
93 | 105 | ||
106 | /* Has control virtqueue */ | ||
107 | bool has_cvq; | ||
108 | |||
94 | /* enable config space updates */ | 109 | /* enable config space updates */ |
95 | bool config_enable; | 110 | bool config_enable; |
96 | 111 | ||
@@ -105,6 +120,9 @@ struct virtnet_info { | |||
105 | 120 | ||
106 | /* Lock for config space updates */ | 121 | /* Lock for config space updates */ |
107 | struct mutex config_lock; | 122 | struct mutex config_lock; |
123 | |||
124 | /* Does the affinity hint is set for virtqueues? */ | ||
125 | bool affinity_hint_set; | ||
108 | }; | 126 | }; |
109 | 127 | ||
110 | struct skb_vnet_hdr { | 128 | struct skb_vnet_hdr { |
@@ -125,6 +143,29 @@ struct padded_vnet_hdr { | |||
125 | char padding[6]; | 143 | char padding[6]; |
126 | }; | 144 | }; |
127 | 145 | ||
146 | /* Converting between virtqueue no. and kernel tx/rx queue no. | ||
147 | * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq | ||
148 | */ | ||
149 | static int vq2txq(struct virtqueue *vq) | ||
150 | { | ||
151 | return (virtqueue_get_queue_index(vq) - 1) / 2; | ||
152 | } | ||
153 | |||
154 | static int txq2vq(int txq) | ||
155 | { | ||
156 | return txq * 2 + 1; | ||
157 | } | ||
158 | |||
159 | static int vq2rxq(struct virtqueue *vq) | ||
160 | { | ||
161 | return virtqueue_get_queue_index(vq) / 2; | ||
162 | } | ||
163 | |||
164 | static int rxq2vq(int rxq) | ||
165 | { | ||
166 | return rxq * 2; | ||
167 | } | ||
168 | |||
128 | static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) | 169 | static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) |
129 | { | 170 | { |
130 | return (struct skb_vnet_hdr *)skb->cb; | 171 | return (struct skb_vnet_hdr *)skb->cb; |
@@ -165,7 +206,7 @@ static void skb_xmit_done(struct virtqueue *vq) | |||
165 | virtqueue_disable_cb(vq); | 206 | virtqueue_disable_cb(vq); |
166 | 207 | ||
167 | /* We were probably waiting for more output buffers. */ | 208 | /* We were probably waiting for more output buffers. */ |
168 | netif_wake_queue(vi->dev); | 209 | netif_wake_subqueue(vi->dev, vq2txq(vq)); |
169 | } | 210 | } |
170 | 211 | ||
171 | static void set_skb_frag(struct sk_buff *skb, struct page *page, | 212 | static void set_skb_frag(struct sk_buff *skb, struct page *page, |
@@ -502,7 +543,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) | |||
502 | static void skb_recv_done(struct virtqueue *rvq) | 543 | static void skb_recv_done(struct virtqueue *rvq) |
503 | { | 544 | { |
504 | struct virtnet_info *vi = rvq->vdev->priv; | 545 | struct virtnet_info *vi = rvq->vdev->priv; |
505 | struct receive_queue *rq = &vi->rq; | 546 | struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; |
506 | 547 | ||
507 | /* Schedule NAPI, Suppress further interrupts if successful. */ | 548 | /* Schedule NAPI, Suppress further interrupts if successful. */ |
508 | if (napi_schedule_prep(&rq->napi)) { | 549 | if (napi_schedule_prep(&rq->napi)) { |
@@ -532,15 +573,21 @@ static void refill_work(struct work_struct *work) | |||
532 | struct virtnet_info *vi = | 573 | struct virtnet_info *vi = |
533 | container_of(work, struct virtnet_info, refill.work); | 574 | container_of(work, struct virtnet_info, refill.work); |
534 | bool still_empty; | 575 | bool still_empty; |
576 | int i; | ||
577 | |||
578 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
579 | struct receive_queue *rq = &vi->rq[i]; | ||
535 | 580 | ||
536 | napi_disable(&vi->rq.napi); | 581 | napi_disable(&rq->napi); |
537 | still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL); | 582 | still_empty = !try_fill_recv(rq, GFP_KERNEL); |
538 | virtnet_napi_enable(&vi->rq); | 583 | virtnet_napi_enable(rq); |
539 | 584 | ||
540 | /* In theory, this can happen: if we don't get any buffers in | 585 | /* In theory, this can happen: if we don't get any buffers in |
541 | * we will *never* try to fill again. */ | 586 | * we will *never* try to fill again. |
542 | if (still_empty) | 587 | */ |
543 | schedule_delayed_work(&vi->refill, HZ/2); | 588 | if (still_empty) |
589 | schedule_delayed_work(&vi->refill, HZ/2); | ||
590 | } | ||
544 | } | 591 | } |
545 | 592 | ||
546 | static int virtnet_poll(struct napi_struct *napi, int budget) | 593 | static int virtnet_poll(struct napi_struct *napi, int budget) |
@@ -578,6 +625,21 @@ again: | |||
578 | return received; | 625 | return received; |
579 | } | 626 | } |
580 | 627 | ||
628 | static int virtnet_open(struct net_device *dev) | ||
629 | { | ||
630 | struct virtnet_info *vi = netdev_priv(dev); | ||
631 | int i; | ||
632 | |||
633 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
634 | /* Make sure we have some buffers: if oom use wq. */ | ||
635 | if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) | ||
636 | schedule_delayed_work(&vi->refill, 0); | ||
637 | virtnet_napi_enable(&vi->rq[i]); | ||
638 | } | ||
639 | |||
640 | return 0; | ||
641 | } | ||
642 | |||
581 | static unsigned int free_old_xmit_skbs(struct send_queue *sq) | 643 | static unsigned int free_old_xmit_skbs(struct send_queue *sq) |
582 | { | 644 | { |
583 | struct sk_buff *skb; | 645 | struct sk_buff *skb; |
@@ -650,7 +712,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) | |||
650 | static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) | 712 | static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) |
651 | { | 713 | { |
652 | struct virtnet_info *vi = netdev_priv(dev); | 714 | struct virtnet_info *vi = netdev_priv(dev); |
653 | struct send_queue *sq = &vi->sq; | 715 | int qnum = skb_get_queue_mapping(skb); |
716 | struct send_queue *sq = &vi->sq[qnum]; | ||
654 | int capacity; | 717 | int capacity; |
655 | 718 | ||
656 | /* Free up any pending old buffers before queueing new ones. */ | 719 | /* Free up any pending old buffers before queueing new ones. */ |
@@ -664,13 +727,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
664 | if (likely(capacity == -ENOMEM)) { | 727 | if (likely(capacity == -ENOMEM)) { |
665 | if (net_ratelimit()) | 728 | if (net_ratelimit()) |
666 | dev_warn(&dev->dev, | 729 | dev_warn(&dev->dev, |
667 | "TX queue failure: out of memory\n"); | 730 | "TXQ (%d) failure: out of memory\n", |
731 | qnum); | ||
668 | } else { | 732 | } else { |
669 | dev->stats.tx_fifo_errors++; | 733 | dev->stats.tx_fifo_errors++; |
670 | if (net_ratelimit()) | 734 | if (net_ratelimit()) |
671 | dev_warn(&dev->dev, | 735 | dev_warn(&dev->dev, |
672 | "Unexpected TX queue failure: %d\n", | 736 | "Unexpected TXQ (%d) failure: %d\n", |
673 | capacity); | 737 | qnum, capacity); |
674 | } | 738 | } |
675 | dev->stats.tx_dropped++; | 739 | dev->stats.tx_dropped++; |
676 | kfree_skb(skb); | 740 | kfree_skb(skb); |
@@ -685,12 +749,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
685 | /* Apparently nice girls don't return TX_BUSY; stop the queue | 749 | /* Apparently nice girls don't return TX_BUSY; stop the queue |
686 | * before it gets out of hand. Naturally, this wastes entries. */ | 750 | * before it gets out of hand. Naturally, this wastes entries. */ |
687 | if (capacity < 2+MAX_SKB_FRAGS) { | 751 | if (capacity < 2+MAX_SKB_FRAGS) { |
688 | netif_stop_queue(dev); | 752 | netif_stop_subqueue(dev, qnum); |
689 | if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { | 753 | if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { |
690 | /* More just got used, free them then recheck. */ | 754 | /* More just got used, free them then recheck. */ |
691 | capacity += free_old_xmit_skbs(sq); | 755 | capacity += free_old_xmit_skbs(sq); |
692 | if (capacity >= 2+MAX_SKB_FRAGS) { | 756 | if (capacity >= 2+MAX_SKB_FRAGS) { |
693 | netif_start_queue(dev); | 757 | netif_start_subqueue(dev, qnum); |
694 | virtqueue_disable_cb(sq->vq); | 758 | virtqueue_disable_cb(sq->vq); |
695 | } | 759 | } |
696 | } | 760 | } |
@@ -758,23 +822,13 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, | |||
758 | static void virtnet_netpoll(struct net_device *dev) | 822 | static void virtnet_netpoll(struct net_device *dev) |
759 | { | 823 | { |
760 | struct virtnet_info *vi = netdev_priv(dev); | 824 | struct virtnet_info *vi = netdev_priv(dev); |
825 | int i; | ||
761 | 826 | ||
762 | napi_schedule(&vi->rq.napi); | 827 | for (i = 0; i < vi->curr_queue_pairs; i++) |
828 | napi_schedule(&vi->rq[i].napi); | ||
763 | } | 829 | } |
764 | #endif | 830 | #endif |
765 | 831 | ||
766 | static int virtnet_open(struct net_device *dev) | ||
767 | { | ||
768 | struct virtnet_info *vi = netdev_priv(dev); | ||
769 | |||
770 | /* Make sure we have some buffers: if oom use wq. */ | ||
771 | if (!try_fill_recv(&vi->rq, GFP_KERNEL)) | ||
772 | schedule_delayed_work(&vi->refill, 0); | ||
773 | |||
774 | virtnet_napi_enable(&vi->rq); | ||
775 | return 0; | ||
776 | } | ||
777 | |||
778 | /* | 832 | /* |
779 | * Send command via the control virtqueue and check status. Commands | 833 | * Send command via the control virtqueue and check status. Commands |
780 | * supported by the hypervisor, as indicated by feature bits, should | 834 | * supported by the hypervisor, as indicated by feature bits, should |
@@ -830,13 +884,39 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) | |||
830 | rtnl_unlock(); | 884 | rtnl_unlock(); |
831 | } | 885 | } |
832 | 886 | ||
887 | static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) | ||
888 | { | ||
889 | struct scatterlist sg; | ||
890 | struct virtio_net_ctrl_mq s; | ||
891 | struct net_device *dev = vi->dev; | ||
892 | |||
893 | if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) | ||
894 | return 0; | ||
895 | |||
896 | s.virtqueue_pairs = queue_pairs; | ||
897 | sg_init_one(&sg, &s, sizeof(s)); | ||
898 | |||
899 | if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, | ||
900 | VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)){ | ||
901 | dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", | ||
902 | queue_pairs); | ||
903 | return -EINVAL; | ||
904 | } else | ||
905 | vi->curr_queue_pairs = queue_pairs; | ||
906 | |||
907 | return 0; | ||
908 | } | ||
909 | |||
833 | static int virtnet_close(struct net_device *dev) | 910 | static int virtnet_close(struct net_device *dev) |
834 | { | 911 | { |
835 | struct virtnet_info *vi = netdev_priv(dev); | 912 | struct virtnet_info *vi = netdev_priv(dev); |
913 | int i; | ||
836 | 914 | ||
837 | /* Make sure refill_work doesn't re-enable napi! */ | 915 | /* Make sure refill_work doesn't re-enable napi! */ |
838 | cancel_delayed_work_sync(&vi->refill); | 916 | cancel_delayed_work_sync(&vi->refill); |
839 | napi_disable(&vi->rq.napi); | 917 | |
918 | for (i = 0; i < vi->max_queue_pairs; i++) | ||
919 | napi_disable(&vi->rq[i].napi); | ||
840 | 920 | ||
841 | return 0; | 921 | return 0; |
842 | } | 922 | } |
@@ -943,13 +1023,41 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) | |||
943 | return 0; | 1023 | return 0; |
944 | } | 1024 | } |
945 | 1025 | ||
1026 | static void virtnet_set_affinity(struct virtnet_info *vi, bool set) | ||
1027 | { | ||
1028 | int i; | ||
1029 | |||
1030 | /* In multiqueue mode, when the number of cpu is equal to the number of | ||
1031 | * queue pairs, we let the queue pairs to be private to one cpu by | ||
1032 | * setting the affinity hint to eliminate the contention. | ||
1033 | */ | ||
1034 | if ((vi->curr_queue_pairs == 1 || | ||
1035 | vi->max_queue_pairs != num_online_cpus()) && set) { | ||
1036 | if (vi->affinity_hint_set) | ||
1037 | set = false; | ||
1038 | else | ||
1039 | return; | ||
1040 | } | ||
1041 | |||
1042 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
1043 | int cpu = set ? i : -1; | ||
1044 | virtqueue_set_affinity(vi->rq[i].vq, cpu); | ||
1045 | virtqueue_set_affinity(vi->sq[i].vq, cpu); | ||
1046 | } | ||
1047 | |||
1048 | if (set) | ||
1049 | vi->affinity_hint_set = true; | ||
1050 | else | ||
1051 | vi->affinity_hint_set = false; | ||
1052 | } | ||
1053 | |||
946 | static void virtnet_get_ringparam(struct net_device *dev, | 1054 | static void virtnet_get_ringparam(struct net_device *dev, |
947 | struct ethtool_ringparam *ring) | 1055 | struct ethtool_ringparam *ring) |
948 | { | 1056 | { |
949 | struct virtnet_info *vi = netdev_priv(dev); | 1057 | struct virtnet_info *vi = netdev_priv(dev); |
950 | 1058 | ||
951 | ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq); | 1059 | ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq); |
952 | ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq); | 1060 | ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq); |
953 | ring->rx_pending = ring->rx_max_pending; | 1061 | ring->rx_pending = ring->rx_max_pending; |
954 | ring->tx_pending = ring->tx_max_pending; | 1062 | ring->tx_pending = ring->tx_max_pending; |
955 | } | 1063 | } |
@@ -984,6 +1092,21 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu) | |||
984 | return 0; | 1092 | return 0; |
985 | } | 1093 | } |
986 | 1094 | ||
1095 | /* To avoid contending a lock hold by a vcpu who would exit to host, select the | ||
1096 | * txq based on the processor id. | ||
1097 | * TODO: handle cpu hotplug. | ||
1098 | */ | ||
1099 | static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb) | ||
1100 | { | ||
1101 | int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : | ||
1102 | smp_processor_id(); | ||
1103 | |||
1104 | while (unlikely(txq >= dev->real_num_tx_queues)) | ||
1105 | txq -= dev->real_num_tx_queues; | ||
1106 | |||
1107 | return txq; | ||
1108 | } | ||
1109 | |||
987 | static const struct net_device_ops virtnet_netdev = { | 1110 | static const struct net_device_ops virtnet_netdev = { |
988 | .ndo_open = virtnet_open, | 1111 | .ndo_open = virtnet_open, |
989 | .ndo_stop = virtnet_close, | 1112 | .ndo_stop = virtnet_close, |
@@ -995,6 +1118,7 @@ static const struct net_device_ops virtnet_netdev = { | |||
995 | .ndo_get_stats64 = virtnet_stats, | 1118 | .ndo_get_stats64 = virtnet_stats, |
996 | .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, | 1119 | .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, |
997 | .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, | 1120 | .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, |
1121 | .ndo_select_queue = virtnet_select_queue, | ||
998 | #ifdef CONFIG_NET_POLL_CONTROLLER | 1122 | #ifdef CONFIG_NET_POLL_CONTROLLER |
999 | .ndo_poll_controller = virtnet_netpoll, | 1123 | .ndo_poll_controller = virtnet_netpoll, |
1000 | #endif | 1124 | #endif |
@@ -1030,10 +1154,10 @@ static void virtnet_config_changed_work(struct work_struct *work) | |||
1030 | 1154 | ||
1031 | if (vi->status & VIRTIO_NET_S_LINK_UP) { | 1155 | if (vi->status & VIRTIO_NET_S_LINK_UP) { |
1032 | netif_carrier_on(vi->dev); | 1156 | netif_carrier_on(vi->dev); |
1033 | netif_wake_queue(vi->dev); | 1157 | netif_tx_wake_all_queues(vi->dev); |
1034 | } else { | 1158 | } else { |
1035 | netif_carrier_off(vi->dev); | 1159 | netif_carrier_off(vi->dev); |
1036 | netif_stop_queue(vi->dev); | 1160 | netif_tx_stop_all_queues(vi->dev); |
1037 | } | 1161 | } |
1038 | done: | 1162 | done: |
1039 | mutex_unlock(&vi->config_lock); | 1163 | mutex_unlock(&vi->config_lock); |
@@ -1046,48 +1170,203 @@ static void virtnet_config_changed(struct virtio_device *vdev) | |||
1046 | schedule_work(&vi->config_work); | 1170 | schedule_work(&vi->config_work); |
1047 | } | 1171 | } |
1048 | 1172 | ||
1173 | static void virtnet_free_queues(struct virtnet_info *vi) | ||
1174 | { | ||
1175 | kfree(vi->rq); | ||
1176 | kfree(vi->sq); | ||
1177 | } | ||
1178 | |||
1179 | static void free_receive_bufs(struct virtnet_info *vi) | ||
1180 | { | ||
1181 | int i; | ||
1182 | |||
1183 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
1184 | while (vi->rq[i].pages) | ||
1185 | __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); | ||
1186 | } | ||
1187 | } | ||
1188 | |||
1189 | static void free_unused_bufs(struct virtnet_info *vi) | ||
1190 | { | ||
1191 | void *buf; | ||
1192 | int i; | ||
1193 | |||
1194 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
1195 | struct virtqueue *vq = vi->sq[i].vq; | ||
1196 | while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) | ||
1197 | dev_kfree_skb(buf); | ||
1198 | } | ||
1199 | |||
1200 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
1201 | struct virtqueue *vq = vi->rq[i].vq; | ||
1202 | |||
1203 | while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { | ||
1204 | if (vi->mergeable_rx_bufs || vi->big_packets) | ||
1205 | give_pages(&vi->rq[i], buf); | ||
1206 | else | ||
1207 | dev_kfree_skb(buf); | ||
1208 | --vi->rq[i].num; | ||
1209 | } | ||
1210 | BUG_ON(vi->rq[i].num != 0); | ||
1211 | } | ||
1212 | } | ||
1213 | |||
1049 | static void virtnet_del_vqs(struct virtnet_info *vi) | 1214 | static void virtnet_del_vqs(struct virtnet_info *vi) |
1050 | { | 1215 | { |
1051 | struct virtio_device *vdev = vi->vdev; | 1216 | struct virtio_device *vdev = vi->vdev; |
1052 | 1217 | ||
1218 | virtnet_set_affinity(vi, false); | ||
1219 | |||
1053 | vdev->config->del_vqs(vdev); | 1220 | vdev->config->del_vqs(vdev); |
1221 | |||
1222 | virtnet_free_queues(vi); | ||
1054 | } | 1223 | } |
1055 | 1224 | ||
1056 | static int init_vqs(struct virtnet_info *vi) | 1225 | static int virtnet_find_vqs(struct virtnet_info *vi) |
1057 | { | 1226 | { |
1058 | struct virtqueue *vqs[3]; | 1227 | vq_callback_t **callbacks; |
1059 | vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; | 1228 | struct virtqueue **vqs; |
1060 | const char *names[] = { "input", "output", "control" }; | 1229 | int ret = -ENOMEM; |
1061 | int nvqs, err; | 1230 | int i, total_vqs; |
1062 | 1231 | const char **names; | |
1063 | /* We expect two virtqueues, receive then send, | 1232 | |
1064 | * and optionally control. */ | 1233 | /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by |
1065 | nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; | 1234 | * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by |
1066 | 1235 | * possible control vq. | |
1067 | err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names); | 1236 | */ |
1068 | if (err) | 1237 | total_vqs = vi->max_queue_pairs * 2 + |
1069 | return err; | 1238 | virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); |
1239 | |||
1240 | /* Allocate space for find_vqs parameters */ | ||
1241 | vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL); | ||
1242 | if (!vqs) | ||
1243 | goto err_vq; | ||
1244 | callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL); | ||
1245 | if (!callbacks) | ||
1246 | goto err_callback; | ||
1247 | names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL); | ||
1248 | if (!names) | ||
1249 | goto err_names; | ||
1250 | |||
1251 | /* Parameters for control virtqueue, if any */ | ||
1252 | if (vi->has_cvq) { | ||
1253 | callbacks[total_vqs - 1] = NULL; | ||
1254 | names[total_vqs - 1] = "control"; | ||
1255 | } | ||
1070 | 1256 | ||
1071 | vi->rq.vq = vqs[0]; | 1257 | /* Allocate/initialize parameters for send/receive virtqueues */ |
1072 | vi->sq.vq = vqs[1]; | 1258 | for (i = 0; i < vi->max_queue_pairs; i++) { |
1259 | callbacks[rxq2vq(i)] = skb_recv_done; | ||
1260 | callbacks[txq2vq(i)] = skb_xmit_done; | ||
1261 | sprintf(vi->rq[i].name, "input.%d", i); | ||
1262 | sprintf(vi->sq[i].name, "output.%d", i); | ||
1263 | names[rxq2vq(i)] = vi->rq[i].name; | ||
1264 | names[txq2vq(i)] = vi->sq[i].name; | ||
1265 | } | ||
1073 | 1266 | ||
1074 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { | 1267 | ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, |
1075 | vi->cvq = vqs[2]; | 1268 | names); |
1269 | if (ret) | ||
1270 | goto err_find; | ||
1076 | 1271 | ||
1272 | if (vi->has_cvq) { | ||
1273 | vi->cvq = vqs[total_vqs - 1]; | ||
1077 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) | 1274 | if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) |
1078 | vi->dev->features |= NETIF_F_HW_VLAN_FILTER; | 1275 | vi->dev->features |= NETIF_F_HW_VLAN_FILTER; |
1079 | } | 1276 | } |
1277 | |||
1278 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
1279 | vi->rq[i].vq = vqs[rxq2vq(i)]; | ||
1280 | vi->sq[i].vq = vqs[txq2vq(i)]; | ||
1281 | } | ||
1282 | |||
1283 | kfree(names); | ||
1284 | kfree(callbacks); | ||
1285 | kfree(vqs); | ||
1286 | |||
1080 | return 0; | 1287 | return 0; |
1288 | |||
1289 | err_find: | ||
1290 | kfree(names); | ||
1291 | err_names: | ||
1292 | kfree(callbacks); | ||
1293 | err_callback: | ||
1294 | kfree(vqs); | ||
1295 | err_vq: | ||
1296 | return ret; | ||
1297 | } | ||
1298 | |||
1299 | static int virtnet_alloc_queues(struct virtnet_info *vi) | ||
1300 | { | ||
1301 | int i; | ||
1302 | |||
1303 | vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL); | ||
1304 | if (!vi->sq) | ||
1305 | goto err_sq; | ||
1306 | vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL); | ||
1307 | if (!vi->sq) | ||
1308 | goto err_rq; | ||
1309 | |||
1310 | INIT_DELAYED_WORK(&vi->refill, refill_work); | ||
1311 | for (i = 0; i < vi->max_queue_pairs; i++) { | ||
1312 | vi->rq[i].pages = NULL; | ||
1313 | netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, | ||
1314 | napi_weight); | ||
1315 | |||
1316 | sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); | ||
1317 | sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); | ||
1318 | } | ||
1319 | |||
1320 | return 0; | ||
1321 | |||
1322 | err_rq: | ||
1323 | kfree(vi->sq); | ||
1324 | err_sq: | ||
1325 | return -ENOMEM; | ||
1326 | } | ||
1327 | |||
1328 | static int init_vqs(struct virtnet_info *vi) | ||
1329 | { | ||
1330 | int ret; | ||
1331 | |||
1332 | /* Allocate send & receive queues */ | ||
1333 | ret = virtnet_alloc_queues(vi); | ||
1334 | if (ret) | ||
1335 | goto err; | ||
1336 | |||
1337 | ret = virtnet_find_vqs(vi); | ||
1338 | if (ret) | ||
1339 | goto err_free; | ||
1340 | |||
1341 | virtnet_set_affinity(vi, true); | ||
1342 | return 0; | ||
1343 | |||
1344 | err_free: | ||
1345 | virtnet_free_queues(vi); | ||
1346 | err: | ||
1347 | return ret; | ||
1081 | } | 1348 | } |
1082 | 1349 | ||
1083 | static int virtnet_probe(struct virtio_device *vdev) | 1350 | static int virtnet_probe(struct virtio_device *vdev) |
1084 | { | 1351 | { |
1085 | int err; | 1352 | int i, err; |
1086 | struct net_device *dev; | 1353 | struct net_device *dev; |
1087 | struct virtnet_info *vi; | 1354 | struct virtnet_info *vi; |
1355 | u16 max_queue_pairs; | ||
1356 | |||
1357 | /* Find if host supports multiqueue virtio_net device */ | ||
1358 | err = virtio_config_val(vdev, VIRTIO_NET_F_MQ, | ||
1359 | offsetof(struct virtio_net_config, | ||
1360 | max_virtqueue_pairs), &max_queue_pairs); | ||
1361 | |||
1362 | /* We need at least 2 queue's */ | ||
1363 | if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || | ||
1364 | max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || | ||
1365 | !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) | ||
1366 | max_queue_pairs = 1; | ||
1088 | 1367 | ||
1089 | /* Allocate ourselves a network device with room for our info */ | 1368 | /* Allocate ourselves a network device with room for our info */ |
1090 | dev = alloc_etherdev(sizeof(struct virtnet_info)); | 1369 | dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); |
1091 | if (!dev) | 1370 | if (!dev) |
1092 | return -ENOMEM; | 1371 | return -ENOMEM; |
1093 | 1372 | ||
@@ -1133,22 +1412,17 @@ static int virtnet_probe(struct virtio_device *vdev) | |||
1133 | 1412 | ||
1134 | /* Set up our device-specific information */ | 1413 | /* Set up our device-specific information */ |
1135 | vi = netdev_priv(dev); | 1414 | vi = netdev_priv(dev); |
1136 | netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight); | ||
1137 | vi->dev = dev; | 1415 | vi->dev = dev; |
1138 | vi->vdev = vdev; | 1416 | vi->vdev = vdev; |
1139 | vdev->priv = vi; | 1417 | vdev->priv = vi; |
1140 | vi->rq.pages = NULL; | ||
1141 | vi->stats = alloc_percpu(struct virtnet_stats); | 1418 | vi->stats = alloc_percpu(struct virtnet_stats); |
1142 | err = -ENOMEM; | 1419 | err = -ENOMEM; |
1143 | if (vi->stats == NULL) | 1420 | if (vi->stats == NULL) |
1144 | goto free; | 1421 | goto free; |
1145 | 1422 | ||
1146 | INIT_DELAYED_WORK(&vi->refill, refill_work); | ||
1147 | mutex_init(&vi->config_lock); | 1423 | mutex_init(&vi->config_lock); |
1148 | vi->config_enable = true; | 1424 | vi->config_enable = true; |
1149 | INIT_WORK(&vi->config_work, virtnet_config_changed_work); | 1425 | INIT_WORK(&vi->config_work, virtnet_config_changed_work); |
1150 | sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg)); | ||
1151 | sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg)); | ||
1152 | 1426 | ||
1153 | /* If we can receive ANY GSO packets, we must allocate large ones. */ | 1427 | /* If we can receive ANY GSO packets, we must allocate large ones. */ |
1154 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || | 1428 | if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || |
@@ -1159,10 +1433,21 @@ static int virtnet_probe(struct virtio_device *vdev) | |||
1159 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) | 1433 | if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) |
1160 | vi->mergeable_rx_bufs = true; | 1434 | vi->mergeable_rx_bufs = true; |
1161 | 1435 | ||
1436 | if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) | ||
1437 | vi->has_cvq = true; | ||
1438 | |||
1439 | /* Use single tx/rx queue pair as default */ | ||
1440 | vi->curr_queue_pairs = 1; | ||
1441 | vi->max_queue_pairs = max_queue_pairs; | ||
1442 | |||
1443 | /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ | ||
1162 | err = init_vqs(vi); | 1444 | err = init_vqs(vi); |
1163 | if (err) | 1445 | if (err) |
1164 | goto free_stats; | 1446 | goto free_stats; |
1165 | 1447 | ||
1448 | netif_set_real_num_tx_queues(dev, 1); | ||
1449 | netif_set_real_num_rx_queues(dev, 1); | ||
1450 | |||
1166 | err = register_netdev(dev); | 1451 | err = register_netdev(dev); |
1167 | if (err) { | 1452 | if (err) { |
1168 | pr_debug("virtio_net: registering device failed\n"); | 1453 | pr_debug("virtio_net: registering device failed\n"); |
@@ -1170,12 +1455,15 @@ static int virtnet_probe(struct virtio_device *vdev) | |||
1170 | } | 1455 | } |
1171 | 1456 | ||
1172 | /* Last of all, set up some receive buffers. */ | 1457 | /* Last of all, set up some receive buffers. */ |
1173 | try_fill_recv(&vi->rq, GFP_KERNEL); | 1458 | for (i = 0; i < vi->max_queue_pairs; i++) { |
1174 | 1459 | try_fill_recv(&vi->rq[i], GFP_KERNEL); | |
1175 | /* If we didn't even get one input buffer, we're useless. */ | 1460 | |
1176 | if (vi->rq.num == 0) { | 1461 | /* If we didn't even get one input buffer, we're useless. */ |
1177 | err = -ENOMEM; | 1462 | if (vi->rq[i].num == 0) { |
1178 | goto unregister; | 1463 | free_unused_bufs(vi); |
1464 | err = -ENOMEM; | ||
1465 | goto free_recv_bufs; | ||
1466 | } | ||
1179 | } | 1467 | } |
1180 | 1468 | ||
1181 | /* Assume link up if device can't report link status, | 1469 | /* Assume link up if device can't report link status, |
@@ -1188,12 +1476,16 @@ static int virtnet_probe(struct virtio_device *vdev) | |||
1188 | netif_carrier_on(dev); | 1476 | netif_carrier_on(dev); |
1189 | } | 1477 | } |
1190 | 1478 | ||
1191 | pr_debug("virtnet: registered device %s\n", dev->name); | 1479 | pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", |
1480 | dev->name, max_queue_pairs); | ||
1481 | |||
1192 | return 0; | 1482 | return 0; |
1193 | 1483 | ||
1194 | unregister: | 1484 | free_recv_bufs: |
1485 | free_receive_bufs(vi); | ||
1195 | unregister_netdev(dev); | 1486 | unregister_netdev(dev); |
1196 | free_vqs: | 1487 | free_vqs: |
1488 | cancel_delayed_work_sync(&vi->refill); | ||
1197 | virtnet_del_vqs(vi); | 1489 | virtnet_del_vqs(vi); |
1198 | free_stats: | 1490 | free_stats: |
1199 | free_percpu(vi->stats); | 1491 | free_percpu(vi->stats); |
@@ -1202,28 +1494,6 @@ free: | |||
1202 | return err; | 1494 | return err; |
1203 | } | 1495 | } |
1204 | 1496 | ||
1205 | static void free_unused_bufs(struct virtnet_info *vi) | ||
1206 | { | ||
1207 | void *buf; | ||
1208 | while (1) { | ||
1209 | buf = virtqueue_detach_unused_buf(vi->sq.vq); | ||
1210 | if (!buf) | ||
1211 | break; | ||
1212 | dev_kfree_skb(buf); | ||
1213 | } | ||
1214 | while (1) { | ||
1215 | buf = virtqueue_detach_unused_buf(vi->rq.vq); | ||
1216 | if (!buf) | ||
1217 | break; | ||
1218 | if (vi->mergeable_rx_bufs || vi->big_packets) | ||
1219 | give_pages(&vi->rq, buf); | ||
1220 | else | ||
1221 | dev_kfree_skb(buf); | ||
1222 | --vi->rq.num; | ||
1223 | } | ||
1224 | BUG_ON(vi->rq.num != 0); | ||
1225 | } | ||
1226 | |||
1227 | static void remove_vq_common(struct virtnet_info *vi) | 1497 | static void remove_vq_common(struct virtnet_info *vi) |
1228 | { | 1498 | { |
1229 | vi->vdev->config->reset(vi->vdev); | 1499 | vi->vdev->config->reset(vi->vdev); |
@@ -1231,10 +1501,9 @@ static void remove_vq_common(struct virtnet_info *vi) | |||
1231 | /* Free unused buffers in both send and recv, if any. */ | 1501 | /* Free unused buffers in both send and recv, if any. */ |
1232 | free_unused_bufs(vi); | 1502 | free_unused_bufs(vi); |
1233 | 1503 | ||
1234 | virtnet_del_vqs(vi); | 1504 | free_receive_bufs(vi); |
1235 | 1505 | ||
1236 | while (vi->rq.pages) | 1506 | virtnet_del_vqs(vi); |
1237 | __free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0); | ||
1238 | } | 1507 | } |
1239 | 1508 | ||
1240 | static void virtnet_remove(struct virtio_device *vdev) | 1509 | static void virtnet_remove(struct virtio_device *vdev) |
@@ -1260,6 +1529,7 @@ static void virtnet_remove(struct virtio_device *vdev) | |||
1260 | static int virtnet_freeze(struct virtio_device *vdev) | 1529 | static int virtnet_freeze(struct virtio_device *vdev) |
1261 | { | 1530 | { |
1262 | struct virtnet_info *vi = vdev->priv; | 1531 | struct virtnet_info *vi = vdev->priv; |
1532 | int i; | ||
1263 | 1533 | ||
1264 | /* Prevent config work handler from accessing the device */ | 1534 | /* Prevent config work handler from accessing the device */ |
1265 | mutex_lock(&vi->config_lock); | 1535 | mutex_lock(&vi->config_lock); |
@@ -1270,7 +1540,10 @@ static int virtnet_freeze(struct virtio_device *vdev) | |||
1270 | cancel_delayed_work_sync(&vi->refill); | 1540 | cancel_delayed_work_sync(&vi->refill); |
1271 | 1541 | ||
1272 | if (netif_running(vi->dev)) | 1542 | if (netif_running(vi->dev)) |
1273 | napi_disable(&vi->rq.napi); | 1543 | for (i = 0; i < vi->max_queue_pairs; i++) { |
1544 | napi_disable(&vi->rq[i].napi); | ||
1545 | netif_napi_del(&vi->rq[i].napi); | ||
1546 | } | ||
1274 | 1547 | ||
1275 | remove_vq_common(vi); | 1548 | remove_vq_common(vi); |
1276 | 1549 | ||
@@ -1282,24 +1555,28 @@ static int virtnet_freeze(struct virtio_device *vdev) | |||
1282 | static int virtnet_restore(struct virtio_device *vdev) | 1555 | static int virtnet_restore(struct virtio_device *vdev) |
1283 | { | 1556 | { |
1284 | struct virtnet_info *vi = vdev->priv; | 1557 | struct virtnet_info *vi = vdev->priv; |
1285 | int err; | 1558 | int err, i; |
1286 | 1559 | ||
1287 | err = init_vqs(vi); | 1560 | err = init_vqs(vi); |
1288 | if (err) | 1561 | if (err) |
1289 | return err; | 1562 | return err; |
1290 | 1563 | ||
1291 | if (netif_running(vi->dev)) | 1564 | if (netif_running(vi->dev)) |
1292 | virtnet_napi_enable(&vi->rq); | 1565 | for (i = 0; i < vi->max_queue_pairs; i++) |
1566 | virtnet_napi_enable(&vi->rq[i]); | ||
1293 | 1567 | ||
1294 | netif_device_attach(vi->dev); | 1568 | netif_device_attach(vi->dev); |
1295 | 1569 | ||
1296 | if (!try_fill_recv(&vi->rq, GFP_KERNEL)) | 1570 | for (i = 0; i < vi->max_queue_pairs; i++) |
1297 | schedule_delayed_work(&vi->refill, 0); | 1571 | if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) |
1572 | schedule_delayed_work(&vi->refill, 0); | ||
1298 | 1573 | ||
1299 | mutex_lock(&vi->config_lock); | 1574 | mutex_lock(&vi->config_lock); |
1300 | vi->config_enable = true; | 1575 | vi->config_enable = true; |
1301 | mutex_unlock(&vi->config_lock); | 1576 | mutex_unlock(&vi->config_lock); |
1302 | 1577 | ||
1578 | virtnet_set_queues(vi, vi->curr_queue_pairs); | ||
1579 | |||
1303 | return 0; | 1580 | return 0; |
1304 | } | 1581 | } |
1305 | #endif | 1582 | #endif |
@@ -1317,7 +1594,7 @@ static unsigned int features[] = { | |||
1317 | VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, | 1594 | VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, |
1318 | VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, | 1595 | VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, |
1319 | VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, | 1596 | VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, |
1320 | VIRTIO_NET_F_GUEST_ANNOUNCE, | 1597 | VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, |
1321 | }; | 1598 | }; |
1322 | 1599 | ||
1323 | static struct virtio_driver virtio_net_driver = { | 1600 | static struct virtio_driver virtio_net_driver = { |