path: root/drivers/net/virtio_net.c
author     Jason Wang <jasowang@redhat.com>    2012-12-07 02:04:56 -0500
committer  David S. Miller <davem@davemloft.net>    2012-12-09 00:30:55 -0500
commit     986a4f4d452dec004697f667439d27c3fda9c928 (patch)
tree       7c6d697cb47a069e0dc52d99fe2199e27f003c43 /drivers/net/virtio_net.c
parent     e9d7417b97f420fa70e3e198f2603a0375fb80a7 (diff)
virtio_net: multiqueue support
This patch adds multiqueue (VIRTIO_NET_F_MQ) support to the virtio_net driver. A VIRTIO_NET_F_MQ-capable device allows the driver to do packet transmission and reception through multiple queue pairs and to do packet steering to get better performance. By default a single queue pair is used; the user can change the number of queue pairs through ethtool in the next patch.

When multiple queue pairs are used and the number of queue pairs equals the number of vcpus, the driver does the following optimizations to implement per-cpu virtqueue pairs:

- select the txq based on the smp processor id.
- set the smp affinity hint to the cpu that owns the queue pair.

This can be used together with the flow steering support of the device to guarantee that the packets of a single flow are handled by the same cpu.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--  drivers/net/virtio_net.c  |  473
1 file changed, 375 insertions, 98 deletions
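As an illustration of the steering policy described in the commit message, the sketch below (userspace only, not part of the patch; cpu_to_txq() and its parameters are invented for this example) shows why per-cpu queue pairs work out: when there is one tx queue per vcpu, the transmitting cpu simply owns the tx queue with the same index, which is also the queue pair its affinity hint points at.

#include <assert.h>

/* Illustrative only: the driver itself folds the cpu id into range with a
 * subtraction loop (see virtnet_select_queue() in the diff below); a plain
 * modulo shows the same mapping.
 */
static unsigned int cpu_to_txq(unsigned int cpu, unsigned int num_tx_queues)
{
	return cpu % num_tx_queues;	/* identity map when cpus == tx queues */
}

int main(void)
{
	assert(cpu_to_txq(3, 4) == 3);	/* 4 vcpus, 4 queue pairs */
	assert(cpu_to_txq(5, 4) == 1);	/* more cpus than queue pairs */
	return 0;
}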
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 02a71021565e..c0830488a390 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -58,6 +58,9 @@ struct send_queue {
58 58
59 /* TX: fragments + linear part + virtio header */ 59 /* TX: fragments + linear part + virtio header */
60 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 60 struct scatterlist sg[MAX_SKB_FRAGS + 2];
61
62 /* Name of the send queue: output.$index */
63 char name[40];
61}; 64};
62 65
63/* Internal representation of a receive virtqueue */ 66/* Internal representation of a receive virtqueue */
@@ -75,22 +78,34 @@ struct receive_queue {
75 78
76 /* RX: fragments + linear part + virtio header */ 79 /* RX: fragments + linear part + virtio header */
77 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 80 struct scatterlist sg[MAX_SKB_FRAGS + 2];
81
82 /* Name of this receive queue: input.$index */
83 char name[40];
78}; 84};
79 85
80struct virtnet_info { 86struct virtnet_info {
81 struct virtio_device *vdev; 87 struct virtio_device *vdev;
82 struct virtqueue *cvq; 88 struct virtqueue *cvq;
83 struct net_device *dev; 89 struct net_device *dev;
84 struct send_queue sq; 90 struct send_queue *sq;
85 struct receive_queue rq; 91 struct receive_queue *rq;
86 unsigned int status; 92 unsigned int status;
87 93
94 /* Max # of queue pairs supported by the device */
95 u16 max_queue_pairs;
96
97 /* # of queue pairs currently used by the driver */
98 u16 curr_queue_pairs;
99
88 /* I like... big packets and I cannot lie! */ 100 /* I like... big packets and I cannot lie! */
89 bool big_packets; 101 bool big_packets;
90 102
91 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 103 /* Host will merge rx buffers for big packets (shake it! shake it!) */
92 bool mergeable_rx_bufs; 104 bool mergeable_rx_bufs;
93 105
106 /* Has control virtqueue */
107 bool has_cvq;
108
94 /* enable config space updates */ 109 /* enable config space updates */
95 bool config_enable; 110 bool config_enable;
96 111
@@ -105,6 +120,9 @@ struct virtnet_info {
105 120
106 /* Lock for config space updates */ 121 /* Lock for config space updates */
107 struct mutex config_lock; 122 struct mutex config_lock;
123
124 /* Is the affinity hint set for the virtqueues? */
125 bool affinity_hint_set;
108}; 126};
109 127
110struct skb_vnet_hdr { 128struct skb_vnet_hdr {
@@ -125,6 +143,29 @@ struct padded_vnet_hdr {
125 char padding[6]; 143 char padding[6];
126}; 144};
127 145
146/* Converting between virtqueue no. and kernel tx/rx queue no.
147 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
148 */
149static int vq2txq(struct virtqueue *vq)
150{
151 return (virtqueue_get_queue_index(vq) - 1) / 2;
152}
153
154static int txq2vq(int txq)
155{
156 return txq * 2 + 1;
157}
158
159static int vq2rxq(struct virtqueue *vq)
160{
161 return virtqueue_get_queue_index(vq) / 2;
162}
163
164static int rxq2vq(int rxq)
165{
166 return rxq * 2;
167}
168
128static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) 169static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
129{ 170{
130 return (struct skb_vnet_hdr *)skb->cb; 171 return (struct skb_vnet_hdr *)skb->cb;
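A worked example of the virtqueue numbering used by the helpers above (illustrative, userspace only; plain ints stand in for virtqueue_get_queue_index()): with two queue pairs and a control virtqueue the five vqs are 0:rx0 1:tx0 2:rx1 3:tx1 4:cvq, and the conversions invert each other.

#include <assert.h>

/* Same arithmetic as vq2txq()/txq2vq()/vq2rxq()/rxq2vq() in the hunk above,
 * rewritten against a plain index for illustration.
 */
static int ex_vq2txq(int vq)   { return (vq - 1) / 2; }
static int ex_txq2vq(int txq)  { return txq * 2 + 1; }
static int ex_vq2rxq(int vq)   { return vq / 2; }
static int ex_rxq2vq(int rxq)  { return rxq * 2; }

int main(void)
{
	/* Two queue pairs plus control vq: 0:rx0 1:tx0 2:rx1 3:tx1 4:cvq */
	assert(ex_rxq2vq(1) == 2 && ex_vq2rxq(2) == 1);
	assert(ex_txq2vq(1) == 3 && ex_vq2txq(3) == 1);
	assert(ex_rxq2vq(0) == 0 && ex_txq2vq(0) == 1);
	return 0;
}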
@@ -165,7 +206,7 @@ static void skb_xmit_done(struct virtqueue *vq)
165 virtqueue_disable_cb(vq); 206 virtqueue_disable_cb(vq);
166 207
167 /* We were probably waiting for more output buffers. */ 208 /* We were probably waiting for more output buffers. */
168 netif_wake_queue(vi->dev); 209 netif_wake_subqueue(vi->dev, vq2txq(vq));
169} 210}
170 211
171static void set_skb_frag(struct sk_buff *skb, struct page *page, 212static void set_skb_frag(struct sk_buff *skb, struct page *page,
@@ -502,7 +543,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
502static void skb_recv_done(struct virtqueue *rvq) 543static void skb_recv_done(struct virtqueue *rvq)
503{ 544{
504 struct virtnet_info *vi = rvq->vdev->priv; 545 struct virtnet_info *vi = rvq->vdev->priv;
505 struct receive_queue *rq = &vi->rq; 546 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
506 547
507 /* Schedule NAPI, Suppress further interrupts if successful. */ 548 /* Schedule NAPI, Suppress further interrupts if successful. */
508 if (napi_schedule_prep(&rq->napi)) { 549 if (napi_schedule_prep(&rq->napi)) {
@@ -532,15 +573,21 @@ static void refill_work(struct work_struct *work)
532 struct virtnet_info *vi = 573 struct virtnet_info *vi =
533 container_of(work, struct virtnet_info, refill.work); 574 container_of(work, struct virtnet_info, refill.work);
534 bool still_empty; 575 bool still_empty;
576 int i;
577
578 for (i = 0; i < vi->max_queue_pairs; i++) {
579 struct receive_queue *rq = &vi->rq[i];
535 580
536 napi_disable(&vi->rq.napi); 581 napi_disable(&rq->napi);
537 still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL); 582 still_empty = !try_fill_recv(rq, GFP_KERNEL);
538 virtnet_napi_enable(&vi->rq); 583 virtnet_napi_enable(rq);
539 584
540 /* In theory, this can happen: if we don't get any buffers in 585 /* In theory, this can happen: if we don't get any buffers in
541 * we will *never* try to fill again. */ 586 * we will *never* try to fill again.
542 if (still_empty) 587 */
543 schedule_delayed_work(&vi->refill, HZ/2); 588 if (still_empty)
589 schedule_delayed_work(&vi->refill, HZ/2);
590 }
544} 591}
545 592
546static int virtnet_poll(struct napi_struct *napi, int budget) 593static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -578,6 +625,21 @@ again:
578 return received; 625 return received;
579} 626}
580 627
628static int virtnet_open(struct net_device *dev)
629{
630 struct virtnet_info *vi = netdev_priv(dev);
631 int i;
632
633 for (i = 0; i < vi->max_queue_pairs; i++) {
634 /* Make sure we have some buffers: if oom use wq. */
635 if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
636 schedule_delayed_work(&vi->refill, 0);
637 virtnet_napi_enable(&vi->rq[i]);
638 }
639
640 return 0;
641}
642
581static unsigned int free_old_xmit_skbs(struct send_queue *sq) 643static unsigned int free_old_xmit_skbs(struct send_queue *sq)
582{ 644{
583 struct sk_buff *skb; 645 struct sk_buff *skb;
@@ -650,7 +712,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
650static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 712static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
651{ 713{
652 struct virtnet_info *vi = netdev_priv(dev); 714 struct virtnet_info *vi = netdev_priv(dev);
653 struct send_queue *sq = &vi->sq; 715 int qnum = skb_get_queue_mapping(skb);
716 struct send_queue *sq = &vi->sq[qnum];
654 int capacity; 717 int capacity;
655 718
656 /* Free up any pending old buffers before queueing new ones. */ 719 /* Free up any pending old buffers before queueing new ones. */
@@ -664,13 +727,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
664 if (likely(capacity == -ENOMEM)) { 727 if (likely(capacity == -ENOMEM)) {
665 if (net_ratelimit()) 728 if (net_ratelimit())
666 dev_warn(&dev->dev, 729 dev_warn(&dev->dev,
667 "TX queue failure: out of memory\n"); 730 "TXQ (%d) failure: out of memory\n",
731 qnum);
668 } else { 732 } else {
669 dev->stats.tx_fifo_errors++; 733 dev->stats.tx_fifo_errors++;
670 if (net_ratelimit()) 734 if (net_ratelimit())
671 dev_warn(&dev->dev, 735 dev_warn(&dev->dev,
672 "Unexpected TX queue failure: %d\n", 736 "Unexpected TXQ (%d) failure: %d\n",
673 capacity); 737 qnum, capacity);
674 } 738 }
675 dev->stats.tx_dropped++; 739 dev->stats.tx_dropped++;
676 kfree_skb(skb); 740 kfree_skb(skb);
@@ -685,12 +749,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
685 /* Apparently nice girls don't return TX_BUSY; stop the queue 749 /* Apparently nice girls don't return TX_BUSY; stop the queue
686 * before it gets out of hand. Naturally, this wastes entries. */ 750 * before it gets out of hand. Naturally, this wastes entries. */
687 if (capacity < 2+MAX_SKB_FRAGS) { 751 if (capacity < 2+MAX_SKB_FRAGS) {
688 netif_stop_queue(dev); 752 netif_stop_subqueue(dev, qnum);
689 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 753 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
690 /* More just got used, free them then recheck. */ 754 /* More just got used, free them then recheck. */
691 capacity += free_old_xmit_skbs(sq); 755 capacity += free_old_xmit_skbs(sq);
692 if (capacity >= 2+MAX_SKB_FRAGS) { 756 if (capacity >= 2+MAX_SKB_FRAGS) {
693 netif_start_queue(dev); 757 netif_start_subqueue(dev, qnum);
694 virtqueue_disable_cb(sq->vq); 758 virtqueue_disable_cb(sq->vq);
695 } 759 }
696 } 760 }
@@ -758,23 +822,13 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
758static void virtnet_netpoll(struct net_device *dev) 822static void virtnet_netpoll(struct net_device *dev)
759{ 823{
760 struct virtnet_info *vi = netdev_priv(dev); 824 struct virtnet_info *vi = netdev_priv(dev);
825 int i;
761 826
762 napi_schedule(&vi->rq.napi); 827 for (i = 0; i < vi->curr_queue_pairs; i++)
828 napi_schedule(&vi->rq[i].napi);
763} 829}
764#endif 830#endif
765 831
766static int virtnet_open(struct net_device *dev)
767{
768 struct virtnet_info *vi = netdev_priv(dev);
769
770 /* Make sure we have some buffers: if oom use wq. */
771 if (!try_fill_recv(&vi->rq, GFP_KERNEL))
772 schedule_delayed_work(&vi->refill, 0);
773
774 virtnet_napi_enable(&vi->rq);
775 return 0;
776}
777
778/* 832/*
779 * Send command via the control virtqueue and check status. Commands 833 * Send command via the control virtqueue and check status. Commands
780 * supported by the hypervisor, as indicated by feature bits, should 834 * supported by the hypervisor, as indicated by feature bits, should
@@ -830,13 +884,39 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
830 rtnl_unlock(); 884 rtnl_unlock();
831} 885}
832 886
887static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
888{
889 struct scatterlist sg;
890 struct virtio_net_ctrl_mq s;
891 struct net_device *dev = vi->dev;
892
893 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
894 return 0;
895
896 s.virtqueue_pairs = queue_pairs;
897 sg_init_one(&sg, &s, sizeof(s));
898
899 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
900 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)){
901 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
902 queue_pairs);
903 return -EINVAL;
904 } else
905 vi->curr_queue_pairs = queue_pairs;
906
907 return 0;
908}
909
833static int virtnet_close(struct net_device *dev) 910static int virtnet_close(struct net_device *dev)
834{ 911{
835 struct virtnet_info *vi = netdev_priv(dev); 912 struct virtnet_info *vi = netdev_priv(dev);
913 int i;
836 914
837 /* Make sure refill_work doesn't re-enable napi! */ 915 /* Make sure refill_work doesn't re-enable napi! */
838 cancel_delayed_work_sync(&vi->refill); 916 cancel_delayed_work_sync(&vi->refill);
839 napi_disable(&vi->rq.napi); 917
918 for (i = 0; i < vi->max_queue_pairs; i++)
919 napi_disable(&vi->rq[i].napi);
840 920
841 return 0; 921 return 0;
842} 922}
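For reference, the command issued by virtnet_set_queues() above carries a single 16-bit field. The standalone sketch below (not driver code) mirrors the payload layout and the limits defined for VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET in the virtio-net spec additions that accompany this series; the struct name here is invented, the real one is struct virtio_net_ctrl_mq.

#include <assert.h>
#include <stdint.h>

/* Payload for class VIRTIO_NET_CTRL_MQ, command VQ_PAIRS_SET: the requested
 * number of queue pairs, expected to lie between VQ_PAIRS_MIN (1) and
 * VQ_PAIRS_MAX (0x8000).
 */
struct ctrl_mq_payload {
	uint16_t virtqueue_pairs;
};

int main(void)
{
	struct ctrl_mq_payload s = { .virtqueue_pairs = 4 };

	assert(sizeof(s) == sizeof(uint16_t));
	assert(s.virtqueue_pairs >= 1 && s.virtqueue_pairs <= 0x8000);
	return 0;
}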
@@ -943,13 +1023,41 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
943 return 0; 1023 return 0;
944} 1024}
945 1025
1026static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
1027{
1028 int i;
1029
1030 /* In multiqueue mode, when the number of cpus equals the number of
1031 * queue pairs, we let each queue pair be private to one cpu by
1032 * setting the affinity hint, to eliminate contention.
1033 */
1034 if ((vi->curr_queue_pairs == 1 ||
1035 vi->max_queue_pairs != num_online_cpus()) && set) {
1036 if (vi->affinity_hint_set)
1037 set = false;
1038 else
1039 return;
1040 }
1041
1042 for (i = 0; i < vi->max_queue_pairs; i++) {
1043 int cpu = set ? i : -1;
1044 virtqueue_set_affinity(vi->rq[i].vq, cpu);
1045 virtqueue_set_affinity(vi->sq[i].vq, cpu);
1046 }
1047
1048 if (set)
1049 vi->affinity_hint_set = true;
1050 else
1051 vi->affinity_hint_set = false;
1052}
1053
946static void virtnet_get_ringparam(struct net_device *dev, 1054static void virtnet_get_ringparam(struct net_device *dev,
947 struct ethtool_ringparam *ring) 1055 struct ethtool_ringparam *ring)
948{ 1056{
949 struct virtnet_info *vi = netdev_priv(dev); 1057 struct virtnet_info *vi = netdev_priv(dev);
950 1058
951 ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq); 1059 ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
952 ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq); 1060 ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
953 ring->rx_pending = ring->rx_max_pending; 1061 ring->rx_pending = ring->rx_max_pending;
954 ring->tx_pending = ring->tx_max_pending; 1062 ring->tx_pending = ring->tx_max_pending;
955} 1063}
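A condensed reading of virtnet_set_affinity() above, extracted as a standalone predicate (illustrative only; want_affinity_hints() is not a driver function): hints are applied only when more than one queue pair is in use and the device exposes exactly one queue pair per online cpu, otherwise any previously set hints are cleared.

#include <assert.h>

/* When does the driver pin queue pairs to cpus? Only in multiqueue mode
 * with one queue pair per online cpu.
 */
static int want_affinity_hints(unsigned int curr_queue_pairs,
			       unsigned int max_queue_pairs,
			       unsigned int online_cpus)
{
	return curr_queue_pairs != 1 && max_queue_pairs == online_cpus;
}

int main(void)
{
	assert(want_affinity_hints(4, 4, 4));	/* 4 pairs, 4 cpus: pin */
	assert(!want_affinity_hints(1, 4, 4));	/* single queue mode: don't */
	assert(!want_affinity_hints(4, 4, 8));	/* cpus != pairs: don't */
	return 0;
}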
@@ -984,6 +1092,21 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
984 return 0; 1092 return 0;
985} 1093}
986 1094
1095/* To avoid contending on a lock held by a vcpu that may exit to the host,
1096 * select the txq based on the processor id.
1097 * TODO: handle cpu hotplug.
1098 */
1099static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
1100{
1101 int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
1102 smp_processor_id();
1103
1104 while (unlikely(txq >= dev->real_num_tx_queues))
1105 txq -= dev->real_num_tx_queues;
1106
1107 return txq;
1108}
1109
987static const struct net_device_ops virtnet_netdev = { 1110static const struct net_device_ops virtnet_netdev = {
988 .ndo_open = virtnet_open, 1111 .ndo_open = virtnet_open,
989 .ndo_stop = virtnet_close, 1112 .ndo_stop = virtnet_close,
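One more worked example for virtnet_select_queue() above (numbers are illustrative; ex_select_queue() only mirrors the driver's arithmetic): when the skb carries a recorded rx queue, that index is reused for tx so a flow's rx and tx stay on the same queue pair, and the subtraction loop folds it back into the range of currently enabled tx queues.

#include <assert.h>

static unsigned int ex_select_queue(int rx_recorded, unsigned int rx_queue,
				    unsigned int cpu,
				    unsigned int real_num_tx_queues)
{
	unsigned int txq = rx_recorded ? rx_queue : cpu;

	while (txq >= real_num_tx_queues)
		txq -= real_num_tx_queues;

	return txq;
}

int main(void)
{
	/* Flow seen on rx queue 2, 4 tx queues enabled: tx also uses 2. */
	assert(ex_select_queue(1, 2, 7, 4) == 2);
	/* Recorded rx queue 5 with only 4 tx queues enabled: folds to 1. */
	assert(ex_select_queue(1, 5, 0, 4) == 1);
	return 0;
}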
@@ -995,6 +1118,7 @@ static const struct net_device_ops virtnet_netdev = {
995 .ndo_get_stats64 = virtnet_stats, 1118 .ndo_get_stats64 = virtnet_stats,
996 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 1119 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
997 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 1120 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
1121 .ndo_select_queue = virtnet_select_queue,
998#ifdef CONFIG_NET_POLL_CONTROLLER 1122#ifdef CONFIG_NET_POLL_CONTROLLER
999 .ndo_poll_controller = virtnet_netpoll, 1123 .ndo_poll_controller = virtnet_netpoll,
1000#endif 1124#endif
@@ -1030,10 +1154,10 @@ static void virtnet_config_changed_work(struct work_struct *work)
1030 1154
1031 if (vi->status & VIRTIO_NET_S_LINK_UP) { 1155 if (vi->status & VIRTIO_NET_S_LINK_UP) {
1032 netif_carrier_on(vi->dev); 1156 netif_carrier_on(vi->dev);
1033 netif_wake_queue(vi->dev); 1157 netif_tx_wake_all_queues(vi->dev);
1034 } else { 1158 } else {
1035 netif_carrier_off(vi->dev); 1159 netif_carrier_off(vi->dev);
1036 netif_stop_queue(vi->dev); 1160 netif_tx_stop_all_queues(vi->dev);
1037 } 1161 }
1038done: 1162done:
1039 mutex_unlock(&vi->config_lock); 1163 mutex_unlock(&vi->config_lock);
@@ -1046,48 +1170,203 @@ static void virtnet_config_changed(struct virtio_device *vdev)
1046 schedule_work(&vi->config_work); 1170 schedule_work(&vi->config_work);
1047} 1171}
1048 1172
1173static void virtnet_free_queues(struct virtnet_info *vi)
1174{
1175 kfree(vi->rq);
1176 kfree(vi->sq);
1177}
1178
1179static void free_receive_bufs(struct virtnet_info *vi)
1180{
1181 int i;
1182
1183 for (i = 0; i < vi->max_queue_pairs; i++) {
1184 while (vi->rq[i].pages)
1185 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
1186 }
1187}
1188
1189static void free_unused_bufs(struct virtnet_info *vi)
1190{
1191 void *buf;
1192 int i;
1193
1194 for (i = 0; i < vi->max_queue_pairs; i++) {
1195 struct virtqueue *vq = vi->sq[i].vq;
1196 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
1197 dev_kfree_skb(buf);
1198 }
1199
1200 for (i = 0; i < vi->max_queue_pairs; i++) {
1201 struct virtqueue *vq = vi->rq[i].vq;
1202
1203 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
1204 if (vi->mergeable_rx_bufs || vi->big_packets)
1205 give_pages(&vi->rq[i], buf);
1206 else
1207 dev_kfree_skb(buf);
1208 --vi->rq[i].num;
1209 }
1210 BUG_ON(vi->rq[i].num != 0);
1211 }
1212}
1213
1049static void virtnet_del_vqs(struct virtnet_info *vi) 1214static void virtnet_del_vqs(struct virtnet_info *vi)
1050{ 1215{
1051 struct virtio_device *vdev = vi->vdev; 1216 struct virtio_device *vdev = vi->vdev;
1052 1217
1218 virtnet_set_affinity(vi, false);
1219
1053 vdev->config->del_vqs(vdev); 1220 vdev->config->del_vqs(vdev);
1221
1222 virtnet_free_queues(vi);
1054} 1223}
1055 1224
1056static int init_vqs(struct virtnet_info *vi) 1225static int virtnet_find_vqs(struct virtnet_info *vi)
1057{ 1226{
1058 struct virtqueue *vqs[3]; 1227 vq_callback_t **callbacks;
1059 vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; 1228 struct virtqueue **vqs;
1060 const char *names[] = { "input", "output", "control" }; 1229 int ret = -ENOMEM;
1061 int nvqs, err; 1230 int i, total_vqs;
1062 1231 const char **names;
1063 /* We expect two virtqueues, receive then send, 1232
1064 * and optionally control. */ 1233 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
1065 nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; 1234 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
1066 1235 * possible control vq.
1067 err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names); 1236 */
1068 if (err) 1237 total_vqs = vi->max_queue_pairs * 2 +
1069 return err; 1238 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
1239
1240 /* Allocate space for find_vqs parameters */
1241 vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
1242 if (!vqs)
1243 goto err_vq;
1244 callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
1245 if (!callbacks)
1246 goto err_callback;
1247 names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
1248 if (!names)
1249 goto err_names;
1250
1251 /* Parameters for control virtqueue, if any */
1252 if (vi->has_cvq) {
1253 callbacks[total_vqs - 1] = NULL;
1254 names[total_vqs - 1] = "control";
1255 }
1070 1256
1071 vi->rq.vq = vqs[0]; 1257 /* Allocate/initialize parameters for send/receive virtqueues */
1072 vi->sq.vq = vqs[1]; 1258 for (i = 0; i < vi->max_queue_pairs; i++) {
1259 callbacks[rxq2vq(i)] = skb_recv_done;
1260 callbacks[txq2vq(i)] = skb_xmit_done;
1261 sprintf(vi->rq[i].name, "input.%d", i);
1262 sprintf(vi->sq[i].name, "output.%d", i);
1263 names[rxq2vq(i)] = vi->rq[i].name;
1264 names[txq2vq(i)] = vi->sq[i].name;
1265 }
1073 1266
1074 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { 1267 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
1075 vi->cvq = vqs[2]; 1268 names);
1269 if (ret)
1270 goto err_find;
1076 1271
1272 if (vi->has_cvq) {
1273 vi->cvq = vqs[total_vqs - 1];
1077 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 1274 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
1078 vi->dev->features |= NETIF_F_HW_VLAN_FILTER; 1275 vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
1079 } 1276 }
1277
1278 for (i = 0; i < vi->max_queue_pairs; i++) {
1279 vi->rq[i].vq = vqs[rxq2vq(i)];
1280 vi->sq[i].vq = vqs[txq2vq(i)];
1281 }
1282
1283 kfree(names);
1284 kfree(callbacks);
1285 kfree(vqs);
1286
1080 return 0; 1287 return 0;
1288
1289err_find:
1290 kfree(names);
1291err_names:
1292 kfree(callbacks);
1293err_callback:
1294 kfree(vqs);
1295err_vq:
1296 return ret;
1297}
1298
1299static int virtnet_alloc_queues(struct virtnet_info *vi)
1300{
1301 int i;
1302
1303 vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
1304 if (!vi->sq)
1305 goto err_sq;
1306 vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
1307 if (!vi->rq)
1308 goto err_rq;
1309
1310 INIT_DELAYED_WORK(&vi->refill, refill_work);
1311 for (i = 0; i < vi->max_queue_pairs; i++) {
1312 vi->rq[i].pages = NULL;
1313 netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
1314 napi_weight);
1315
1316 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
1317 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
1318 }
1319
1320 return 0;
1321
1322err_rq:
1323 kfree(vi->sq);
1324err_sq:
1325 return -ENOMEM;
1326}
1327
1328static int init_vqs(struct virtnet_info *vi)
1329{
1330 int ret;
1331
1332 /* Allocate send & receive queues */
1333 ret = virtnet_alloc_queues(vi);
1334 if (ret)
1335 goto err;
1336
1337 ret = virtnet_find_vqs(vi);
1338 if (ret)
1339 goto err_free;
1340
1341 virtnet_set_affinity(vi, true);
1342 return 0;
1343
1344err_free:
1345 virtnet_free_queues(vi);
1346err:
1347 return ret;
1081} 1348}
1082 1349
1083static int virtnet_probe(struct virtio_device *vdev) 1350static int virtnet_probe(struct virtio_device *vdev)
1084{ 1351{
1085 int err; 1352 int i, err;
1086 struct net_device *dev; 1353 struct net_device *dev;
1087 struct virtnet_info *vi; 1354 struct virtnet_info *vi;
1355 u16 max_queue_pairs;
1356
1357 /* Find if host supports multiqueue virtio_net device */
1358 err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
1359 offsetof(struct virtio_net_config,
1360 max_virtqueue_pairs), &max_queue_pairs);
1361
1362 /* We need at least 2 queues */
1363 if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1364 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1365 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1366 max_queue_pairs = 1;
1088 1367
1089 /* Allocate ourselves a network device with room for our info */ 1368 /* Allocate ourselves a network device with room for our info */
1090 dev = alloc_etherdev(sizeof(struct virtnet_info)); 1369 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
1091 if (!dev) 1370 if (!dev)
1092 return -ENOMEM; 1371 return -ENOMEM;
1093 1372
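To make the find_vqs() bookkeeping above concrete, the sketch below (illustrative only, ordinary userspace C) builds the names[] array the way virtnet_find_vqs() does for max_queue_pairs = 2 with a control virtqueue, i.e. five entries laid out per the rxq2vq()/txq2vq() numbering.

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	char rq_names[2][40], sq_names[2][40];
	const char *names[5];
	int i;

	for (i = 0; i < 2; i++) {
		sprintf(rq_names[i], "input.%d", i);
		sprintf(sq_names[i], "output.%d", i);
		names[i * 2] = rq_names[i];	/* rxq2vq(i) */
		names[i * 2 + 1] = sq_names[i];	/* txq2vq(i) */
	}
	names[4] = "control";			/* control vq is last */

	assert(strcmp(names[0], "input.0") == 0);
	assert(strcmp(names[3], "output.1") == 0);
	assert(strcmp(names[4], "control") == 0);
	return 0;
}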
@@ -1133,22 +1412,17 @@ static int virtnet_probe(struct virtio_device *vdev)
1133 1412
1134 /* Set up our device-specific information */ 1413 /* Set up our device-specific information */
1135 vi = netdev_priv(dev); 1414 vi = netdev_priv(dev);
1136 netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
1137 vi->dev = dev; 1415 vi->dev = dev;
1138 vi->vdev = vdev; 1416 vi->vdev = vdev;
1139 vdev->priv = vi; 1417 vdev->priv = vi;
1140 vi->rq.pages = NULL;
1141 vi->stats = alloc_percpu(struct virtnet_stats); 1418 vi->stats = alloc_percpu(struct virtnet_stats);
1142 err = -ENOMEM; 1419 err = -ENOMEM;
1143 if (vi->stats == NULL) 1420 if (vi->stats == NULL)
1144 goto free; 1421 goto free;
1145 1422
1146 INIT_DELAYED_WORK(&vi->refill, refill_work);
1147 mutex_init(&vi->config_lock); 1423 mutex_init(&vi->config_lock);
1148 vi->config_enable = true; 1424 vi->config_enable = true;
1149 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 1425 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
1150 sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
1151 sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
1152 1426
1153 /* If we can receive ANY GSO packets, we must allocate large ones. */ 1427 /* If we can receive ANY GSO packets, we must allocate large ones. */
1154 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 1428 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1159,10 +1433,21 @@ static int virtnet_probe(struct virtio_device *vdev)
1159 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) 1433 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
1160 vi->mergeable_rx_bufs = true; 1434 vi->mergeable_rx_bufs = true;
1161 1435
1436 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1437 vi->has_cvq = true;
1438
1439 /* Use single tx/rx queue pair as default */
1440 vi->curr_queue_pairs = 1;
1441 vi->max_queue_pairs = max_queue_pairs;
1442
1443 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
1162 err = init_vqs(vi); 1444 err = init_vqs(vi);
1163 if (err) 1445 if (err)
1164 goto free_stats; 1446 goto free_stats;
1165 1447
1448 netif_set_real_num_tx_queues(dev, 1);
1449 netif_set_real_num_rx_queues(dev, 1);
1450
1166 err = register_netdev(dev); 1451 err = register_netdev(dev);
1167 if (err) { 1452 if (err) {
1168 pr_debug("virtio_net: registering device failed\n"); 1453 pr_debug("virtio_net: registering device failed\n");
@@ -1170,12 +1455,15 @@ static int virtnet_probe(struct virtio_device *vdev)
1170 } 1455 }
1171 1456
1172 /* Last of all, set up some receive buffers. */ 1457 /* Last of all, set up some receive buffers. */
1173 try_fill_recv(&vi->rq, GFP_KERNEL); 1458 for (i = 0; i < vi->max_queue_pairs; i++) {
1174 1459 try_fill_recv(&vi->rq[i], GFP_KERNEL);
1175 /* If we didn't even get one input buffer, we're useless. */ 1460
1176 if (vi->rq.num == 0) { 1461 /* If we didn't even get one input buffer, we're useless. */
1177 err = -ENOMEM; 1462 if (vi->rq[i].num == 0) {
1178 goto unregister; 1463 free_unused_bufs(vi);
1464 err = -ENOMEM;
1465 goto free_recv_bufs;
1466 }
1179 } 1467 }
1180 1468
1181 /* Assume link up if device can't report link status, 1469 /* Assume link up if device can't report link status,
@@ -1188,12 +1476,16 @@ static int virtnet_probe(struct virtio_device *vdev)
1188 netif_carrier_on(dev); 1476 netif_carrier_on(dev);
1189 } 1477 }
1190 1478
1191 pr_debug("virtnet: registered device %s\n", dev->name); 1479 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
1480 dev->name, max_queue_pairs);
1481
1192 return 0; 1482 return 0;
1193 1483
1194unregister: 1484free_recv_bufs:
1485 free_receive_bufs(vi);
1195 unregister_netdev(dev); 1486 unregister_netdev(dev);
1196free_vqs: 1487free_vqs:
1488 cancel_delayed_work_sync(&vi->refill);
1197 virtnet_del_vqs(vi); 1489 virtnet_del_vqs(vi);
1198free_stats: 1490free_stats:
1199 free_percpu(vi->stats); 1491 free_percpu(vi->stats);
@@ -1202,28 +1494,6 @@ free:
1202 return err; 1494 return err;
1203} 1495}
1204 1496
1205static void free_unused_bufs(struct virtnet_info *vi)
1206{
1207 void *buf;
1208 while (1) {
1209 buf = virtqueue_detach_unused_buf(vi->sq.vq);
1210 if (!buf)
1211 break;
1212 dev_kfree_skb(buf);
1213 }
1214 while (1) {
1215 buf = virtqueue_detach_unused_buf(vi->rq.vq);
1216 if (!buf)
1217 break;
1218 if (vi->mergeable_rx_bufs || vi->big_packets)
1219 give_pages(&vi->rq, buf);
1220 else
1221 dev_kfree_skb(buf);
1222 --vi->rq.num;
1223 }
1224 BUG_ON(vi->rq.num != 0);
1225}
1226
1227static void remove_vq_common(struct virtnet_info *vi) 1497static void remove_vq_common(struct virtnet_info *vi)
1228{ 1498{
1229 vi->vdev->config->reset(vi->vdev); 1499 vi->vdev->config->reset(vi->vdev);
@@ -1231,10 +1501,9 @@ static void remove_vq_common(struct virtnet_info *vi)
1231 /* Free unused buffers in both send and recv, if any. */ 1501 /* Free unused buffers in both send and recv, if any. */
1232 free_unused_bufs(vi); 1502 free_unused_bufs(vi);
1233 1503
1234 virtnet_del_vqs(vi); 1504 free_receive_bufs(vi);
1235 1505
1236 while (vi->rq.pages) 1506 virtnet_del_vqs(vi);
1237 __free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
1238} 1507}
1239 1508
1240static void virtnet_remove(struct virtio_device *vdev) 1509static void virtnet_remove(struct virtio_device *vdev)
@@ -1260,6 +1529,7 @@ static void virtnet_remove(struct virtio_device *vdev)
1260static int virtnet_freeze(struct virtio_device *vdev) 1529static int virtnet_freeze(struct virtio_device *vdev)
1261{ 1530{
1262 struct virtnet_info *vi = vdev->priv; 1531 struct virtnet_info *vi = vdev->priv;
1532 int i;
1263 1533
1264 /* Prevent config work handler from accessing the device */ 1534 /* Prevent config work handler from accessing the device */
1265 mutex_lock(&vi->config_lock); 1535 mutex_lock(&vi->config_lock);
@@ -1270,7 +1540,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
1270 cancel_delayed_work_sync(&vi->refill); 1540 cancel_delayed_work_sync(&vi->refill);
1271 1541
1272 if (netif_running(vi->dev)) 1542 if (netif_running(vi->dev))
1273 napi_disable(&vi->rq.napi); 1543 for (i = 0; i < vi->max_queue_pairs; i++) {
1544 napi_disable(&vi->rq[i].napi);
1545 netif_napi_del(&vi->rq[i].napi);
1546 }
1274 1547
1275 remove_vq_common(vi); 1548 remove_vq_common(vi);
1276 1549
@@ -1282,24 +1555,28 @@ static int virtnet_freeze(struct virtio_device *vdev)
1282static int virtnet_restore(struct virtio_device *vdev) 1555static int virtnet_restore(struct virtio_device *vdev)
1283{ 1556{
1284 struct virtnet_info *vi = vdev->priv; 1557 struct virtnet_info *vi = vdev->priv;
1285 int err; 1558 int err, i;
1286 1559
1287 err = init_vqs(vi); 1560 err = init_vqs(vi);
1288 if (err) 1561 if (err)
1289 return err; 1562 return err;
1290 1563
1291 if (netif_running(vi->dev)) 1564 if (netif_running(vi->dev))
1292 virtnet_napi_enable(&vi->rq); 1565 for (i = 0; i < vi->max_queue_pairs; i++)
1566 virtnet_napi_enable(&vi->rq[i]);
1293 1567
1294 netif_device_attach(vi->dev); 1568 netif_device_attach(vi->dev);
1295 1569
1296 if (!try_fill_recv(&vi->rq, GFP_KERNEL)) 1570 for (i = 0; i < vi->max_queue_pairs; i++)
1297 schedule_delayed_work(&vi->refill, 0); 1571 if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
1572 schedule_delayed_work(&vi->refill, 0);
1298 1573
1299 mutex_lock(&vi->config_lock); 1574 mutex_lock(&vi->config_lock);
1300 vi->config_enable = true; 1575 vi->config_enable = true;
1301 mutex_unlock(&vi->config_lock); 1576 mutex_unlock(&vi->config_lock);
1302 1577
1578 virtnet_set_queues(vi, vi->curr_queue_pairs);
1579
1303 return 0; 1580 return 0;
1304} 1581}
1305#endif 1582#endif
@@ -1317,7 +1594,7 @@ static unsigned int features[] = {
1317 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, 1594 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
1318 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, 1595 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
1319 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, 1596 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1320 VIRTIO_NET_F_GUEST_ANNOUNCE, 1597 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
1321}; 1598};
1322 1599
1323static struct virtio_driver virtio_net_driver = { 1600static struct virtio_driver virtio_net_driver = {