 drivers/net/virtio_net.c        | 473 ++++++++++++++++++++++++++++++---------
 include/uapi/linux/virtio_net.h |  27 +++++++-
 2 files changed, 402 insertions(+), 98 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 02a71021565e..c0830488a390 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -58,6 +58,9 @@ struct send_queue {
 
         /* TX: fragments + linear part + virtio header */
         struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+        /* Name of the send queue: output.$index */
+        char name[40];
 };
 
 /* Internal representation of a receive virtqueue */
@@ -75,22 +78,34 @@ struct receive_queue {
 
         /* RX: fragments + linear part + virtio header */
         struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+        /* Name of this receive queue: input.$index */
+        char name[40];
 };
 
 struct virtnet_info {
         struct virtio_device *vdev;
         struct virtqueue *cvq;
         struct net_device *dev;
-        struct send_queue sq;
-        struct receive_queue rq;
+        struct send_queue *sq;
+        struct receive_queue *rq;
         unsigned int status;
 
+        /* Max # of queue pairs supported by the device */
+        u16 max_queue_pairs;
+
+        /* # of queue pairs currently used by the driver */
+        u16 curr_queue_pairs;
+
         /* I like... big packets and I cannot lie! */
         bool big_packets;
 
         /* Host will merge rx buffers for big packets (shake it! shake it!) */
         bool mergeable_rx_bufs;
 
+        /* Has control virtqueue */
+        bool has_cvq;
+
         /* enable config space updates */
         bool config_enable;
 
@@ -105,6 +120,9 @@ struct virtnet_info {
 
         /* Lock for config space updates */
         struct mutex config_lock;
+
+        /* Is the affinity hint set for the virtqueues? */
+        bool affinity_hint_set;
 };
 
 struct skb_vnet_hdr {
@@ -125,6 +143,29 @@ struct padded_vnet_hdr {
         char padding[6];
 };
 
+/* Converting between virtqueue no. and kernel tx/rx queue no.
+ * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
+ */
+static int vq2txq(struct virtqueue *vq)
+{
+        return (virtqueue_get_queue_index(vq) - 1) / 2;
+}
+
+static int txq2vq(int txq)
+{
+        return txq * 2 + 1;
+}
+
+static int vq2rxq(struct virtqueue *vq)
+{
+        return virtqueue_get_queue_index(vq) / 2;
+}
+
+static int rxq2vq(int rxq)
+{
+        return rxq * 2;
+}
+
 static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
 {
         return (struct skb_vnet_hdr *)skb->cb;
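[Editor's note — illustration, not part of the patch: the helpers above encode the fixed layout rx0, tx0, rx1, tx1, ..., with the control vq last. A minimal standalone sketch in plain userspace C, keeping only the arithmetic (no struct virtqueue), checks that the two mappings invert each other:]

    #include <assert.h>

    /* Same arithmetic as the driver's helpers, minus struct virtqueue */
    static int txq2vq(int txq) { return txq * 2 + 1; }
    static int rxq2vq(int rxq) { return rxq * 2; }
    static int vq2txq(int vq)  { return (vq - 1) / 2; }
    static int vq2rxq(int vq)  { return vq / 2; }

    int main(void)
    {
        int i;

        /* For 2 queue pairs the layout is 0:rx0 1:tx0 2:rx1 3:tx1 4:cvq */
        assert(rxq2vq(0) == 0 && txq2vq(0) == 1);
        assert(rxq2vq(1) == 2 && txq2vq(1) == 3);

        /* The conversions are inverses for any queue index */
        for (i = 0; i < 128; i++) {
            assert(vq2txq(txq2vq(i)) == i);
            assert(vq2rxq(rxq2vq(i)) == i);
        }
        return 0;
    }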
@@ -165,7 +206,7 @@ static void skb_xmit_done(struct virtqueue *vq)
         virtqueue_disable_cb(vq);
 
         /* We were probably waiting for more output buffers. */
-        netif_wake_queue(vi->dev);
+        netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
 static void set_skb_frag(struct sk_buff *skb, struct page *page,
@@ -502,7 +543,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 static void skb_recv_done(struct virtqueue *rvq)
 {
         struct virtnet_info *vi = rvq->vdev->priv;
-        struct receive_queue *rq = &vi->rq;
+        struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
 
         /* Schedule NAPI, Suppress further interrupts if successful. */
         if (napi_schedule_prep(&rq->napi)) {
@@ -532,15 +573,21 @@ static void refill_work(struct work_struct *work)
         struct virtnet_info *vi =
                 container_of(work, struct virtnet_info, refill.work);
         bool still_empty;
+        int i;
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                struct receive_queue *rq = &vi->rq[i];
 
-        napi_disable(&vi->rq.napi);
-        still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL);
-        virtnet_napi_enable(&vi->rq);
+                napi_disable(&rq->napi);
+                still_empty = !try_fill_recv(rq, GFP_KERNEL);
+                virtnet_napi_enable(rq);
 
-        /* In theory, this can happen: if we don't get any buffers in
-         * we will *never* try to fill again. */
-        if (still_empty)
-                schedule_delayed_work(&vi->refill, HZ/2);
+                /* In theory, this can happen: if we don't get any buffers in
+                 * we will *never* try to fill again.
+                 */
+                if (still_empty)
+                        schedule_delayed_work(&vi->refill, HZ/2);
+        }
 }
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -578,6 +625,21 @@ again:
         return received;
 }
 
+static int virtnet_open(struct net_device *dev)
+{
+        struct virtnet_info *vi = netdev_priv(dev);
+        int i;
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                /* Make sure we have some buffers: if oom use wq. */
+                if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
+                        schedule_delayed_work(&vi->refill, 0);
+                virtnet_napi_enable(&vi->rq[i]);
+        }
+
+        return 0;
+}
+
 static unsigned int free_old_xmit_skbs(struct send_queue *sq)
 {
         struct sk_buff *skb;
@@ -650,7 +712,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
         struct virtnet_info *vi = netdev_priv(dev);
-        struct send_queue *sq = &vi->sq;
+        int qnum = skb_get_queue_mapping(skb);
+        struct send_queue *sq = &vi->sq[qnum];
         int capacity;
 
         /* Free up any pending old buffers before queueing new ones. */
@@ -664,13 +727,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
                 if (likely(capacity == -ENOMEM)) {
                         if (net_ratelimit())
                                 dev_warn(&dev->dev,
-                                         "TX queue failure: out of memory\n");
+                                         "TXQ (%d) failure: out of memory\n",
+                                         qnum);
                 } else {
                         dev->stats.tx_fifo_errors++;
                         if (net_ratelimit())
                                 dev_warn(&dev->dev,
-                                         "Unexpected TX queue failure: %d\n",
-                                         capacity);
+                                         "Unexpected TXQ (%d) failure: %d\n",
+                                         qnum, capacity);
                 }
                 dev->stats.tx_dropped++;
                 kfree_skb(skb);
@@ -685,12 +749,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
         /* Apparently nice girls don't return TX_BUSY; stop the queue
          * before it gets out of hand.  Naturally, this wastes entries. */
         if (capacity < 2+MAX_SKB_FRAGS) {
-                netif_stop_queue(dev);
+                netif_stop_subqueue(dev, qnum);
                 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
                         /* More just got used, free them then recheck. */
                         capacity += free_old_xmit_skbs(sq);
                         if (capacity >= 2+MAX_SKB_FRAGS) {
-                                netif_start_queue(dev);
+                                netif_start_subqueue(dev, qnum);
                                 virtqueue_disable_cb(sq->vq);
                         }
                 }
@@ -758,23 +822,13 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
 static void virtnet_netpoll(struct net_device *dev)
 {
         struct virtnet_info *vi = netdev_priv(dev);
+        int i;
 
-        napi_schedule(&vi->rq.napi);
+        for (i = 0; i < vi->curr_queue_pairs; i++)
+                napi_schedule(&vi->rq[i].napi);
 }
 #endif
 
-static int virtnet_open(struct net_device *dev)
-{
-        struct virtnet_info *vi = netdev_priv(dev);
-
-        /* Make sure we have some buffers: if oom use wq. */
-        if (!try_fill_recv(&vi->rq, GFP_KERNEL))
-                schedule_delayed_work(&vi->refill, 0);
-
-        virtnet_napi_enable(&vi->rq);
-        return 0;
-}
-
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -830,13 +884,39 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
         rtnl_unlock();
 }
 
+static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
+{
+        struct scatterlist sg;
+        struct virtio_net_ctrl_mq s;
+        struct net_device *dev = vi->dev;
+
+        if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
+                return 0;
+
+        s.virtqueue_pairs = queue_pairs;
+        sg_init_one(&sg, &s, sizeof(s));
+
+        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
+                                  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)) {
+                dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
+                         queue_pairs);
+                return -EINVAL;
+        } else
+                vi->curr_queue_pairs = queue_pairs;
+
+        return 0;
+}
+
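[Editor's note — illustration, not part of the patch: virtnet_set_queues() is the single choke point for renegotiating the active queue-pair count (virtnet_restore() below reuses it). A hypothetical caller — the helper name and its existence are assumptions for illustration — would clamp any request to what the device advertised before sending the command:]

    /* Hypothetical helper, not in this patch: clamp a requested queue-pair
     * count to the device-advertised maximum before renegotiating.
     */
    static int virtnet_request_queue_pairs(struct virtnet_info *vi, u16 requested)
    {
        u16 qp = clamp_t(u16, requested, 1, vi->max_queue_pairs);

        return virtnet_set_queues(vi, qp);
    }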
 static int virtnet_close(struct net_device *dev)
 {
         struct virtnet_info *vi = netdev_priv(dev);
+        int i;
 
         /* Make sure refill_work doesn't re-enable napi! */
         cancel_delayed_work_sync(&vi->refill);
-        napi_disable(&vi->rq.napi);
+
+        for (i = 0; i < vi->max_queue_pairs; i++)
+                napi_disable(&vi->rq[i].napi);
 
         return 0;
 }
@@ -943,13 +1023,41 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
         return 0;
 }
 
+static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
+{
+        int i;
+
+        /* In multiqueue mode, when the number of CPUs equals the number of
+         * queue pairs, we let each queue pair be private to one CPU by
+         * setting the affinity hint, eliminating the contention.
+         */
+        if ((vi->curr_queue_pairs == 1 ||
+             vi->max_queue_pairs != num_online_cpus()) && set) {
+                if (vi->affinity_hint_set)
+                        set = false;
+                else
+                        return;
+        }
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                int cpu = set ? i : -1;
+                virtqueue_set_affinity(vi->rq[i].vq, cpu);
+                virtqueue_set_affinity(vi->sq[i].vq, cpu);
+        }
+
+        if (set)
+                vi->affinity_hint_set = true;
+        else
+                vi->affinity_hint_set = false;
+}
+
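[Editor's note — illustration, not part of the patch: the hint is applied only when every queue pair can own a distinct CPU; otherwise previously set hints are cleared. A standalone sketch of the core decision in plain C (simplified: it ignores the "hints were never set" early return):]

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors virtnet_set_affinity()'s core decision: queue pair `pair` is
     * hinted to CPU `pair` only when each pair can own a distinct CPU;
     * returning -1 means the hint is cleared.
     */
    static int hint_for(int pair, int max_queue_pairs, int online_cpus, bool set)
    {
        if (set && max_queue_pairs != online_cpus)
            return -1;
        return set ? pair : -1;
    }

    int main(void)
    {
        int i;

        for (i = 0; i < 4; i++)    /* 4 pairs, 4 CPUs: private CPU per pair */
            printf("pair %d -> cpu %d\n", i, hint_for(i, 4, 4, true));
        for (i = 0; i < 4; i++)    /* 4 pairs, 8 CPUs: hints stay cleared */
            printf("pair %d -> cpu %d\n", i, hint_for(i, 4, 8, true));
        return 0;
    }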
 static void virtnet_get_ringparam(struct net_device *dev,
                                 struct ethtool_ringparam *ring)
 {
         struct virtnet_info *vi = netdev_priv(dev);
 
-        ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq);
-        ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq);
+        ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+        ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
         ring->rx_pending = ring->rx_max_pending;
         ring->tx_pending = ring->tx_max_pending;
 }
@@ -984,6 +1092,21 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
         return 0;
 }
 
+/* To avoid contending for a lock held by a vCPU that may exit to the host,
+ * select the txq based on the processor id.
+ * TODO: handle cpu hotplug.
+ */
+static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+        int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
+                  smp_processor_id();
+
+        while (unlikely(txq >= dev->real_num_tx_queues))
+                txq -= dev->real_num_tx_queues;
+
+        return txq;
+}
+
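[Editor's note — illustration, not part of the patch: the subtraction loop folds a CPU id (or the recorded rx queue) into [0, real_num_tx_queues) and, for non-negative input, behaves exactly like a modulo. A standalone check in plain C:]

    #include <assert.h>

    /* Same folding as virtnet_select_queue(), detached from struct net_device */
    static int fold_txq(int txq, int real_num_tx_queues)
    {
        while (txq >= real_num_tx_queues)
            txq -= real_num_tx_queues;
        return txq;
    }

    int main(void)
    {
        int cpu;

        /* e.g. 4 tx queues: CPUs 0..7 map to queues 0 1 2 3 0 1 2 3 */
        for (cpu = 0; cpu < 64; cpu++)
            assert(fold_txq(cpu, 4) == cpu % 4);
        return 0;
    }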
 static const struct net_device_ops virtnet_netdev = {
         .ndo_open             = virtnet_open,
         .ndo_stop             = virtnet_close,
@@ -995,6 +1118,7 @@ static const struct net_device_ops virtnet_netdev = {
         .ndo_get_stats64      = virtnet_stats,
         .ndo_vlan_rx_add_vid  = virtnet_vlan_rx_add_vid,
         .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
+        .ndo_select_queue     = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
         .ndo_poll_controller  = virtnet_netpoll,
 #endif
@@ -1030,10 +1154,10 @@ static void virtnet_config_changed_work(struct work_struct *work)
 
         if (vi->status & VIRTIO_NET_S_LINK_UP) {
                 netif_carrier_on(vi->dev);
-                netif_wake_queue(vi->dev);
+                netif_tx_wake_all_queues(vi->dev);
         } else {
                 netif_carrier_off(vi->dev);
-                netif_stop_queue(vi->dev);
+                netif_tx_stop_all_queues(vi->dev);
         }
 done:
         mutex_unlock(&vi->config_lock);
@@ -1046,48 +1170,203 @@ static void virtnet_config_changed(struct virtio_device *vdev)
         schedule_work(&vi->config_work);
 }
 
+static void virtnet_free_queues(struct virtnet_info *vi)
+{
+        kfree(vi->rq);
+        kfree(vi->sq);
+}
+
+static void free_receive_bufs(struct virtnet_info *vi)
+{
+        int i;
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                while (vi->rq[i].pages)
+                        __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+        }
+}
+
+static void free_unused_bufs(struct virtnet_info *vi)
+{
+        void *buf;
+        int i;
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                struct virtqueue *vq = vi->sq[i].vq;
+                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+                        dev_kfree_skb(buf);
+        }
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                struct virtqueue *vq = vi->rq[i].vq;
+
+                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+                        if (vi->mergeable_rx_bufs || vi->big_packets)
+                                give_pages(&vi->rq[i], buf);
+                        else
+                                dev_kfree_skb(buf);
+                        --vi->rq[i].num;
+                }
+                BUG_ON(vi->rq[i].num != 0);
+        }
+}
+
 static void virtnet_del_vqs(struct virtnet_info *vi)
 {
         struct virtio_device *vdev = vi->vdev;
 
+        virtnet_set_affinity(vi, false);
+
         vdev->config->del_vqs(vdev);
+
+        virtnet_free_queues(vi);
 }
 
-static int init_vqs(struct virtnet_info *vi)
+static int virtnet_find_vqs(struct virtnet_info *vi)
 {
-        struct virtqueue *vqs[3];
-        vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-        const char *names[] = { "input", "output", "control" };
-        int nvqs, err;
-
-        /* We expect two virtqueues, receive then send,
-         * and optionally control. */
-        nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
-        err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
-        if (err)
-                return err;
+        vq_callback_t **callbacks;
+        struct virtqueue **vqs;
+        int ret = -ENOMEM;
+        int i, total_vqs;
+        const char **names;
+
+        /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
+         * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
+         * possible control vq.
+         */
+        total_vqs = vi->max_queue_pairs * 2 +
+                    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
+
+        /* Allocate space for find_vqs parameters */
+        vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
+        if (!vqs)
+                goto err_vq;
+        callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
+        if (!callbacks)
+                goto err_callback;
+        names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
+        if (!names)
+                goto err_names;
+
+        /* Parameters for control virtqueue, if any */
+        if (vi->has_cvq) {
+                callbacks[total_vqs - 1] = NULL;
+                names[total_vqs - 1] = "control";
+        }
 
-        vi->rq.vq = vqs[0];
-        vi->sq.vq = vqs[1];
+        /* Allocate/initialize parameters for send/receive virtqueues */
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                callbacks[rxq2vq(i)] = skb_recv_done;
+                callbacks[txq2vq(i)] = skb_xmit_done;
+                sprintf(vi->rq[i].name, "input.%d", i);
+                sprintf(vi->sq[i].name, "output.%d", i);
+                names[rxq2vq(i)] = vi->rq[i].name;
+                names[txq2vq(i)] = vi->sq[i].name;
+        }
 
-        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-                vi->cvq = vqs[2];
+        ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
+                                         names);
+        if (ret)
+                goto err_find;
 
+        if (vi->has_cvq) {
+                vi->cvq = vqs[total_vqs - 1];
                 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
                         vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
         }
+
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                vi->rq[i].vq = vqs[rxq2vq(i)];
+                vi->sq[i].vq = vqs[txq2vq(i)];
+        }
+
+        kfree(names);
+        kfree(callbacks);
+        kfree(vqs);
+
         return 0;
+
+err_find:
+        kfree(names);
+err_names:
+        kfree(callbacks);
+err_callback:
+        kfree(vqs);
+err_vq:
+        return ret;
+}
+
+static int virtnet_alloc_queues(struct virtnet_info *vi)
+{
+        int i;
+
+        vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
+        if (!vi->sq)
+                goto err_sq;
+        vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
+        if (!vi->rq)
+                goto err_rq;
+
+        INIT_DELAYED_WORK(&vi->refill, refill_work);
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                vi->rq[i].pages = NULL;
+                netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
+                               napi_weight);
+
+                sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+                sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
+        }
+
+        return 0;
+
+err_rq:
+        kfree(vi->sq);
+err_sq:
+        return -ENOMEM;
+}
+
+static int init_vqs(struct virtnet_info *vi)
+{
+        int ret;
+
+        /* Allocate send & receive queues */
+        ret = virtnet_alloc_queues(vi);
+        if (ret)
+                goto err;
+
+        ret = virtnet_find_vqs(vi);
+        if (ret)
+                goto err_free;
+
+        virtnet_set_affinity(vi, true);
+        return 0;
+
+err_free:
+        virtnet_free_queues(vi);
+err:
+        return ret;
 }
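[Editor's note — illustration, not part of the patch: init_vqs() splits setup into allocation (virtnet_alloc_queues) and discovery (virtnet_find_vqs) so each phase can unwind independently. For N queue pairs plus a control vq, find_vqs() is handed total_vqs = 2*N + 1 entries laid out per the index mapping above. A standalone sketch of that parameter layout in plain C:]

    #include <stdio.h>

    int main(void)
    {
        const int max_queue_pairs = 2, has_cvq = 1;
        int total_vqs = max_queue_pairs * 2 + has_cvq;
        const char *names[8];
        char buf[2][2][16];
        int i;

        for (i = 0; i < max_queue_pairs; i++) {
            snprintf(buf[i][0], sizeof(buf[i][0]), "input.%d", i);
            snprintf(buf[i][1], sizeof(buf[i][1]), "output.%d", i);
            names[i * 2] = buf[i][0];        /* rxq2vq(i) */
            names[i * 2 + 1] = buf[i][1];    /* txq2vq(i) */
        }
        if (has_cvq)
            names[total_vqs - 1] = "control";

        /* Prints: input.0 output.0 input.1 output.1 control */
        for (i = 0; i < total_vqs; i++)
            printf("%s ", names[i]);
        printf("\n");
        return 0;
    }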
 
 static int virtnet_probe(struct virtio_device *vdev)
 {
-        int err;
+        int i, err;
         struct net_device *dev;
         struct virtnet_info *vi;
+        u16 max_queue_pairs;
+
+        /* Find if host supports multiqueue virtio_net device */
+        err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
+                                offsetof(struct virtio_net_config,
+                                max_virtqueue_pairs), &max_queue_pairs);
+
+        /* We need at least 2 queues */
+        if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+            max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
+            !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
+                max_queue_pairs = 1;
 
         /* Allocate ourselves a network device with room for our info */
-        dev = alloc_etherdev(sizeof(struct virtnet_info));
+        dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
         if (!dev)
                 return -ENOMEM;
 
@@ -1133,22 +1412,17 @@ static int virtnet_probe(struct virtio_device *vdev)
 
         /* Set up our device-specific information */
         vi = netdev_priv(dev);
-        netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
         vi->dev = dev;
         vi->vdev = vdev;
         vdev->priv = vi;
-        vi->rq.pages = NULL;
         vi->stats = alloc_percpu(struct virtnet_stats);
         err = -ENOMEM;
         if (vi->stats == NULL)
                 goto free;
 
-        INIT_DELAYED_WORK(&vi->refill, refill_work);
         mutex_init(&vi->config_lock);
         vi->config_enable = true;
         INIT_WORK(&vi->config_work, virtnet_config_changed_work);
-        sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
-        sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
 
         /* If we can receive ANY GSO packets, we must allocate large ones. */
         if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1159,10 +1433,21 @@ static int virtnet_probe(struct virtio_device *vdev)
         if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
                 vi->mergeable_rx_bufs = true;
 
+        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
+                vi->has_cvq = true;
+
+        /* Use single tx/rx queue pair as default */
+        vi->curr_queue_pairs = 1;
+        vi->max_queue_pairs = max_queue_pairs;
+
+        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
         err = init_vqs(vi);
         if (err)
                 goto free_stats;
 
+        netif_set_real_num_tx_queues(dev, 1);
+        netif_set_real_num_rx_queues(dev, 1);
+
         err = register_netdev(dev);
         if (err) {
                 pr_debug("virtio_net: registering device failed\n");
@@ -1170,12 +1455,15 @@ static int virtnet_probe(struct virtio_device *vdev)
         }
 
         /* Last of all, set up some receive buffers. */
-        try_fill_recv(&vi->rq, GFP_KERNEL);
-
-        /* If we didn't even get one input buffer, we're useless. */
-        if (vi->rq.num == 0) {
-                err = -ENOMEM;
-                goto unregister;
+        for (i = 0; i < vi->max_queue_pairs; i++) {
+                try_fill_recv(&vi->rq[i], GFP_KERNEL);
+
+                /* If we didn't even get one input buffer, we're useless. */
+                if (vi->rq[i].num == 0) {
+                        free_unused_bufs(vi);
+                        err = -ENOMEM;
+                        goto free_recv_bufs;
+                }
         }
 
         /* Assume link up if device can't report link status,
@@ -1188,12 +1476,16 @@ static int virtnet_probe(struct virtio_device *vdev)
                 netif_carrier_on(dev);
         }
 
-        pr_debug("virtnet: registered device %s\n", dev->name);
+        pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
+                 dev->name, max_queue_pairs);
+
         return 0;
 
-unregister:
+free_recv_bufs:
+        free_receive_bufs(vi);
         unregister_netdev(dev);
 free_vqs:
+        cancel_delayed_work_sync(&vi->refill);
         virtnet_del_vqs(vi);
 free_stats:
         free_percpu(vi->stats);
@@ -1202,28 +1494,6 @@ free:
         return err;
 }
 
-static void free_unused_bufs(struct virtnet_info *vi)
-{
-        void *buf;
-        while (1) {
-                buf = virtqueue_detach_unused_buf(vi->sq.vq);
-                if (!buf)
-                        break;
-                dev_kfree_skb(buf);
-        }
-        while (1) {
-                buf = virtqueue_detach_unused_buf(vi->rq.vq);
-                if (!buf)
-                        break;
-                if (vi->mergeable_rx_bufs || vi->big_packets)
-                        give_pages(&vi->rq, buf);
-                else
-                        dev_kfree_skb(buf);
-                --vi->rq.num;
-        }
-        BUG_ON(vi->rq.num != 0);
-}
-
 static void remove_vq_common(struct virtnet_info *vi)
 {
         vi->vdev->config->reset(vi->vdev);
@@ -1231,10 +1501,9 @@ static void remove_vq_common(struct virtnet_info *vi)
         /* Free unused buffers in both send and recv, if any. */
         free_unused_bufs(vi);
 
-        virtnet_del_vqs(vi);
+        free_receive_bufs(vi);
 
-        while (vi->rq.pages)
-                __free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
+        virtnet_del_vqs(vi);
 }
 
 static void virtnet_remove(struct virtio_device *vdev)
@@ -1260,6 +1529,7 @@ static void virtnet_remove(struct virtio_device *vdev)
 static int virtnet_freeze(struct virtio_device *vdev)
 {
         struct virtnet_info *vi = vdev->priv;
+        int i;
 
         /* Prevent config work handler from accessing the device */
         mutex_lock(&vi->config_lock);
@@ -1270,7 +1540,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
         cancel_delayed_work_sync(&vi->refill);
 
         if (netif_running(vi->dev))
-                napi_disable(&vi->rq.napi);
+                for (i = 0; i < vi->max_queue_pairs; i++) {
+                        napi_disable(&vi->rq[i].napi);
+                        netif_napi_del(&vi->rq[i].napi);
+                }
 
         remove_vq_common(vi);
 
@@ -1282,24 +1555,28 @@ static int virtnet_freeze(struct virtio_device *vdev)
 static int virtnet_restore(struct virtio_device *vdev)
 {
         struct virtnet_info *vi = vdev->priv;
-        int err;
+        int err, i;
 
         err = init_vqs(vi);
         if (err)
                 return err;
 
         if (netif_running(vi->dev))
-                virtnet_napi_enable(&vi->rq);
+                for (i = 0; i < vi->max_queue_pairs; i++)
+                        virtnet_napi_enable(&vi->rq[i]);
 
         netif_device_attach(vi->dev);
 
-        if (!try_fill_recv(&vi->rq, GFP_KERNEL))
-                schedule_delayed_work(&vi->refill, 0);
+        for (i = 0; i < vi->max_queue_pairs; i++)
+                if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
+                        schedule_delayed_work(&vi->refill, 0);
 
         mutex_lock(&vi->config_lock);
         vi->config_enable = true;
         mutex_unlock(&vi->config_lock);
 
+        virtnet_set_queues(vi, vi->curr_queue_pairs);
+
         return 0;
 }
 #endif
@@ -1317,7 +1594,7 @@ static unsigned int features[] = {
         VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
         VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
         VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
-        VIRTIO_NET_F_GUEST_ANNOUNCE,
+        VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 2470f541af50..848e3584d7c8 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -51,6 +51,8 @@
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
 #define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Guest can announce device on the
                                          * network */
+#define VIRTIO_NET_F_MQ 22              /* Device supports Receive Flow
+                                         * Steering */
 
 #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */
 #define VIRTIO_NET_S_ANNOUNCE   2       /* Announcement is needed */
@@ -60,6 +62,11 @@ struct virtio_net_config {
         __u8 mac[6];
         /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
         __u16 status;
+        /* Maximum number of each of transmit and receive queues;
+         * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
+         * Legal values are between 1 and 0x8000
+         */
+        __u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
 /* This is the first element of the scatter-gather list.  If you don't
@@ -166,4 +173,24 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_ANNOUNCE       3
  #define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
 
+/*
+ * Control Receive Flow Steering
+ *
+ * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
+ * enables Receive Flow Steering, specifying the number of the transmit and
+ * receive queues that will be used. After the command is consumed and acked by
+ * the device, the device will not steer new packets on receive virtqueues
+ * other than specified nor read from transmit virtqueues other than specified.
+ * Accordingly, the driver should not transmit new packets on virtqueues other
+ * than specified.
+ */
+struct virtio_net_ctrl_mq {
+        __u16 virtqueue_pairs;
+};
+
+#define VIRTIO_NET_CTRL_MQ    4
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+
 #endif /* _LINUX_VIRTIO_NET_H */
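[Editor's note — illustration, not part of the patch: as described in the comment above, every control command travels as a scatter-gather chain of a two-byte {class, command} header, the command payload (here struct virtio_net_ctrl_mq), and a one-byte ack the device writes back (VIRTIO_NET_OK/VIRTIO_NET_ERR). A standalone sketch of the bytes a VQ_PAIRS_SET request occupies; byte order is assumed guest-native, as in this revision of the header:]

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define VIRTIO_NET_CTRL_MQ              4
    #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0

    struct virtio_net_ctrl_mq {
        uint16_t virtqueue_pairs;
    };

    int main(void)
    {
        /* hdr + payload + ack, as they would appear in the sg chain */
        uint8_t hdr[2] = { VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET };
        struct virtio_net_ctrl_mq mq = { .virtqueue_pairs = 4 };
        uint8_t ack = 0xff;    /* device overwrites with VIRTIO_NET_OK/ERR */

        uint8_t wire[2 + sizeof(mq) + 1];
        memcpy(wire, hdr, 2);
        memcpy(wire + 2, &mq, sizeof(mq));
        wire[2 + sizeof(mq)] = ack;

        printf("control request is %zu bytes\n", sizeof(wire));    /* 5 */
        return 0;
    }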