author     Wei Liu <wei.liu2@citrix.com>           2014-06-04 05:30:42 -0400
committer  David S. Miller <davem@davemloft.net>   2014-06-04 17:48:16 -0400
commit     e9ce7cb6b107407e4798e8905b18ad8b642766f6 (patch)
tree       dd99d31fa4f2bae0e836c99a811e5de4e1202567 /drivers/net/xen-netback/interface.c
parent     a55d9766cecf2b1b9af4fcf93b2d41b71e599c76 (diff)
xen-netback: Factor queue-specific data into queue struct
In preparation for multi-queue support in xen-netback, move the
queue-specific data from struct xenvif into struct xenvif_queue, and
update the rest of the code to use this.

Also adds loops over queues where appropriate, even though only one is
configured at this point, and uses alloc_netdev_mq() and the
corresponding multi-queue netif wake/start/stop functions in
preparation for multiple active queues.

Finally, implements a trivial queue selection function suitable for
ndo_select_queue, which simply returns 0 for a single queue and uses
skb_get_hash() to compute the queue index otherwise.

Signed-off-by: Andrew J. Bennieston <andrew.bennieston@citrix.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/xen-netback/interface.c')
-rw-r--r--   drivers/net/xen-netback/interface.c   502
1 file changed, 326 insertions(+), 176 deletions(-)
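
Before the diff itself, a minimal standalone illustration of the queue-selection
rule described in the commit message: return queue 0 when only one queue exists,
otherwise take the flow hash modulo the number of queues. This is a userspace
sketch only, not the kernel code; pick_tx_queue() is a made-up name, and the
real implementation is xenvif_select_queue() in the diff below.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the selection rule from the commit message. */
static uint16_t pick_tx_queue(uint32_t skb_hash, uint16_t num_queues)
{
	/* Single queue (or an old single-queue frontend): always queue 0. */
	if (num_queues == 1)
		return 0;

	/* Otherwise spread flows across queues by hash. */
	return (uint16_t)(skb_hash % num_queues);
}

int main(void)
{
	/* A few example flow hashes mapped onto 4 queues. */
	uint32_t hashes[] = { 0x12345678u, 0xdeadbeefu, 0x00000007u };
	unsigned int i;

	for (i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
		printf("hash 0x%08x -> queue %u\n",
		       (unsigned int)hashes[i],
		       (unsigned int)pick_tx_queue(hashes[i], 4));
	return 0;
}
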
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 8fdedac3fab2..6005b5d1d404 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -34,7 +34,6 @@
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 #include <linux/if_vlan.h>
-#include <linux/vmalloc.h>
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
@@ -43,6 +42,16 @@
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
+static inline void xenvif_stop_queue(struct xenvif_queue *queue)
+{
+	struct net_device *dev = queue->vif->dev;
+
+	if (!queue->vif->can_queue)
+		return;
+
+	netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+}
+
 int xenvif_schedulable(struct xenvif *vif)
 {
 	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
@@ -50,33 +59,34 @@ int xenvif_schedulable(struct xenvif *vif)
 
 static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
 {
-	struct xenvif *vif = dev_id;
+	struct xenvif_queue *queue = dev_id;
 
-	if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
-		napi_schedule(&vif->napi);
+	if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
+		napi_schedule(&queue->napi);
 
 	return IRQ_HANDLED;
 }
 
-static int xenvif_poll(struct napi_struct *napi, int budget)
+int xenvif_poll(struct napi_struct *napi, int budget)
 {
-	struct xenvif *vif = container_of(napi, struct xenvif, napi);
+	struct xenvif_queue *queue =
+		container_of(napi, struct xenvif_queue, napi);
 	int work_done;
 
 	/* This vif is rogue, we pretend we've there is nothing to do
 	 * for this vif to deschedule it from NAPI. But this interface
 	 * will be turned off in thread context later.
 	 */
-	if (unlikely(vif->disabled)) {
+	if (unlikely(queue->vif->disabled)) {
 		napi_complete(napi);
 		return 0;
 	}
 
-	work_done = xenvif_tx_action(vif, budget);
+	work_done = xenvif_tx_action(queue, budget);
 
 	if (work_done < budget) {
 		napi_complete(napi);
-		xenvif_napi_schedule_or_enable_events(vif);
+		xenvif_napi_schedule_or_enable_events(queue);
 	}
 
 	return work_done;
@@ -84,9 +94,9 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 
 static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
-	struct xenvif *vif = dev_id;
+	struct xenvif_queue *queue = dev_id;
 
-	xenvif_kick_thread(vif);
+	xenvif_kick_thread(queue);
 
 	return IRQ_HANDLED;
 }
@@ -99,28 +109,81 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void xenvif_wake_queue(unsigned long data)
+int xenvif_queue_stopped(struct xenvif_queue *queue)
+{
+	struct net_device *dev = queue->vif->dev;
+	unsigned int id = queue->id;
+	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
+}
+
+void xenvif_wake_queue(struct xenvif_queue *queue)
+{
+	struct net_device *dev = queue->vif->dev;
+	unsigned int id = queue->id;
+	netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
+}
+
+/* Callback to wake the queue and drain it on timeout */
+static void xenvif_wake_queue_callback(unsigned long data)
 {
-	struct xenvif *vif = (struct xenvif *)data;
+	struct xenvif_queue *queue = (struct xenvif_queue *)data;
+
+	if (xenvif_queue_stopped(queue)) {
+		netdev_err(queue->vif->dev, "draining TX queue\n");
+		queue->rx_queue_purge = true;
+		xenvif_kick_thread(queue);
+		xenvif_wake_queue(queue);
+	}
+}
 
-	if (netif_queue_stopped(vif->dev)) {
-		netdev_err(vif->dev, "draining TX queue\n");
-		vif->rx_queue_purge = true;
-		xenvif_kick_thread(vif);
-		netif_wake_queue(vif->dev);
+static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+			       void *accel_priv, select_queue_fallback_t fallback)
+{
+	struct xenvif *vif = netdev_priv(dev);
+	unsigned int num_queues = dev->real_num_tx_queues;
+	u32 hash;
+	u16 queue_index;
+
+	/* First, check if there is only one queue to optimise the
+	 * single-queue or old frontend scenario.
+	 */
+	if (num_queues == 1) {
+		queue_index = 0;
+	} else {
+		/* Use skb_get_hash to obtain an L4 hash if available */
+		hash = skb_get_hash(skb);
+		queue_index = hash % num_queues;
 	}
+
+	return queue_index;
 }
 
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = dev->real_num_tx_queues;
+	u16 index;
 	int min_slots_needed;
 
 	BUG_ON(skb->dev != dev);
 
-	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL ||
-	    vif->dealloc_task == NULL ||
+	/* Drop the packet if queues are not set up */
+	if (num_queues < 1)
+		goto drop;
+
+	/* Obtain the queue to be used to transmit this packet */
+	index = skb_get_queue_mapping(skb);
+	if (index >= num_queues) {
+		pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.",
+				    index, vif->dev->name);
+		index %= num_queues;
+	}
+	queue = &vif->queues[index];
+
+	/* Drop the packet if queue is not ready */
+	if (queue->task == NULL ||
+	    queue->dealloc_task == NULL ||
 	    !xenvif_schedulable(vif))
 		goto drop;
 
@@ -139,16 +202,16 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * then turn off the queue to give the ring a chance to
 	 * drain.
 	 */
-	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
-		vif->wake_queue.function = xenvif_wake_queue;
-		vif->wake_queue.data = (unsigned long)vif;
-		xenvif_stop_queue(vif);
-		mod_timer(&vif->wake_queue,
+	if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
+		queue->wake_queue.function = xenvif_wake_queue_callback;
+		queue->wake_queue.data = (unsigned long)queue;
+		xenvif_stop_queue(queue);
+		mod_timer(&queue->wake_queue,
 			  jiffies + rx_drain_timeout_jiffies);
 	}
 
-	skb_queue_tail(&vif->rx_queue, skb);
-	xenvif_kick_thread(vif);
+	skb_queue_tail(&queue->rx_queue, skb);
+	xenvif_kick_thread(queue);
 
 	return NETDEV_TX_OK;
 
@@ -161,25 +224,65 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = dev->real_num_tx_queues;
+	unsigned long rx_bytes = 0;
+	unsigned long rx_packets = 0;
+	unsigned long tx_bytes = 0;
+	unsigned long tx_packets = 0;
+	unsigned int index;
+
+	if (vif->queues == NULL)
+		goto out;
+
+	/* Aggregate tx and rx stats from each queue */
+	for (index = 0; index < num_queues; ++index) {
+		queue = &vif->queues[index];
+		rx_bytes += queue->stats.rx_bytes;
+		rx_packets += queue->stats.rx_packets;
+		tx_bytes += queue->stats.tx_bytes;
+		tx_packets += queue->stats.tx_packets;
+	}
+
+out:
+	vif->dev->stats.rx_bytes = rx_bytes;
+	vif->dev->stats.rx_packets = rx_packets;
+	vif->dev->stats.tx_bytes = tx_bytes;
+	vif->dev->stats.tx_packets = tx_packets;
+
 	return &vif->dev->stats;
 }
 
 static void xenvif_up(struct xenvif *vif)
 {
-	napi_enable(&vif->napi);
-	enable_irq(vif->tx_irq);
-	if (vif->tx_irq != vif->rx_irq)
-		enable_irq(vif->rx_irq);
-	xenvif_napi_schedule_or_enable_events(vif);
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = vif->dev->real_num_tx_queues;
+	unsigned int queue_index;
+
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &vif->queues[queue_index];
+		napi_enable(&queue->napi);
+		enable_irq(queue->tx_irq);
+		if (queue->tx_irq != queue->rx_irq)
+			enable_irq(queue->rx_irq);
+		xenvif_napi_schedule_or_enable_events(queue);
+	}
 }
 
 static void xenvif_down(struct xenvif *vif)
 {
-	napi_disable(&vif->napi);
-	disable_irq(vif->tx_irq);
-	if (vif->tx_irq != vif->rx_irq)
-		disable_irq(vif->rx_irq);
-	del_timer_sync(&vif->credit_timeout);
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = vif->dev->real_num_tx_queues;
+	unsigned int queue_index;
+
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &vif->queues[queue_index];
+		napi_disable(&queue->napi);
+		disable_irq(queue->tx_irq);
+		if (queue->tx_irq != queue->rx_irq)
+			disable_irq(queue->rx_irq);
+		del_timer_sync(&queue->credit_timeout);
+	}
 }
 
 static int xenvif_open(struct net_device *dev)
@@ -187,7 +290,7 @@ static int xenvif_open(struct net_device *dev)
 	struct xenvif *vif = netdev_priv(dev);
 	if (netif_carrier_ok(dev))
 		xenvif_up(vif);
-	netif_start_queue(dev);
+	netif_tx_start_all_queues(dev);
 	return 0;
 }
 
@@ -196,7 +299,7 @@ static int xenvif_close(struct net_device *dev)
 	struct xenvif *vif = netdev_priv(dev);
 	if (netif_carrier_ok(dev))
 		xenvif_down(vif);
-	netif_stop_queue(dev);
+	netif_tx_stop_all_queues(dev);
 	return 0;
 }
 
@@ -236,29 +339,29 @@ static const struct xenvif_stat {
 } xenvif_stats[] = {
 	{
 		"rx_gso_checksum_fixup",
-		offsetof(struct xenvif, rx_gso_checksum_fixup)
+		offsetof(struct xenvif_stats, rx_gso_checksum_fixup)
 	},
 	/* If (sent != success + fail), there are probably packets never
 	 * freed up properly!
 	 */
 	{
 		"tx_zerocopy_sent",
-		offsetof(struct xenvif, tx_zerocopy_sent),
+		offsetof(struct xenvif_stats, tx_zerocopy_sent),
 	},
 	{
 		"tx_zerocopy_success",
-		offsetof(struct xenvif, tx_zerocopy_success),
+		offsetof(struct xenvif_stats, tx_zerocopy_success),
 	},
 	{
 		"tx_zerocopy_fail",
-		offsetof(struct xenvif, tx_zerocopy_fail)
+		offsetof(struct xenvif_stats, tx_zerocopy_fail)
 	},
 	/* Number of packets exceeding MAX_SKB_FRAG slots. You should use
 	 * a guest with the same MAX_SKB_FRAG
 	 */
 	{
 		"tx_frag_overflow",
-		offsetof(struct xenvif, tx_frag_overflow)
+		offsetof(struct xenvif_stats, tx_frag_overflow)
 	},
 };
 
@@ -275,11 +378,20 @@ static int xenvif_get_sset_count(struct net_device *dev, int string_set)
 static void xenvif_get_ethtool_stats(struct net_device *dev,
 				     struct ethtool_stats *stats, u64 * data)
 {
-	void *vif = netdev_priv(dev);
+	struct xenvif *vif = netdev_priv(dev);
+	unsigned int num_queues = dev->real_num_tx_queues;
 	int i;
-
-	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
-		data[i] = *(unsigned long *)(vif + xenvif_stats[i].offset);
+	unsigned int queue_index;
+	struct xenvif_stats *vif_stats;
+
+	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
+		unsigned long accum = 0;
+		for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+			vif_stats = &vif->queues[queue_index].stats;
+			accum += *(unsigned long *)(vif_stats + xenvif_stats[i].offset);
+		}
+		data[i] = accum;
+	}
 }
 
 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
@@ -312,6 +424,7 @@ static const struct net_device_ops xenvif_netdev_ops = {
 	.ndo_fix_features = xenvif_fix_features,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
+	.ndo_select_queue = xenvif_select_queue,
 };
 
 struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
@@ -321,10 +434,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	struct net_device *dev;
 	struct xenvif *vif;
 	char name[IFNAMSIZ] = {};
-	int i;
 
 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
-	dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
+	dev = alloc_netdev_mq(sizeof(struct xenvif), name, ether_setup, 1);
 	if (dev == NULL) {
 		pr_warn("Could not allocate netdev for %s\n", name);
 		return ERR_PTR(-ENOMEM);
@@ -339,15 +451,13 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->can_sg = 1;
 	vif->ip_csum = 1;
 	vif->dev = dev;
-
 	vif->disabled = false;
 
-	vif->credit_bytes = vif->remaining_credit = ~0UL;
-	vif->credit_usec = 0UL;
-	init_timer(&vif->credit_timeout);
-	vif->credit_window_start = get_jiffies_64();
-
-	init_timer(&vif->wake_queue);
+	/* Start out with no queues. The call below does not require
+	 * rtnl_lock() as it happens before register_netdev().
+	 */
+	vif->queues = NULL;
+	netif_set_real_num_tx_queues(dev, 0);
 
 	dev->netdev_ops = &xenvif_netdev_ops;
 	dev->hw_features = NETIF_F_SG |
@@ -358,34 +468,6 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 
 	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
 
-	skb_queue_head_init(&vif->rx_queue);
-	skb_queue_head_init(&vif->tx_queue);
-
-	vif->pending_cons = 0;
-	vif->pending_prod = MAX_PENDING_REQS;
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		vif->pending_ring[i] = i;
-	spin_lock_init(&vif->callback_lock);
-	spin_lock_init(&vif->response_lock);
-	/* If ballooning is disabled, this will consume real memory, so you
-	 * better enable it. The long term solution would be to use just a
-	 * bunch of valid page descriptors, without dependency on ballooning
-	 */
-	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
-				       vif->mmap_pages,
-				       false);
-	if (err) {
-		netdev_err(dev, "Could not reserve mmap_pages\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	for (i = 0; i < MAX_PENDING_REQS; i++) {
-		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
-			{ .callback = xenvif_zerocopy_callback,
-			  .ctx = NULL,
-			  .desc = i };
-		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
-	}
-
 	/*
 	 * Initialise a dummy MAC address. We choose the numerically
 	 * largest non-broadcast address to prevent the address getting
@@ -395,8 +477,6 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	memset(dev->dev_addr, 0xFF, ETH_ALEN);
 	dev->dev_addr[0] &= ~0x01;
 
-	netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);
-
 	netif_carrier_off(dev);
 
 	err = register_netdev(dev);
@@ -413,98 +493,147 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	return vif;
 }
 
-int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
+int xenvif_init_queue(struct xenvif_queue *queue)
+{
+	int err, i;
+
+	queue->credit_bytes = queue->remaining_credit = ~0UL;
+	queue->credit_usec = 0UL;
+	init_timer(&queue->credit_timeout);
+	queue->credit_window_start = get_jiffies_64();
+
+	skb_queue_head_init(&queue->rx_queue);
+	skb_queue_head_init(&queue->tx_queue);
+
+	queue->pending_cons = 0;
+	queue->pending_prod = MAX_PENDING_REQS;
+	for (i = 0; i < MAX_PENDING_REQS; ++i)
+		queue->pending_ring[i] = i;
+
+	spin_lock_init(&queue->callback_lock);
+	spin_lock_init(&queue->response_lock);
+
+	/* If ballooning is disabled, this will consume real memory, so you
+	 * better enable it. The long term solution would be to use just a
+	 * bunch of valid page descriptors, without dependency on ballooning
+	 */
+	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+				       queue->mmap_pages,
+				       false);
+	if (err) {
+		netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
+			{ .callback = xenvif_zerocopy_callback,
+			  .ctx = NULL,
+			  .desc = i };
+		queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+	}
+
+	init_timer(&queue->wake_queue);
+
+	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+		       XENVIF_NAPI_WEIGHT);
+
+	return 0;
+}
+
+void xenvif_carrier_on(struct xenvif *vif)
+{
+	rtnl_lock();
+	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
+		dev_set_mtu(vif->dev, ETH_DATA_LEN);
+	netdev_update_features(vif->dev);
+	netif_carrier_on(vif->dev);
+	if (netif_running(vif->dev))
+		xenvif_up(vif);
+	rtnl_unlock();
+}
+
+int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
 		   unsigned long rx_ring_ref, unsigned int tx_evtchn,
 		   unsigned int rx_evtchn)
 {
 	struct task_struct *task;
 	int err = -ENOMEM;
 
-	BUG_ON(vif->tx_irq);
-	BUG_ON(vif->task);
-	BUG_ON(vif->dealloc_task);
+	BUG_ON(queue->tx_irq);
+	BUG_ON(queue->task);
+	BUG_ON(queue->dealloc_task);
 
-	err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
+	err = xenvif_map_frontend_rings(queue, tx_ring_ref, rx_ring_ref);
 	if (err < 0)
 		goto err;
 
-	init_waitqueue_head(&vif->wq);
-	init_waitqueue_head(&vif->dealloc_wq);
+	init_waitqueue_head(&queue->wq);
+	init_waitqueue_head(&queue->dealloc_wq);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
 		err = bind_interdomain_evtchn_to_irqhandler(
-			vif->domid, tx_evtchn, xenvif_interrupt, 0,
-			vif->dev->name, vif);
+			queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
+			queue->name, queue);
 		if (err < 0)
 			goto err_unmap;
-		vif->tx_irq = vif->rx_irq = err;
-		disable_irq(vif->tx_irq);
+		queue->tx_irq = queue->rx_irq = err;
+		disable_irq(queue->tx_irq);
 	} else {
 		/* feature-split-event-channels == 1 */
-		snprintf(vif->tx_irq_name, sizeof(vif->tx_irq_name),
-			 "%s-tx", vif->dev->name);
+		snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
+			 "%s-tx", queue->name);
 		err = bind_interdomain_evtchn_to_irqhandler(
-			vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
-			vif->tx_irq_name, vif);
+			queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
+			queue->tx_irq_name, queue);
 		if (err < 0)
 			goto err_unmap;
-		vif->tx_irq = err;
-		disable_irq(vif->tx_irq);
+		queue->tx_irq = err;
+		disable_irq(queue->tx_irq);
 
-		snprintf(vif->rx_irq_name, sizeof(vif->rx_irq_name),
-			 "%s-rx", vif->dev->name);
+		snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
+			 "%s-rx", queue->name);
 		err = bind_interdomain_evtchn_to_irqhandler(
-			vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
-			vif->rx_irq_name, vif);
+			queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
+			queue->rx_irq_name, queue);
 		if (err < 0)
 			goto err_tx_unbind;
-		vif->rx_irq = err;
-		disable_irq(vif->rx_irq);
+		queue->rx_irq = err;
+		disable_irq(queue->rx_irq);
 	}
 
 	task = kthread_create(xenvif_kthread_guest_rx,
-			      (void *)vif, "%s-guest-rx", vif->dev->name);
+			      (void *)queue, "%s-guest-rx", queue->name);
 	if (IS_ERR(task)) {
-		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		pr_warn("Could not allocate kthread for %s\n", queue->name);
 		err = PTR_ERR(task);
 		goto err_rx_unbind;
 	}
-
-	vif->task = task;
+	queue->task = task;
 
 	task = kthread_create(xenvif_dealloc_kthread,
-			      (void *)vif, "%s-dealloc", vif->dev->name);
+			      (void *)queue, "%s-dealloc", queue->name);
 	if (IS_ERR(task)) {
-		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		pr_warn("Could not allocate kthread for %s\n", queue->name);
 		err = PTR_ERR(task);
 		goto err_rx_unbind;
 	}
+	queue->dealloc_task = task;
 
-	vif->dealloc_task = task;
-
-	rtnl_lock();
-	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
-		dev_set_mtu(vif->dev, ETH_DATA_LEN);
-	netdev_update_features(vif->dev);
-	netif_carrier_on(vif->dev);
-	if (netif_running(vif->dev))
-		xenvif_up(vif);
-	rtnl_unlock();
-
-	wake_up_process(vif->task);
-	wake_up_process(vif->dealloc_task);
+	wake_up_process(queue->task);
+	wake_up_process(queue->dealloc_task);
 
 	return 0;
 
 err_rx_unbind:
-	unbind_from_irqhandler(vif->rx_irq, vif);
-	vif->rx_irq = 0;
+	unbind_from_irqhandler(queue->rx_irq, queue);
+	queue->rx_irq = 0;
 err_tx_unbind:
-	unbind_from_irqhandler(vif->tx_irq, vif);
-	vif->tx_irq = 0;
+	unbind_from_irqhandler(queue->tx_irq, queue);
+	queue->tx_irq = 0;
 err_unmap:
-	xenvif_unmap_frontend_rings(vif);
+	xenvif_unmap_frontend_rings(queue);
 err:
 	module_put(THIS_MODULE);
 	return err;
@@ -521,38 +650,67 @@ void xenvif_carrier_off(struct xenvif *vif)
 	rtnl_unlock();
 }
 
+static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
+				      unsigned int worst_case_skb_lifetime)
+{
+	int i, unmap_timeout = 0;
+
+	for (i = 0; i < MAX_PENDING_REQS; ++i) {
+		if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+			unmap_timeout++;
+			schedule_timeout(msecs_to_jiffies(1000));
+			if (unmap_timeout > worst_case_skb_lifetime &&
+			    net_ratelimit())
+				netdev_err(queue->vif->dev,
+					   "Page still granted! Index: %x\n",
+					   i);
+			i = -1;
+		}
+	}
+}
+
 void xenvif_disconnect(struct xenvif *vif)
 {
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = vif->dev->real_num_tx_queues;
+	unsigned int queue_index;
+
 	if (netif_carrier_ok(vif->dev))
 		xenvif_carrier_off(vif);
 
-	if (vif->task) {
-		del_timer_sync(&vif->wake_queue);
-		kthread_stop(vif->task);
-		vif->task = NULL;
-	}
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &vif->queues[queue_index];
 
-	if (vif->dealloc_task) {
-		kthread_stop(vif->dealloc_task);
-		vif->dealloc_task = NULL;
-	}
+		if (queue->task) {
+			del_timer_sync(&queue->wake_queue);
+			kthread_stop(queue->task);
+			queue->task = NULL;
+		}
 
-	if (vif->tx_irq) {
-		if (vif->tx_irq == vif->rx_irq)
-			unbind_from_irqhandler(vif->tx_irq, vif);
-		else {
-			unbind_from_irqhandler(vif->tx_irq, vif);
-			unbind_from_irqhandler(vif->rx_irq, vif);
+		if (queue->dealloc_task) {
+			kthread_stop(queue->dealloc_task);
+			queue->dealloc_task = NULL;
+		}
+
+		if (queue->tx_irq) {
+			if (queue->tx_irq == queue->rx_irq)
+				unbind_from_irqhandler(queue->tx_irq, queue);
+			else {
+				unbind_from_irqhandler(queue->tx_irq, queue);
+				unbind_from_irqhandler(queue->rx_irq, queue);
+			}
+			queue->tx_irq = 0;
 		}
-		vif->tx_irq = 0;
-	}
 
-	xenvif_unmap_frontend_rings(vif);
+		xenvif_unmap_frontend_rings(queue);
+	}
 }
 
 void xenvif_free(struct xenvif *vif)
 {
-	int i, unmap_timeout = 0;
+	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues = vif->dev->real_num_tx_queues;
+	unsigned int queue_index;
 	/* Here we want to avoid timeout messages if an skb can be legitimately
 	 * stuck somewhere else. Realistically this could be an another vif's
 	 * internal or QDisc queue. That another vif also has this
@@ -567,31 +725,23 @@ void xenvif_free(struct xenvif *vif)
 	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
 		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
 
-	for (i = 0; i < MAX_PENDING_REQS; ++i) {
-		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
-			unmap_timeout++;
-			schedule_timeout(msecs_to_jiffies(1000));
-			if (unmap_timeout > worst_case_skb_lifetime &&
-			    net_ratelimit())
-				netdev_err(vif->dev,
-					   "Page still granted! Index: %x\n",
-					   i);
-			/* If there are still unmapped pages, reset the loop to
-			 * start checking again. We shouldn't exit here until
-			 * dealloc thread and NAPI instance release all the
-			 * pages. If a kernel bug causes the skbs to stall
-			 * somewhere, the interface cannot be brought down
-			 * properly.
-			 */
-			i = -1;
-		}
-	}
+	unregister_netdev(vif->dev);
 
-	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
+		queue = &vif->queues[queue_index];
 
-	netif_napi_del(&vif->napi);
+		xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
+		free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
 
-	unregister_netdev(vif->dev);
+		netif_napi_del(&queue->napi);
+	}
+
+	/* Free the array of queues. The call below does not require
+	 * rtnl_lock() because it happens after unregister_netdev().
+	 */
+	netif_set_real_num_tx_queues(vif->dev, 0);
+	vfree(vif->queues);
+	vif->queues = NULL;
 
 	free_netdev(vif->dev);
 