author     Wei Liu <wei.liu2@citrix.com>          2013-08-26 07:59:38 -0400
committer  David S. Miller <davem@davemloft.net>  2013-08-29 01:18:04 -0400
commit     b3f980bd827e6e81a050c518d60ed7811a83061d (patch)
tree       2104c030c277c9eab340ab424fa669562fc0189a /drivers/net/xen-netback
parent     43e9d1943278e96150b449ea1fa81f4ae27729d5 (diff)
xen-netback: switch to NAPI + kthread 1:1 model
This patch implements 1:1 model netback. NAPI and kthread are utilized
to do the weight-lifting job:

- NAPI is used for guest side TX (host side RX)
- kthread is used for guest side RX (host side TX)

Xenvif and xen_netbk are made into one structure to reduce code size.

This model provides better scheduling fairness among vifs. It is also a
prerequisite for implementing multiqueue for Xen netback.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
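For orientation, the control flow this patch introduces can be condensed as
follows. This is a sketch distilled from the hunks below — the two calls are
quoted from xenvif_alloc()/xenvif_connect(), everything else is commentary —
not additional code in the patch:

    /*
     * 1:1 model, per vif:
     *
     *   guest TX (frontend -> backend):
     *     xenvif_tx_interrupt() -> napi_schedule(&vif->napi)
     *       -> xenvif_poll() -> xen_netbk_tx_action(vif, budget)
     *
     *   guest RX (backend -> frontend):
     *     xenvif_start_xmit() -> xen_netbk_queue_tx_skb()
     *       -> wake_up(&vif->wq) -> xen_netbk_kthread()
     *         -> xen_netbk_rx_action(vif)
     */
    netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT); /* xenvif_alloc() */
    vif->task = kthread_create(xen_netbk_kthread,
                               (void *)vif, vif->dev->name);          /* xenvif_connect() */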
Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--  drivers/net/xen-netback/common.h     132
-rw-r--r--  drivers/net/xen-netback/interface.c  119
-rw-r--r--  drivers/net/xen-netback/netback.c    607
3 files changed, 351 insertions, 507 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8a4d77ee9c5b..9c1f15872e17 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -45,31 +45,109 @@
45#include <xen/grant_table.h> 45#include <xen/grant_table.h>
46#include <xen/xenbus.h> 46#include <xen/xenbus.h>
47 47
48struct xen_netbk; 48typedef unsigned int pending_ring_idx_t;
49#define INVALID_PENDING_RING_IDX (~0U)
50
51/* For the head field in pending_tx_info: it is used to indicate
52 * whether this tx info is the head of one or more coalesced requests.
53 *
54 * When head != INVALID_PENDING_RING_IDX, it means the start of a new
55 * tx requests queue and the end of previous queue.
56 *
57 * An example sequence of head fields (I = INVALID_PENDING_RING_IDX):
58 *
59 * ...|0 I I I|5 I|9 I I I|...
60 * -->|<-INUSE----------------
61 *
62 * After consuming the first slot(s) we have:
63 *
64 * ...|V V V V|5 I|9 I I I|...
65 * -----FREE->|<-INUSE--------
66 *
67 * where V stands for "valid pending ring index". Any number other
68 * than INVALID_PENDING_RING_IDX is OK. These entries are considered
69 * free and can contain any number other than
70 * INVALID_PENDING_RING_IDX. In practice we use 0.
71 *
72 * The in use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the
73 * above example) number is the index into pending_tx_info and
74 * mmap_pages arrays.
75 */
76struct pending_tx_info {
77 struct xen_netif_tx_request req; /* coalesced tx request */
78 pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
79 * if it is head of one or more tx
80 * reqs
81 */
82};
83
84#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
85#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
86
87struct xenvif_rx_meta {
88 int id;
89 int size;
90 int gso_size;
91};
92
93/* Discriminate from any valid pending_idx value. */
94#define INVALID_PENDING_IDX 0xFFFF
95
96#define MAX_BUFFER_OFFSET PAGE_SIZE
97
98#define MAX_PENDING_REQS 256
49 99
50struct xenvif { 100struct xenvif {
51 /* Unique identifier for this interface. */ 101 /* Unique identifier for this interface. */
52 domid_t domid; 102 domid_t domid;
53 unsigned int handle; 103 unsigned int handle;
54 104
55 /* Reference to netback processing backend. */ 105 /* Use NAPI for guest TX */
56 struct xen_netbk *netbk; 106 struct napi_struct napi;
107 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
108 unsigned int tx_irq;
109 /* Only used when feature-split-event-channels = 1 */
110 char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
111 struct xen_netif_tx_back_ring tx;
112 struct sk_buff_head tx_queue;
113 struct page *mmap_pages[MAX_PENDING_REQS];
114 pending_ring_idx_t pending_prod;
115 pending_ring_idx_t pending_cons;
116 u16 pending_ring[MAX_PENDING_REQS];
117 struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
118
119 /* Coalescing tx requests before copying makes number of grant
120 * copy ops greater or equal to number of slots required. In
121 * worst case a tx request consumes 2 gnttab_copy.
122 */
123 struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
57 124
58 u8 fe_dev_addr[6];
59 125
126 /* Use kthread for guest RX */
127 struct task_struct *task;
128 wait_queue_head_t wq;
60 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ 129 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
61 unsigned int tx_irq;
62 unsigned int rx_irq; 130 unsigned int rx_irq;
63 /* Only used when feature-split-event-channels = 1 */ 131 /* Only used when feature-split-event-channels = 1 */
64 char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
65 char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */ 132 char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
133 struct xen_netif_rx_back_ring rx;
134 struct sk_buff_head rx_queue;
66 135
67 /* List of frontends to notify after a batch of frames sent. */ 136 /* Allow xenvif_start_xmit() to peek ahead in the rx request
68 struct list_head notify_list; 137 * ring. This is a prediction of what rx_req_cons will be
138 * once all queued skbs are put on the ring.
139 */
140 RING_IDX rx_req_cons_peek;
141
142 /* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
143 * head/fragment page uses 2 copy operations because it
144 * straddles two buffers in the frontend.
145 */
146 struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
147 struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
69 148
70 /* The shared rings and indexes. */ 149
71 struct xen_netif_tx_back_ring tx; 150 u8 fe_dev_addr[6];
72 struct xen_netif_rx_back_ring rx;
73 151
74 /* Frontend feature information. */ 152 /* Frontend feature information. */
75 u8 can_sg:1; 153 u8 can_sg:1;
@@ -80,13 +158,6 @@ struct xenvif {
80 /* Internal feature information. */ 158 /* Internal feature information. */
81 u8 can_queue:1; /* can queue packets for receiver? */ 159 u8 can_queue:1; /* can queue packets for receiver? */
82 160
83 /*
84 * Allow xenvif_start_xmit() to peek ahead in the rx request
85 * ring. This is a prediction of what rx_req_cons will be
86 * once all queued skbs are put on the ring.
87 */
88 RING_IDX rx_req_cons_peek;
89
90 /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ 161 /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
91 unsigned long credit_bytes; 162 unsigned long credit_bytes;
92 unsigned long credit_usec; 163 unsigned long credit_usec;
@@ -97,11 +168,7 @@ struct xenvif {
97 unsigned long rx_gso_checksum_fixup; 168 unsigned long rx_gso_checksum_fixup;
98 169
99 /* Miscellaneous private stuff. */ 170 /* Miscellaneous private stuff. */
100 struct list_head schedule_list;
101 atomic_t refcnt;
102 struct net_device *dev; 171 struct net_device *dev;
103
104 wait_queue_head_t waiting_to_free;
105}; 172};
106 173
107static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) 174static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
@@ -109,9 +176,6 @@ static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
109 return to_xenbus_device(vif->dev->dev.parent); 176 return to_xenbus_device(vif->dev->dev.parent);
110} 177}
111 178
112#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
113#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
114
115struct xenvif *xenvif_alloc(struct device *parent, 179struct xenvif *xenvif_alloc(struct device *parent,
116 domid_t domid, 180 domid_t domid,
117 unsigned int handle); 181 unsigned int handle);
@@ -121,9 +185,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
121 unsigned int rx_evtchn); 185 unsigned int rx_evtchn);
122void xenvif_disconnect(struct xenvif *vif); 186void xenvif_disconnect(struct xenvif *vif);
123 187
124void xenvif_get(struct xenvif *vif);
125void xenvif_put(struct xenvif *vif);
126
127int xenvif_xenbus_init(void); 188int xenvif_xenbus_init(void);
128void xenvif_xenbus_fini(void); 189void xenvif_xenbus_fini(void);
129 190
@@ -139,18 +200,8 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
139 grant_ref_t tx_ring_ref, 200 grant_ref_t tx_ring_ref,
140 grant_ref_t rx_ring_ref); 201 grant_ref_t rx_ring_ref);
141 202
142/* (De)Register a xenvif with the netback backend. */
143void xen_netbk_add_xenvif(struct xenvif *vif);
144void xen_netbk_remove_xenvif(struct xenvif *vif);
145
146/* (De)Schedule backend processing for a xenvif */
147void xen_netbk_schedule_xenvif(struct xenvif *vif);
148void xen_netbk_deschedule_xenvif(struct xenvif *vif);
149
150/* Check for SKBs from frontend and schedule backend processing */ 203/* Check for SKBs from frontend and schedule backend processing */
151void xen_netbk_check_rx_xenvif(struct xenvif *vif); 204void xen_netbk_check_rx_xenvif(struct xenvif *vif);
152/* Receive an SKB from the frontend */
153void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
154 205
155/* Queue an SKB for transmission to the frontend */ 206/* Queue an SKB for transmission to the frontend */
156void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb); 207void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
@@ -163,6 +214,11 @@ void xenvif_carrier_off(struct xenvif *vif);
163/* Returns number of ring slots required to send an skb to the frontend */ 214/* Returns number of ring slots required to send an skb to the frontend */
164unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb); 215unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
165 216
217int xen_netbk_tx_action(struct xenvif *vif, int budget);
218void xen_netbk_rx_action(struct xenvif *vif);
219
220int xen_netbk_kthread(void *data);
221
166extern bool separate_tx_rx_irq; 222extern bool separate_tx_rx_irq;
167 223
168#endif /* __XEN_NETBACK__COMMON_H__ */ 224#endif /* __XEN_NETBACK__COMMON_H__ */
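The head-marking scheme documented in the new comment block above (head ==
INVALID_PENDING_RING_IDX for continuation slots, a valid pending ring index
for the head of a coalesced group) is what lets netback.c walk merged tx
requests. A minimal illustrative helper — not part of the patch — built only
from the patch's own pending_tx_is_head() and pending_index():

    /* Illustration only: visit every slot belonging to one coalesced tx
     * request, stopping when the next pending_ring entry starts a new
     * group.  Mirrors the peek loop in xen_netbk_tx_check_gop() below.
     */
    static void walk_coalesced_slots(struct xenvif *vif, pending_ring_idx_t head)
    {
            u16 peek;

            do {
                    /* ... handle the pending_tx_info slot for this entry ... */
                    peek = vif->pending_ring[pending_index(++head)];
            } while (!pending_tx_is_head(vif, peek));
    }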
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 087d2db0389d..44d6b707c77e 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -30,6 +30,7 @@
30 30
31#include "common.h" 31#include "common.h"
32 32
33#include <linux/kthread.h>
33#include <linux/ethtool.h> 34#include <linux/ethtool.h>
34#include <linux/rtnetlink.h> 35#include <linux/rtnetlink.h>
35#include <linux/if_vlan.h> 36#include <linux/if_vlan.h>
@@ -38,17 +39,7 @@
38#include <asm/xen/hypercall.h> 39#include <asm/xen/hypercall.h>
39 40
40#define XENVIF_QUEUE_LENGTH 32 41#define XENVIF_QUEUE_LENGTH 32
41 42#define XENVIF_NAPI_WEIGHT 64
42void xenvif_get(struct xenvif *vif)
43{
44 atomic_inc(&vif->refcnt);
45}
46
47void xenvif_put(struct xenvif *vif)
48{
49 if (atomic_dec_and_test(&vif->refcnt))
50 wake_up(&vif->waiting_to_free);
51}
52 43
53int xenvif_schedulable(struct xenvif *vif) 44int xenvif_schedulable(struct xenvif *vif)
54{ 45{
@@ -64,21 +55,55 @@ static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
64{ 55{
65 struct xenvif *vif = dev_id; 56 struct xenvif *vif = dev_id;
66 57
67 if (vif->netbk == NULL) 58 if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
68 return IRQ_HANDLED; 59 napi_schedule(&vif->napi);
69
70 xen_netbk_schedule_xenvif(vif);
71 60
72 return IRQ_HANDLED; 61 return IRQ_HANDLED;
73} 62}
74 63
64static int xenvif_poll(struct napi_struct *napi, int budget)
65{
66 struct xenvif *vif = container_of(napi, struct xenvif, napi);
67 int work_done;
68
69 work_done = xen_netbk_tx_action(vif, budget);
70
71 if (work_done < budget) {
72 int more_to_do = 0;
73 unsigned long flags;
74
75 /* It is necessary to disable IRQ before calling
76 * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might
77 * lose event from the frontend.
78 *
79 * Consider:
80 * RING_HAS_UNCONSUMED_REQUESTS
81 * <frontend generates event to trigger napi_schedule>
82 * __napi_complete
83 *
84 * This handler is still in scheduled state so the
85 * event has no effect at all. After __napi_complete
86 * this handler is descheduled and cannot get
87 * scheduled again. We lose event in this case and the ring
88 * will be completely stalled.
89 */
90
91 local_irq_save(flags);
92
93 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
94 if (!more_to_do)
95 __napi_complete(napi);
96
97 local_irq_restore(flags);
98 }
99
100 return work_done;
101}
102
75static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) 103static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
76{ 104{
77 struct xenvif *vif = dev_id; 105 struct xenvif *vif = dev_id;
78 106
79 if (vif->netbk == NULL)
80 return IRQ_HANDLED;
81
82 if (xenvif_rx_schedulable(vif)) 107 if (xenvif_rx_schedulable(vif))
83 netif_wake_queue(vif->dev); 108 netif_wake_queue(vif->dev);
84 109
@@ -99,7 +124,8 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
99 124
100 BUG_ON(skb->dev != dev); 125 BUG_ON(skb->dev != dev);
101 126
102 if (vif->netbk == NULL) 127 /* Drop the packet if vif is not ready */
128 if (vif->task == NULL)
103 goto drop; 129 goto drop;
104 130
105 /* Drop the packet if the target domain has no receive buffers. */ 131 /* Drop the packet if the target domain has no receive buffers. */
@@ -108,7 +134,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
108 134
109 /* Reserve ring slots for the worst-case number of fragments. */ 135 /* Reserve ring slots for the worst-case number of fragments. */
110 vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb); 136 vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
111 xenvif_get(vif);
112 137
113 if (vif->can_queue && xen_netbk_must_stop_queue(vif)) 138 if (vif->can_queue && xen_netbk_must_stop_queue(vif))
114 netif_stop_queue(dev); 139 netif_stop_queue(dev);
@@ -123,11 +148,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
123 return NETDEV_TX_OK; 148 return NETDEV_TX_OK;
124} 149}
125 150
126void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb)
127{
128 netif_rx_ni(skb);
129}
130
131void xenvif_notify_tx_completion(struct xenvif *vif) 151void xenvif_notify_tx_completion(struct xenvif *vif)
132{ 152{
133 if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif)) 153 if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
@@ -142,7 +162,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
142 162
143static void xenvif_up(struct xenvif *vif) 163static void xenvif_up(struct xenvif *vif)
144{ 164{
145 xen_netbk_add_xenvif(vif); 165 napi_enable(&vif->napi);
146 enable_irq(vif->tx_irq); 166 enable_irq(vif->tx_irq);
147 if (vif->tx_irq != vif->rx_irq) 167 if (vif->tx_irq != vif->rx_irq)
148 enable_irq(vif->rx_irq); 168 enable_irq(vif->rx_irq);
@@ -151,12 +171,11 @@ static void xenvif_up(struct xenvif *vif)
151 171
152static void xenvif_down(struct xenvif *vif) 172static void xenvif_down(struct xenvif *vif)
153{ 173{
174 napi_disable(&vif->napi);
154 disable_irq(vif->tx_irq); 175 disable_irq(vif->tx_irq);
155 if (vif->tx_irq != vif->rx_irq) 176 if (vif->tx_irq != vif->rx_irq)
156 disable_irq(vif->rx_irq); 177 disable_irq(vif->rx_irq);
157 del_timer_sync(&vif->credit_timeout); 178 del_timer_sync(&vif->credit_timeout);
158 xen_netbk_deschedule_xenvif(vif);
159 xen_netbk_remove_xenvif(vif);
160} 179}
161 180
162static int xenvif_open(struct net_device *dev) 181static int xenvif_open(struct net_device *dev)
@@ -272,11 +291,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
272 struct net_device *dev; 291 struct net_device *dev;
273 struct xenvif *vif; 292 struct xenvif *vif;
274 char name[IFNAMSIZ] = {}; 293 char name[IFNAMSIZ] = {};
294 int i;
275 295
276 snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); 296 snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
277 dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup); 297 dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
278 if (dev == NULL) { 298 if (dev == NULL) {
279 pr_warn("Could not allocate netdev\n"); 299 pr_warn("Could not allocate netdev for %s\n", name);
280 return ERR_PTR(-ENOMEM); 300 return ERR_PTR(-ENOMEM);
281 } 301 }
282 302
@@ -285,14 +305,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
285 vif = netdev_priv(dev); 305 vif = netdev_priv(dev);
286 vif->domid = domid; 306 vif->domid = domid;
287 vif->handle = handle; 307 vif->handle = handle;
288 vif->netbk = NULL;
289 vif->can_sg = 1; 308 vif->can_sg = 1;
290 vif->csum = 1; 309 vif->csum = 1;
291 atomic_set(&vif->refcnt, 1);
292 init_waitqueue_head(&vif->waiting_to_free);
293 vif->dev = dev; 310 vif->dev = dev;
294 INIT_LIST_HEAD(&vif->schedule_list);
295 INIT_LIST_HEAD(&vif->notify_list);
296 311
297 vif->credit_bytes = vif->remaining_credit = ~0UL; 312 vif->credit_bytes = vif->remaining_credit = ~0UL;
298 vif->credit_usec = 0UL; 313 vif->credit_usec = 0UL;
@@ -307,6 +322,16 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
307 322
308 dev->tx_queue_len = XENVIF_QUEUE_LENGTH; 323 dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
309 324
325 skb_queue_head_init(&vif->rx_queue);
326 skb_queue_head_init(&vif->tx_queue);
327
328 vif->pending_cons = 0;
329 vif->pending_prod = MAX_PENDING_REQS;
330 for (i = 0; i < MAX_PENDING_REQS; i++)
331 vif->pending_ring[i] = i;
332 for (i = 0; i < MAX_PENDING_REQS; i++)
333 vif->mmap_pages[i] = NULL;
334
310 /* 335 /*
311 * Initialise a dummy MAC address. We choose the numerically 336 * Initialise a dummy MAC address. We choose the numerically
312 * largest non-broadcast address to prevent the address getting 337 * largest non-broadcast address to prevent the address getting
@@ -316,6 +341,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
316 memset(dev->dev_addr, 0xFF, ETH_ALEN); 341 memset(dev->dev_addr, 0xFF, ETH_ALEN);
317 dev->dev_addr[0] &= ~0x01; 342 dev->dev_addr[0] &= ~0x01;
318 343
344 netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);
345
319 netif_carrier_off(dev); 346 netif_carrier_off(dev);
320 347
321 err = register_netdev(dev); 348 err = register_netdev(dev);
@@ -377,7 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
377 disable_irq(vif->rx_irq); 404 disable_irq(vif->rx_irq);
378 } 405 }
379 406
380 xenvif_get(vif); 407 init_waitqueue_head(&vif->wq);
408 vif->task = kthread_create(xen_netbk_kthread,
409 (void *)vif, vif->dev->name);
410 if (IS_ERR(vif->task)) {
411 pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
412 err = PTR_ERR(vif->task);
413 goto err_rx_unbind;
414 }
381 415
382 rtnl_lock(); 416 rtnl_lock();
383 if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) 417 if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
@@ -388,7 +422,13 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
388 xenvif_up(vif); 422 xenvif_up(vif);
389 rtnl_unlock(); 423 rtnl_unlock();
390 424
425 wake_up_process(vif->task);
426
391 return 0; 427 return 0;
428
429err_rx_unbind:
430 unbind_from_irqhandler(vif->rx_irq, vif);
431 vif->rx_irq = 0;
392err_tx_unbind: 432err_tx_unbind:
393 unbind_from_irqhandler(vif->tx_irq, vif); 433 unbind_from_irqhandler(vif->tx_irq, vif);
394 vif->tx_irq = 0; 434 vif->tx_irq = 0;
@@ -408,7 +448,6 @@ void xenvif_carrier_off(struct xenvif *vif)
408 if (netif_running(dev)) 448 if (netif_running(dev))
409 xenvif_down(vif); 449 xenvif_down(vif);
410 rtnl_unlock(); 450 rtnl_unlock();
411 xenvif_put(vif);
412} 451}
413 452
414void xenvif_disconnect(struct xenvif *vif) 453void xenvif_disconnect(struct xenvif *vif)
@@ -422,9 +461,6 @@ void xenvif_disconnect(struct xenvif *vif)
422 if (netif_carrier_ok(vif->dev)) 461 if (netif_carrier_ok(vif->dev))
423 xenvif_carrier_off(vif); 462 xenvif_carrier_off(vif);
424 463
425 atomic_dec(&vif->refcnt);
426 wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
427
428 if (vif->tx_irq) { 464 if (vif->tx_irq) {
429 if (vif->tx_irq == vif->rx_irq) 465 if (vif->tx_irq == vif->rx_irq)
430 unbind_from_irqhandler(vif->tx_irq, vif); 466 unbind_from_irqhandler(vif->tx_irq, vif);
@@ -438,6 +474,11 @@ void xenvif_disconnect(struct xenvif *vif)
438 need_module_put = 1; 474 need_module_put = 1;
439 } 475 }
440 476
477 if (vif->task)
478 kthread_stop(vif->task);
479
480 netif_napi_del(&vif->napi);
481
441 unregister_netdev(vif->dev); 482 unregister_netdev(vif->dev);
442 483
443 xen_netbk_unmap_frontend_rings(vif); 484 xen_netbk_unmap_frontend_rings(vif);
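xenvif_connect() above creates one kthread per vif running
xen_netbk_kthread(), whose body lives in netback.c (the excerpt below ends
before reaching it). Sketched under that assumption, using only symbols this
patch declares (vif->wq, rx_work_todo(), xen_netbk_rx_action()), the expected
shape of the thread is a simple wait/serve loop:

    /* Sketch, not a verbatim copy of the netback.c hunk: per-vif guest-RX
     * (host-TX) service loop, woken by xen_netbk_kick_thread().
     */
    int xen_netbk_kthread(void *data)
    {
            struct xenvif *vif = data;

            while (!kthread_should_stop()) {
                    wait_event_interruptible(vif->wq,
                                             rx_work_todo(vif) ||
                                             kthread_should_stop());
                    if (kthread_should_stop())
                            break;

                    if (rx_work_todo(vif))
                            xen_netbk_rx_action(vif);

                    cond_resched();
            }

            return 0;
    }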
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 91f163d03a49..44ccc674c02f 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -70,116 +70,25 @@ module_param(fatal_skb_slots, uint, 0444);
70 */ 70 */
71#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN 71#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
72 72
73typedef unsigned int pending_ring_idx_t;
74#define INVALID_PENDING_RING_IDX (~0U)
75
76struct pending_tx_info {
77 struct xen_netif_tx_request req; /* coalesced tx request */
78 struct xenvif *vif;
79 pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
80 * if it is head of one or more tx
81 * reqs
82 */
83};
84
85struct netbk_rx_meta {
86 int id;
87 int size;
88 int gso_size;
89};
90
91#define MAX_PENDING_REQS 256
92
93/* Discriminate from any valid pending_idx value. */
94#define INVALID_PENDING_IDX 0xFFFF
95
96#define MAX_BUFFER_OFFSET PAGE_SIZE
97
98struct xen_netbk {
99 wait_queue_head_t wq;
100 struct task_struct *task;
101
102 struct sk_buff_head rx_queue;
103 struct sk_buff_head tx_queue;
104
105 struct timer_list net_timer;
106
107 struct page *mmap_pages[MAX_PENDING_REQS];
108
109 pending_ring_idx_t pending_prod;
110 pending_ring_idx_t pending_cons;
111 struct list_head net_schedule_list;
112
113 /* Protect the net_schedule_list in netif. */
114 spinlock_t net_schedule_list_lock;
115
116 atomic_t netfront_count;
117
118 struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
119 /* Coalescing tx requests before copying makes number of grant
120 * copy ops greater or equal to number of slots required. In
121 * worst case a tx request consumes 2 gnttab_copy.
122 */
123 struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
124
125 u16 pending_ring[MAX_PENDING_REQS];
126
127 /*
128 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
129 * head/fragment page uses 2 copy operations because it
130 * straddles two buffers in the frontend.
131 */
132 struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
133 struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
134};
135
136static struct xen_netbk *xen_netbk;
137static int xen_netbk_group_nr;
138
139/* 73/*
140 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of 74 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
141 * one or more merged tx requests, otherwise it is the continuation of 75 * one or more merged tx requests, otherwise it is the continuation of
142 * previous tx request. 76 * previous tx request.
143 */ 77 */
144static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx) 78static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
145{
146 return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
147}
148
149void xen_netbk_add_xenvif(struct xenvif *vif)
150{
151 int i;
152 int min_netfront_count;
153 int min_group = 0;
154 struct xen_netbk *netbk;
155
156 min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
157 for (i = 0; i < xen_netbk_group_nr; i++) {
158 int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
159 if (netfront_count < min_netfront_count) {
160 min_group = i;
161 min_netfront_count = netfront_count;
162 }
163 }
164
165 netbk = &xen_netbk[min_group];
166
167 vif->netbk = netbk;
168 atomic_inc(&netbk->netfront_count);
169}
170
171void xen_netbk_remove_xenvif(struct xenvif *vif)
172{ 79{
173 struct xen_netbk *netbk = vif->netbk; 80 return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
174 vif->netbk = NULL;
175 atomic_dec(&netbk->netfront_count);
176} 81}
177 82
178static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, 83static void xen_netbk_idx_release(struct xenvif *vif, u16 pending_idx,
179 u8 status); 84 u8 status);
180static void make_tx_response(struct xenvif *vif, 85static void make_tx_response(struct xenvif *vif,
181 struct xen_netif_tx_request *txp, 86 struct xen_netif_tx_request *txp,
182 s8 st); 87 s8 st);
88
89static inline int tx_work_todo(struct xenvif *vif);
90static inline int rx_work_todo(struct xenvif *vif);
91
183static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 92static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
184 u16 id, 93 u16 id,
185 s8 st, 94 s8 st,
@@ -187,16 +96,16 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
187 u16 size, 96 u16 size,
188 u16 flags); 97 u16 flags);
189 98
190static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, 99static inline unsigned long idx_to_pfn(struct xenvif *vif,
191 u16 idx) 100 u16 idx)
192{ 101{
193 return page_to_pfn(netbk->mmap_pages[idx]); 102 return page_to_pfn(vif->mmap_pages[idx]);
194} 103}
195 104
196static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, 105static inline unsigned long idx_to_kaddr(struct xenvif *vif,
197 u16 idx) 106 u16 idx)
198{ 107{
199 return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx)); 108 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
200} 109}
201 110
202/* 111/*
@@ -224,15 +133,10 @@ static inline pending_ring_idx_t pending_index(unsigned i)
224 return i & (MAX_PENDING_REQS-1); 133 return i & (MAX_PENDING_REQS-1);
225} 134}
226 135
227static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) 136static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
228{ 137{
229 return MAX_PENDING_REQS - 138 return MAX_PENDING_REQS -
230 netbk->pending_prod + netbk->pending_cons; 139 vif->pending_prod + vif->pending_cons;
231}
232
233static void xen_netbk_kick_thread(struct xen_netbk *netbk)
234{
235 wake_up(&netbk->wq);
236} 140}
237 141
238static int max_required_rx_slots(struct xenvif *vif) 142static int max_required_rx_slots(struct xenvif *vif)
@@ -364,15 +268,15 @@ struct netrx_pending_operations {
364 unsigned copy_prod, copy_cons; 268 unsigned copy_prod, copy_cons;
365 unsigned meta_prod, meta_cons; 269 unsigned meta_prod, meta_cons;
366 struct gnttab_copy *copy; 270 struct gnttab_copy *copy;
367 struct netbk_rx_meta *meta; 271 struct xenvif_rx_meta *meta;
368 int copy_off; 272 int copy_off;
369 grant_ref_t copy_gref; 273 grant_ref_t copy_gref;
370}; 274};
371 275
372static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif, 276static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
373 struct netrx_pending_operations *npo) 277 struct netrx_pending_operations *npo)
374{ 278{
375 struct netbk_rx_meta *meta; 279 struct xenvif_rx_meta *meta;
376 struct xen_netif_rx_request *req; 280 struct xen_netif_rx_request *req;
377 281
378 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 282 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
@@ -398,7 +302,7 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
398 unsigned long offset, int *head) 302 unsigned long offset, int *head)
399{ 303{
400 struct gnttab_copy *copy_gop; 304 struct gnttab_copy *copy_gop;
401 struct netbk_rx_meta *meta; 305 struct xenvif_rx_meta *meta;
402 unsigned long bytes; 306 unsigned long bytes;
403 307
404 /* Data must not cross a page boundary. */ 308 /* Data must not cross a page boundary. */
@@ -434,15 +338,15 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
434 338
435 copy_gop = npo->copy + npo->copy_prod++; 339 copy_gop = npo->copy + npo->copy_prod++;
436 copy_gop->flags = GNTCOPY_dest_gref; 340 copy_gop->flags = GNTCOPY_dest_gref;
341 copy_gop->len = bytes;
342
437 copy_gop->source.domid = DOMID_SELF; 343 copy_gop->source.domid = DOMID_SELF;
438 copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); 344 copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
439
440 copy_gop->source.offset = offset; 345 copy_gop->source.offset = offset;
441 copy_gop->dest.domid = vif->domid;
442 346
347 copy_gop->dest.domid = vif->domid;
443 copy_gop->dest.offset = npo->copy_off; 348 copy_gop->dest.offset = npo->copy_off;
444 copy_gop->dest.u.ref = npo->copy_gref; 349 copy_gop->dest.u.ref = npo->copy_gref;
445 copy_gop->len = bytes;
446 350
447 npo->copy_off += bytes; 351 npo->copy_off += bytes;
448 meta->size += bytes; 352 meta->size += bytes;
@@ -485,7 +389,7 @@ static int netbk_gop_skb(struct sk_buff *skb,
485 int nr_frags = skb_shinfo(skb)->nr_frags; 389 int nr_frags = skb_shinfo(skb)->nr_frags;
486 int i; 390 int i;
487 struct xen_netif_rx_request *req; 391 struct xen_netif_rx_request *req;
488 struct netbk_rx_meta *meta; 392 struct xenvif_rx_meta *meta;
489 unsigned char *data; 393 unsigned char *data;
490 int head = 1; 394 int head = 1;
491 int old_meta_prod; 395 int old_meta_prod;
@@ -565,7 +469,7 @@ static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
565} 469}
566 470
567static void netbk_add_frag_responses(struct xenvif *vif, int status, 471static void netbk_add_frag_responses(struct xenvif *vif, int status,
568 struct netbk_rx_meta *meta, 472 struct xenvif_rx_meta *meta,
569 int nr_meta_slots) 473 int nr_meta_slots)
570{ 474{
571 int i; 475 int i;
@@ -594,9 +498,13 @@ struct skb_cb_overlay {
594 int meta_slots_used; 498 int meta_slots_used;
595}; 499};
596 500
597static void xen_netbk_rx_action(struct xen_netbk *netbk) 501static void xen_netbk_kick_thread(struct xenvif *vif)
502{
503 wake_up(&vif->wq);
504}
505
506void xen_netbk_rx_action(struct xenvif *vif)
598{ 507{
599 struct xenvif *vif = NULL, *tmp;
600 s8 status; 508 s8 status;
601 u16 flags; 509 u16 flags;
602 struct xen_netif_rx_response *resp; 510 struct xen_netif_rx_response *resp;
@@ -608,17 +516,18 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
608 int count; 516 int count;
609 unsigned long offset; 517 unsigned long offset;
610 struct skb_cb_overlay *sco; 518 struct skb_cb_overlay *sco;
519 int need_to_notify = 0;
611 520
612 struct netrx_pending_operations npo = { 521 struct netrx_pending_operations npo = {
613 .copy = netbk->grant_copy_op, 522 .copy = vif->grant_copy_op,
614 .meta = netbk->meta, 523 .meta = vif->meta,
615 }; 524 };
616 525
617 skb_queue_head_init(&rxq); 526 skb_queue_head_init(&rxq);
618 527
619 count = 0; 528 count = 0;
620 529
621 while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { 530 while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
622 vif = netdev_priv(skb->dev); 531 vif = netdev_priv(skb->dev);
623 nr_frags = skb_shinfo(skb)->nr_frags; 532 nr_frags = skb_shinfo(skb)->nr_frags;
624 533
@@ -635,27 +544,27 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
635 break; 544 break;
636 } 545 }
637 546
638 BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); 547 BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));
639 548
640 if (!npo.copy_prod) 549 if (!npo.copy_prod)
641 return; 550 return;
642 551
643 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); 552 BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
644 gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod); 553 gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
645 554
646 while ((skb = __skb_dequeue(&rxq)) != NULL) { 555 while ((skb = __skb_dequeue(&rxq)) != NULL) {
647 sco = (struct skb_cb_overlay *)skb->cb; 556 sco = (struct skb_cb_overlay *)skb->cb;
648 557
649 vif = netdev_priv(skb->dev); 558 vif = netdev_priv(skb->dev);
650 559
651 if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) { 560 if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
652 resp = RING_GET_RESPONSE(&vif->rx, 561 resp = RING_GET_RESPONSE(&vif->rx,
653 vif->rx.rsp_prod_pvt++); 562 vif->rx.rsp_prod_pvt++);
654 563
655 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; 564 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
656 565
657 resp->offset = netbk->meta[npo.meta_cons].gso_size; 566 resp->offset = vif->meta[npo.meta_cons].gso_size;
658 resp->id = netbk->meta[npo.meta_cons].id; 567 resp->id = vif->meta[npo.meta_cons].id;
659 resp->status = sco->meta_slots_used; 568 resp->status = sco->meta_slots_used;
660 569
661 npo.meta_cons++; 570 npo.meta_cons++;
@@ -680,12 +589,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
680 flags |= XEN_NETRXF_data_validated; 589 flags |= XEN_NETRXF_data_validated;
681 590
682 offset = 0; 591 offset = 0;
683 resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id, 592 resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
684 status, offset, 593 status, offset,
685 netbk->meta[npo.meta_cons].size, 594 vif->meta[npo.meta_cons].size,
686 flags); 595 flags);
687 596
688 if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) { 597 if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
689 struct xen_netif_extra_info *gso = 598 struct xen_netif_extra_info *gso =
690 (struct xen_netif_extra_info *) 599 (struct xen_netif_extra_info *)
691 RING_GET_RESPONSE(&vif->rx, 600 RING_GET_RESPONSE(&vif->rx,
@@ -693,7 +602,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
693 602
694 resp->flags |= XEN_NETRXF_extra_info; 603 resp->flags |= XEN_NETRXF_extra_info;
695 604
696 gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size; 605 gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
697 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; 606 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
698 gso->u.gso.pad = 0; 607 gso->u.gso.pad = 0;
699 gso->u.gso.features = 0; 608 gso->u.gso.features = 0;
@@ -703,112 +612,33 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
703 } 612 }
704 613
705 netbk_add_frag_responses(vif, status, 614 netbk_add_frag_responses(vif, status,
706 netbk->meta + npo.meta_cons + 1, 615 vif->meta + npo.meta_cons + 1,
707 sco->meta_slots_used); 616 sco->meta_slots_used);
708 617
709 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); 618 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
710 619
620 if (ret)
621 need_to_notify = 1;
622
711 xenvif_notify_tx_completion(vif); 623 xenvif_notify_tx_completion(vif);
712 624
713 if (ret && list_empty(&vif->notify_list))
714 list_add_tail(&vif->notify_list, &notify);
715 else
716 xenvif_put(vif);
717 npo.meta_cons += sco->meta_slots_used; 625 npo.meta_cons += sco->meta_slots_used;
718 dev_kfree_skb(skb); 626 dev_kfree_skb(skb);
719 } 627 }
720 628
721 list_for_each_entry_safe(vif, tmp, &notify, notify_list) { 629 if (need_to_notify)
722 notify_remote_via_irq(vif->rx_irq); 630 notify_remote_via_irq(vif->rx_irq);
723 list_del_init(&vif->notify_list);
724 xenvif_put(vif);
725 }
726 631
727 /* More work to do? */ 632 /* More work to do? */
728 if (!skb_queue_empty(&netbk->rx_queue) && 633 if (!skb_queue_empty(&vif->rx_queue))
729 !timer_pending(&netbk->net_timer)) 634 xen_netbk_kick_thread(vif);
730 xen_netbk_kick_thread(netbk);
731} 635}
732 636
733void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb) 637void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
734{ 638{
735 struct xen_netbk *netbk = vif->netbk; 639 skb_queue_tail(&vif->rx_queue, skb);
736 640
737 skb_queue_tail(&netbk->rx_queue, skb); 641 xen_netbk_kick_thread(vif);
738
739 xen_netbk_kick_thread(netbk);
740}
741
742static void xen_netbk_alarm(unsigned long data)
743{
744 struct xen_netbk *netbk = (struct xen_netbk *)data;
745 xen_netbk_kick_thread(netbk);
746}
747
748static int __on_net_schedule_list(struct xenvif *vif)
749{
750 return !list_empty(&vif->schedule_list);
751}
752
753/* Must be called with net_schedule_list_lock held */
754static void remove_from_net_schedule_list(struct xenvif *vif)
755{
756 if (likely(__on_net_schedule_list(vif))) {
757 list_del_init(&vif->schedule_list);
758 xenvif_put(vif);
759 }
760}
761
762static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
763{
764 struct xenvif *vif = NULL;
765
766 spin_lock_irq(&netbk->net_schedule_list_lock);
767 if (list_empty(&netbk->net_schedule_list))
768 goto out;
769
770 vif = list_first_entry(&netbk->net_schedule_list,
771 struct xenvif, schedule_list);
772 if (!vif)
773 goto out;
774
775 xenvif_get(vif);
776
777 remove_from_net_schedule_list(vif);
778out:
779 spin_unlock_irq(&netbk->net_schedule_list_lock);
780 return vif;
781}
782
783void xen_netbk_schedule_xenvif(struct xenvif *vif)
784{
785 unsigned long flags;
786 struct xen_netbk *netbk = vif->netbk;
787
788 if (__on_net_schedule_list(vif))
789 goto kick;
790
791 spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
792 if (!__on_net_schedule_list(vif) &&
793 likely(xenvif_schedulable(vif))) {
794 list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
795 xenvif_get(vif);
796 }
797 spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
798
799kick:
800 smp_mb();
801 if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
802 !list_empty(&netbk->net_schedule_list))
803 xen_netbk_kick_thread(netbk);
804}
805
806void xen_netbk_deschedule_xenvif(struct xenvif *vif)
807{
808 struct xen_netbk *netbk = vif->netbk;
809 spin_lock_irq(&netbk->net_schedule_list_lock);
810 remove_from_net_schedule_list(vif);
811 spin_unlock_irq(&netbk->net_schedule_list_lock);
812} 642}
813 643
814void xen_netbk_check_rx_xenvif(struct xenvif *vif) 644void xen_netbk_check_rx_xenvif(struct xenvif *vif)
@@ -818,7 +648,7 @@ void xen_netbk_check_rx_xenvif(struct xenvif *vif)
818 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); 648 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
819 649
820 if (more_to_do) 650 if (more_to_do)
821 xen_netbk_schedule_xenvif(vif); 651 napi_schedule(&vif->napi);
822} 652}
823 653
824static void tx_add_credit(struct xenvif *vif) 654static void tx_add_credit(struct xenvif *vif)
@@ -860,15 +690,12 @@ static void netbk_tx_err(struct xenvif *vif,
860 txp = RING_GET_REQUEST(&vif->tx, cons++); 690 txp = RING_GET_REQUEST(&vif->tx, cons++);
861 } while (1); 691 } while (1);
862 vif->tx.req_cons = cons; 692 vif->tx.req_cons = cons;
863 xen_netbk_check_rx_xenvif(vif);
864 xenvif_put(vif);
865} 693}
866 694
867static void netbk_fatal_tx_err(struct xenvif *vif) 695static void netbk_fatal_tx_err(struct xenvif *vif)
868{ 696{
869 netdev_err(vif->dev, "fatal error; disabling device\n"); 697 netdev_err(vif->dev, "fatal error; disabling device\n");
870 xenvif_carrier_off(vif); 698 xenvif_carrier_off(vif);
871 xenvif_put(vif);
872} 699}
873 700
874static int netbk_count_requests(struct xenvif *vif, 701static int netbk_count_requests(struct xenvif *vif,
@@ -969,19 +796,20 @@ static int netbk_count_requests(struct xenvif *vif,
969 return slots; 796 return slots;
970} 797}
971 798
972static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk, 799static struct page *xen_netbk_alloc_page(struct xenvif *vif,
973 u16 pending_idx) 800 u16 pending_idx)
974{ 801{
975 struct page *page; 802 struct page *page;
976 page = alloc_page(GFP_KERNEL|__GFP_COLD); 803
804 page = alloc_page(GFP_ATOMIC|__GFP_COLD);
977 if (!page) 805 if (!page)
978 return NULL; 806 return NULL;
979 netbk->mmap_pages[pending_idx] = page; 807 vif->mmap_pages[pending_idx] = page;
808
980 return page; 809 return page;
981} 810}
982 811
983static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, 812static struct gnttab_copy *xen_netbk_get_requests(struct xenvif *vif,
984 struct xenvif *vif,
985 struct sk_buff *skb, 813 struct sk_buff *skb,
986 struct xen_netif_tx_request *txp, 814 struct xen_netif_tx_request *txp,
987 struct gnttab_copy *gop) 815 struct gnttab_copy *gop)
@@ -1012,9 +840,9 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
1012 for (shinfo->nr_frags = slot = start; slot < nr_slots; 840 for (shinfo->nr_frags = slot = start; slot < nr_slots;
1013 shinfo->nr_frags++) { 841 shinfo->nr_frags++) {
1014 struct pending_tx_info *pending_tx_info = 842 struct pending_tx_info *pending_tx_info =
1015 netbk->pending_tx_info; 843 vif->pending_tx_info;
1016 844
1017 page = alloc_page(GFP_KERNEL|__GFP_COLD); 845 page = alloc_page(GFP_ATOMIC|__GFP_COLD);
1018 if (!page) 846 if (!page)
1019 goto err; 847 goto err;
1020 848
@@ -1049,21 +877,18 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
1049 gop->len = txp->size; 877 gop->len = txp->size;
1050 dst_offset += gop->len; 878 dst_offset += gop->len;
1051 879
1052 index = pending_index(netbk->pending_cons++); 880 index = pending_index(vif->pending_cons++);
1053 881
1054 pending_idx = netbk->pending_ring[index]; 882 pending_idx = vif->pending_ring[index];
1055 883
1056 memcpy(&pending_tx_info[pending_idx].req, txp, 884 memcpy(&pending_tx_info[pending_idx].req, txp,
1057 sizeof(*txp)); 885 sizeof(*txp));
1058 xenvif_get(vif);
1059
1060 pending_tx_info[pending_idx].vif = vif;
1061 886
1062 /* Poison these fields, corresponding 887 /* Poison these fields, corresponding
1063 * fields for head tx req will be set 888 * fields for head tx req will be set
1064 * to correct values after the loop. 889 * to correct values after the loop.
1065 */ 890 */
1066 netbk->mmap_pages[pending_idx] = (void *)(~0UL); 891 vif->mmap_pages[pending_idx] = (void *)(~0UL);
1067 pending_tx_info[pending_idx].head = 892 pending_tx_info[pending_idx].head =
1068 INVALID_PENDING_RING_IDX; 893 INVALID_PENDING_RING_IDX;
1069 894
@@ -1083,7 +908,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
1083 first->req.offset = 0; 908 first->req.offset = 0;
1084 first->req.size = dst_offset; 909 first->req.size = dst_offset;
1085 first->head = start_idx; 910 first->head = start_idx;
1086 netbk->mmap_pages[head_idx] = page; 911 vif->mmap_pages[head_idx] = page;
1087 frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); 912 frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
1088 } 913 }
1089 914
@@ -1093,18 +918,18 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
1093err: 918err:
1094 /* Unwind, freeing all pages and sending error responses. */ 919 /* Unwind, freeing all pages and sending error responses. */
1095 while (shinfo->nr_frags-- > start) { 920 while (shinfo->nr_frags-- > start) {
1096 xen_netbk_idx_release(netbk, 921 xen_netbk_idx_release(vif,
1097 frag_get_pending_idx(&frags[shinfo->nr_frags]), 922 frag_get_pending_idx(&frags[shinfo->nr_frags]),
1098 XEN_NETIF_RSP_ERROR); 923 XEN_NETIF_RSP_ERROR);
1099 } 924 }
1100 /* The head too, if necessary. */ 925 /* The head too, if necessary. */
1101 if (start) 926 if (start)
1102 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); 927 xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
1103 928
1104 return NULL; 929 return NULL;
1105} 930}
1106 931
1107static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, 932static int xen_netbk_tx_check_gop(struct xenvif *vif,
1108 struct sk_buff *skb, 933 struct sk_buff *skb,
1109 struct gnttab_copy **gopp) 934 struct gnttab_copy **gopp)
1110{ 935{
@@ -1119,7 +944,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
1119 /* Check status of header. */ 944 /* Check status of header. */
1120 err = gop->status; 945 err = gop->status;
1121 if (unlikely(err)) 946 if (unlikely(err))
1122 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); 947 xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
1123 948
1124 /* Skip first skb fragment if it is on same page as header fragment. */ 949 /* Skip first skb fragment if it is on same page as header fragment. */
1125 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 950 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
@@ -1129,7 +954,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
1129 pending_ring_idx_t head; 954 pending_ring_idx_t head;
1130 955
1131 pending_idx = frag_get_pending_idx(&shinfo->frags[i]); 956 pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
1132 tx_info = &netbk->pending_tx_info[pending_idx]; 957 tx_info = &vif->pending_tx_info[pending_idx];
1133 head = tx_info->head; 958 head = tx_info->head;
1134 959
1135 /* Check error status: if okay then remember grant handle. */ 960 /* Check error status: if okay then remember grant handle. */
@@ -1137,18 +962,19 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
1137 newerr = (++gop)->status; 962 newerr = (++gop)->status;
1138 if (newerr) 963 if (newerr)
1139 break; 964 break;
1140 peek = netbk->pending_ring[pending_index(++head)]; 965 peek = vif->pending_ring[pending_index(++head)];
1141 } while (!pending_tx_is_head(netbk, peek)); 966 } while (!pending_tx_is_head(vif, peek));
1142 967
1143 if (likely(!newerr)) { 968 if (likely(!newerr)) {
1144 /* Had a previous error? Invalidate this fragment. */ 969 /* Had a previous error? Invalidate this fragment. */
1145 if (unlikely(err)) 970 if (unlikely(err))
1146 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); 971 xen_netbk_idx_release(vif, pending_idx,
972 XEN_NETIF_RSP_OKAY);
1147 continue; 973 continue;
1148 } 974 }
1149 975
1150 /* Error on this fragment: respond to client with an error. */ 976 /* Error on this fragment: respond to client with an error. */
1151 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); 977 xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
1152 978
1153 /* Not the first error? Preceding frags already invalidated. */ 979 /* Not the first error? Preceding frags already invalidated. */
1154 if (err) 980 if (err)
@@ -1156,10 +982,11 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
1156 982
1157 /* First error: invalidate header and preceding fragments. */ 983 /* First error: invalidate header and preceding fragments. */
1158 pending_idx = *((u16 *)skb->data); 984 pending_idx = *((u16 *)skb->data);
1159 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); 985 xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
1160 for (j = start; j < i; j++) { 986 for (j = start; j < i; j++) {
1161 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 987 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1162 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); 988 xen_netbk_idx_release(vif, pending_idx,
989 XEN_NETIF_RSP_OKAY);
1163 } 990 }
1164 991
1165 /* Remember the error: invalidate all subsequent fragments. */ 992 /* Remember the error: invalidate all subsequent fragments. */
@@ -1170,7 +997,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
1170 return err; 997 return err;
1171} 998}
1172 999
1173static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) 1000static void xen_netbk_fill_frags(struct xenvif *vif, struct sk_buff *skb)
1174{ 1001{
1175 struct skb_shared_info *shinfo = skb_shinfo(skb); 1002 struct skb_shared_info *shinfo = skb_shinfo(skb);
1176 int nr_frags = shinfo->nr_frags; 1003 int nr_frags = shinfo->nr_frags;
@@ -1184,16 +1011,16 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1184 1011
1185 pending_idx = frag_get_pending_idx(frag); 1012 pending_idx = frag_get_pending_idx(frag);
1186 1013
1187 txp = &netbk->pending_tx_info[pending_idx].req; 1014 txp = &vif->pending_tx_info[pending_idx].req;
1188 page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); 1015 page = virt_to_page(idx_to_kaddr(vif, pending_idx));
1189 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 1016 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
1190 skb->len += txp->size; 1017 skb->len += txp->size;
1191 skb->data_len += txp->size; 1018 skb->data_len += txp->size;
1192 skb->truesize += txp->size; 1019 skb->truesize += txp->size;
1193 1020
1194 /* Take an extra reference to offset xen_netbk_idx_release */ 1021 /* Take an extra reference to offset xen_netbk_idx_release */
1195 get_page(netbk->mmap_pages[pending_idx]); 1022 get_page(vif->mmap_pages[pending_idx]);
1196 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); 1023 xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
1197 } 1024 }
1198} 1025}
1199 1026
@@ -1353,16 +1180,14 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1353 return false; 1180 return false;
1354} 1181}
1355 1182
1356static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) 1183static unsigned xen_netbk_tx_build_gops(struct xenvif *vif)
1357{ 1184{
1358 struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop; 1185 struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
1359 struct sk_buff *skb; 1186 struct sk_buff *skb;
1360 int ret; 1187 int ret;
1361 1188
1362 while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX 1189 while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
1363 < MAX_PENDING_REQS) && 1190 < MAX_PENDING_REQS)) {
1364 !list_empty(&netbk->net_schedule_list)) {
1365 struct xenvif *vif;
1366 struct xen_netif_tx_request txreq; 1191 struct xen_netif_tx_request txreq;
1367 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 1192 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
1368 struct page *page; 1193 struct page *page;
@@ -1373,16 +1198,6 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1373 unsigned int data_len; 1198 unsigned int data_len;
1374 pending_ring_idx_t index; 1199 pending_ring_idx_t index;
1375 1200
1376 /* Get a netif from the list with work to do. */
1377 vif = poll_net_schedule_list(netbk);
1378 /* This can sometimes happen because the test of
1379 * list_empty(net_schedule_list) at the top of the
1380 * loop is unlocked. Just go back and have another
1381 * look.
1382 */
1383 if (!vif)
1384 continue;
1385
1386 if (vif->tx.sring->req_prod - vif->tx.req_cons > 1201 if (vif->tx.sring->req_prod - vif->tx.req_cons >
1387 XEN_NETIF_TX_RING_SIZE) { 1202 XEN_NETIF_TX_RING_SIZE) {
1388 netdev_err(vif->dev, 1203 netdev_err(vif->dev,
@@ -1395,10 +1210,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1395 } 1210 }
1396 1211
1397 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do); 1212 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
1398 if (!work_to_do) { 1213 if (!work_to_do)
1399 xenvif_put(vif); 1214 break;
1400 continue;
1401 }
1402 1215
1403 idx = vif->tx.req_cons; 1216 idx = vif->tx.req_cons;
1404 rmb(); /* Ensure that we see the request before we copy it. */ 1217 rmb(); /* Ensure that we see the request before we copy it. */
@@ -1406,10 +1219,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1406 1219
1407 /* Credit-based scheduling. */ 1220 /* Credit-based scheduling. */
1408 if (txreq.size > vif->remaining_credit && 1221 if (txreq.size > vif->remaining_credit &&
1409 tx_credit_exceeded(vif, txreq.size)) { 1222 tx_credit_exceeded(vif, txreq.size))
1410 xenvif_put(vif); 1223 break;
1411 continue;
1412 }
1413 1224
1414 vif->remaining_credit -= txreq.size; 1225 vif->remaining_credit -= txreq.size;
1415 1226
@@ -1422,12 +1233,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1422 work_to_do); 1233 work_to_do);
1423 idx = vif->tx.req_cons; 1234 idx = vif->tx.req_cons;
1424 if (unlikely(work_to_do < 0)) 1235 if (unlikely(work_to_do < 0))
1425 continue; 1236 break;
1426 } 1237 }
1427 1238
1428 ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do); 1239 ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
1429 if (unlikely(ret < 0)) 1240 if (unlikely(ret < 0))
1430 continue; 1241 break;
1431 1242
1432 idx += ret; 1243 idx += ret;
1433 1244
@@ -1435,7 +1246,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1435 netdev_dbg(vif->dev, 1246 netdev_dbg(vif->dev,
1436 "Bad packet size: %d\n", txreq.size); 1247 "Bad packet size: %d\n", txreq.size);
1437 netbk_tx_err(vif, &txreq, idx); 1248 netbk_tx_err(vif, &txreq, idx);
1438 continue; 1249 break;
1439 } 1250 }
1440 1251
1441 /* No crossing a page as the payload mustn't fragment. */ 1252 /* No crossing a page as the payload mustn't fragment. */
@@ -1445,11 +1256,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1445 txreq.offset, txreq.size, 1256 txreq.offset, txreq.size,
1446 (txreq.offset&~PAGE_MASK) + txreq.size); 1257 (txreq.offset&~PAGE_MASK) + txreq.size);
1447 netbk_fatal_tx_err(vif); 1258 netbk_fatal_tx_err(vif);
1448 continue; 1259 break;
1449 } 1260 }
1450 1261
1451 index = pending_index(netbk->pending_cons); 1262 index = pending_index(vif->pending_cons);
1452 pending_idx = netbk->pending_ring[index]; 1263 pending_idx = vif->pending_ring[index];
1453 1264
1454 data_len = (txreq.size > PKT_PROT_LEN && 1265 data_len = (txreq.size > PKT_PROT_LEN &&
1455 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 1266 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
@@ -1474,16 +1285,16 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1474 if (netbk_set_skb_gso(vif, skb, gso)) { 1285 if (netbk_set_skb_gso(vif, skb, gso)) {
1475 /* Failure in netbk_set_skb_gso is fatal. */ 1286 /* Failure in netbk_set_skb_gso is fatal. */
1476 kfree_skb(skb); 1287 kfree_skb(skb);
1477 continue; 1288 break;
1478 } 1289 }
1479 } 1290 }
1480 1291
1481 /* XXX could copy straight to head */ 1292 /* XXX could copy straight to head */
1482 page = xen_netbk_alloc_page(netbk, pending_idx); 1293 page = xen_netbk_alloc_page(vif, pending_idx);
1483 if (!page) { 1294 if (!page) {
1484 kfree_skb(skb); 1295 kfree_skb(skb);
1485 netbk_tx_err(vif, &txreq, idx); 1296 netbk_tx_err(vif, &txreq, idx);
1486 continue; 1297 break;
1487 } 1298 }
1488 1299
1489 gop->source.u.ref = txreq.gref; 1300 gop->source.u.ref = txreq.gref;
@@ -1499,10 +1310,9 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1499 1310
1500 gop++; 1311 gop++;
1501 1312
1502 memcpy(&netbk->pending_tx_info[pending_idx].req, 1313 memcpy(&vif->pending_tx_info[pending_idx].req,
1503 &txreq, sizeof(txreq)); 1314 &txreq, sizeof(txreq));
1504 netbk->pending_tx_info[pending_idx].vif = vif; 1315 vif->pending_tx_info[pending_idx].head = index;
1505 netbk->pending_tx_info[pending_idx].head = index;
1506 *((u16 *)skb->data) = pending_idx; 1316 *((u16 *)skb->data) = pending_idx;
1507 1317
1508 __skb_put(skb, data_len); 1318 __skb_put(skb, data_len);
@@ -1517,46 +1327,45 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1517 INVALID_PENDING_IDX); 1327 INVALID_PENDING_IDX);
1518 } 1328 }
1519 1329
1520 netbk->pending_cons++; 1330 vif->pending_cons++;
1521 1331
1522 request_gop = xen_netbk_get_requests(netbk, vif, 1332 request_gop = xen_netbk_get_requests(vif, skb, txfrags, gop);
1523 skb, txfrags, gop);
1524 if (request_gop == NULL) { 1333 if (request_gop == NULL) {
1525 kfree_skb(skb); 1334 kfree_skb(skb);
1526 netbk_tx_err(vif, &txreq, idx); 1335 netbk_tx_err(vif, &txreq, idx);
1527 continue; 1336 break;
1528 } 1337 }
1529 gop = request_gop; 1338 gop = request_gop;
1530 1339
1531 __skb_queue_tail(&netbk->tx_queue, skb); 1340 __skb_queue_tail(&vif->tx_queue, skb);
1532 1341
1533 vif->tx.req_cons = idx; 1342 vif->tx.req_cons = idx;
1534 xen_netbk_check_rx_xenvif(vif);
1535 1343
1536 if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops)) 1344 if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
1537 break; 1345 break;
1538 } 1346 }
1539 1347
1540 return gop - netbk->tx_copy_ops; 1348 return gop - vif->tx_copy_ops;
1541} 1349}
1542 1350
1543static void xen_netbk_tx_submit(struct xen_netbk *netbk) 1351
1352static int xen_netbk_tx_submit(struct xenvif *vif, int budget)
1544{ 1353{
1545 struct gnttab_copy *gop = netbk->tx_copy_ops; 1354 struct gnttab_copy *gop = vif->tx_copy_ops;
1546 struct sk_buff *skb; 1355 struct sk_buff *skb;
1356 int work_done = 0;
1547 1357
1548 while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { 1358 while (work_done < budget &&
1359 (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
1549 struct xen_netif_tx_request *txp; 1360 struct xen_netif_tx_request *txp;
1550 struct xenvif *vif;
1551 u16 pending_idx; 1361 u16 pending_idx;
1552 unsigned data_len; 1362 unsigned data_len;
1553 1363
1554 pending_idx = *((u16 *)skb->data); 1364 pending_idx = *((u16 *)skb->data);
1555 vif = netbk->pending_tx_info[pending_idx].vif; 1365 txp = &vif->pending_tx_info[pending_idx].req;
1556 txp = &netbk->pending_tx_info[pending_idx].req;
1557 1366
1558 /* Check the remap error code. */ 1367 /* Check the remap error code. */
1559 if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) { 1368 if (unlikely(xen_netbk_tx_check_gop(vif, skb, &gop))) {
1560 netdev_dbg(vif->dev, "netback grant failed.\n"); 1369 netdev_dbg(vif->dev, "netback grant failed.\n");
1561 skb_shinfo(skb)->nr_frags = 0; 1370 skb_shinfo(skb)->nr_frags = 0;
1562 kfree_skb(skb); 1371 kfree_skb(skb);
@@ -1565,7 +1374,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1565 1374
1566 data_len = skb->len; 1375 data_len = skb->len;
1567 memcpy(skb->data, 1376 memcpy(skb->data,
1568 (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset), 1377 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
1569 data_len); 1378 data_len);
1570 if (data_len < txp->size) { 1379 if (data_len < txp->size) {
1571 /* Append the packet payload as a fragment. */ 1380 /* Append the packet payload as a fragment. */
@@ -1573,7 +1382,8 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1573 txp->size -= data_len; 1382 txp->size -= data_len;
1574 } else { 1383 } else {
1575 /* Schedule a response immediately. */ 1384 /* Schedule a response immediately. */
1576 xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); 1385 xen_netbk_idx_release(vif, pending_idx,
1386 XEN_NETIF_RSP_OKAY);
1577 } 1387 }
1578 1388
1579 if (txp->flags & XEN_NETTXF_csum_blank) 1389 if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1581,7 +1391,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1581 else if (txp->flags & XEN_NETTXF_data_validated) 1391 else if (txp->flags & XEN_NETTXF_data_validated)
1582 skb->ip_summed = CHECKSUM_UNNECESSARY; 1392 skb->ip_summed = CHECKSUM_UNNECESSARY;
1583 1393
1584 xen_netbk_fill_frags(netbk, skb); 1394 xen_netbk_fill_frags(vif, skb);
1585 1395
1586 /* 1396 /*
1587 * If the initial fragment was < PKT_PROT_LEN then 1397 * If the initial fragment was < PKT_PROT_LEN then
@@ -1609,53 +1419,61 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1609 vif->dev->stats.rx_bytes += skb->len; 1419 vif->dev->stats.rx_bytes += skb->len;
1610 vif->dev->stats.rx_packets++; 1420 vif->dev->stats.rx_packets++;
1611 1421
1612 xenvif_receive_skb(vif, skb); 1422 work_done++;
1423
1424 netif_receive_skb(skb);
1613 } 1425 }
1426
1427 return work_done;
1614} 1428}
1615 1429
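As the hunks above show, xen_netbk_tx_submit() is now budget-aware so it can be driven from NAPI: it stops either when its queue is drained or when `budget` packets have been delivered, and reports how much work it actually did. A self-contained sketch of that "work_done vs budget" loop, using generic queue types rather than sk_buff lists:

struct item { struct item *next; };

static struct item *dequeue(struct item **q)
{
        struct item *it = *q;

        if (it)
                *q = it->next;
        return it;
}

static void deliver(struct item *it) { (void)it; }  /* stand-in for netif_receive_skb() */

static int submit(struct item **queue, int budget)
{
        int work_done = 0;
        struct item *it;

        /* Stop early once the budget is spent; leftover items stay queued
         * and are picked up by the next poll round. */
        while (work_done < budget && (it = dequeue(queue)) != NULL) {
                deliver(it);
                work_done++;
        }
        return work_done;   /* the poll handler compares this against budget */
}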
1616/* Called after netfront has transmitted */ 1430/* Called after netfront has transmitted */
1617static void xen_netbk_tx_action(struct xen_netbk *netbk) 1431int xen_netbk_tx_action(struct xenvif *vif, int budget)
1618{ 1432{
1619 unsigned nr_gops; 1433 unsigned nr_gops;
1434 int work_done;
1620 1435
1621 nr_gops = xen_netbk_tx_build_gops(netbk); 1436 if (unlikely(!tx_work_todo(vif)))
1437 return 0;
1438
1439 nr_gops = xen_netbk_tx_build_gops(vif);
1622 1440
1623 if (nr_gops == 0) 1441 if (nr_gops == 0)
1624 return; 1442 return 0;
1443
1444 gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
1625 1445
1626 gnttab_batch_copy(netbk->tx_copy_ops, nr_gops); 1446 work_done = xen_netbk_tx_submit(vif, nr_gops);
1627 1447
1628 xen_netbk_tx_submit(netbk); 1448 return work_done;
1629} 1449}
1630 1450
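With xen_netbk_tx_action() returning its work count, the interface side of this patch (interface.c, not part of this file's hunks) can drive it from a per-vif NAPI instance. A simplified sketch of such a poll method, assuming the napi member this patch adds to struct xenvif; the in-tree handler is more careful, re-checking the TX ring before leaving polling mode:

static int xenvif_poll(struct napi_struct *napi, int budget)
{
        struct xenvif *vif = container_of(napi, struct xenvif, napi);
        int work_done;

        work_done = xen_netbk_tx_action(vif, budget);

        /* Only leave polling mode once a round comes in under budget,
         * i.e. guest TX work is (momentarily) exhausted. */
        if (work_done < budget)
                napi_complete(napi);

        return work_done;
}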
1631static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, 1451static void xen_netbk_idx_release(struct xenvif *vif, u16 pending_idx,
1632 u8 status) 1452 u8 status)
1633{ 1453{
1634 struct xenvif *vif;
1635 struct pending_tx_info *pending_tx_info; 1454 struct pending_tx_info *pending_tx_info;
1636 pending_ring_idx_t head; 1455 pending_ring_idx_t head;
1637 u16 peek; /* peek into next tx request */ 1456 u16 peek; /* peek into next tx request */
1638 1457
1639 BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL)); 1458 BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
1640 1459
1641 /* Already complete? */ 1460 /* Already complete? */
1642 if (netbk->mmap_pages[pending_idx] == NULL) 1461 if (vif->mmap_pages[pending_idx] == NULL)
1643 return; 1462 return;
1644 1463
1645 pending_tx_info = &netbk->pending_tx_info[pending_idx]; 1464 pending_tx_info = &vif->pending_tx_info[pending_idx];
1646 1465
1647 vif = pending_tx_info->vif;
1648 head = pending_tx_info->head; 1466 head = pending_tx_info->head;
1649 1467
1650 BUG_ON(!pending_tx_is_head(netbk, head)); 1468 BUG_ON(!pending_tx_is_head(vif, head));
1651 BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx); 1469 BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
1652 1470
1653 do { 1471 do {
1654 pending_ring_idx_t index; 1472 pending_ring_idx_t index;
1655 pending_ring_idx_t idx = pending_index(head); 1473 pending_ring_idx_t idx = pending_index(head);
1656 u16 info_idx = netbk->pending_ring[idx]; 1474 u16 info_idx = vif->pending_ring[idx];
1657 1475
1658 pending_tx_info = &netbk->pending_tx_info[info_idx]; 1476 pending_tx_info = &vif->pending_tx_info[info_idx];
1659 make_tx_response(vif, &pending_tx_info->req, status); 1477 make_tx_response(vif, &pending_tx_info->req, status);
1660 1478
1661 /* Setting any number other than 1479 /* Setting any number other than
@@ -1664,18 +1482,15 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
1664 */ 1482 */
1665 pending_tx_info->head = 0; 1483 pending_tx_info->head = 0;
1666 1484
1667 index = pending_index(netbk->pending_prod++); 1485 index = pending_index(vif->pending_prod++);
1668 netbk->pending_ring[index] = netbk->pending_ring[info_idx]; 1486 vif->pending_ring[index] = vif->pending_ring[info_idx];
1669 1487
1670 xenvif_put(vif); 1488 peek = vif->pending_ring[pending_index(++head)];
1671 1489
1672 peek = netbk->pending_ring[pending_index(++head)]; 1490 } while (!pending_tx_is_head(vif, peek));
1673 1491
1674 } while (!pending_tx_is_head(netbk, peek)); 1492 put_page(vif->mmap_pages[pending_idx]);
1675 1493 vif->mmap_pages[pending_idx] = NULL;
1676 netbk->mmap_pages[pending_idx]->mapping = 0;
1677 put_page(netbk->mmap_pages[pending_idx]);
1678 netbk->mmap_pages[pending_idx] = NULL;
1679} 1494}
1680 1495
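With one vif owning all of the pending state, the per-slot vif back-pointer and the xenvif_put() are gone and the release walk above operates purely on the vif's own pending_ring/pending_tx_info arrays. A compact, self-contained sketch of that kind of circular-ring walk: respond to every slot in one run, recycle each index at the producer, and stop when the next entry starts a new run. Names and types are generic stand-ins:

#include <stdbool.h>

#define RING_SIZE 256

struct slot { int req; };                         /* stand-in for pending_tx_info */

struct state {
        unsigned int ring[RING_SIZE];             /* circular ring of slot indices */
        struct slot  slots[RING_SIZE];
        unsigned int prod;                        /* freed indices go back here */
};

static unsigned int ring_index(unsigned int i) { return i % RING_SIZE; }

/* Stand-ins for make_tx_response() and the run-boundary check. */
static void respond(struct slot *s) { (void)s; }
static bool is_run_start(struct state *st, unsigned int i) { (void)st; (void)i; return true; }

static void release_run(struct state *st, unsigned int head)
{
        unsigned int peek;

        do {
                unsigned int idx = st->ring[ring_index(head)];

                respond(&st->slots[idx]);                  /* answer this slot */
                st->ring[ring_index(st->prod++)] = idx;    /* recycle the index */

                peek = st->ring[ring_index(++head)];
        } while (!is_run_start(st, peek));
}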
1681 1496
@@ -1723,45 +1538,22 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1723 return resp; 1538 return resp;
1724} 1539}
1725 1540
1726static inline int rx_work_todo(struct xen_netbk *netbk) 1541static inline int rx_work_todo(struct xenvif *vif)
1727{ 1542{
1728 return !skb_queue_empty(&netbk->rx_queue); 1543 return !skb_queue_empty(&vif->rx_queue);
1729} 1544}
1730 1545
1731static inline int tx_work_todo(struct xen_netbk *netbk) 1546static inline int tx_work_todo(struct xenvif *vif)
1732{ 1547{
1733 1548
1734 if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX 1549 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
1735 < MAX_PENDING_REQS) && 1550 (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
1736 !list_empty(&netbk->net_schedule_list)) 1551 < MAX_PENDING_REQS))
1737 return 1; 1552 return 1;
1738 1553
1739 return 0; 1554 return 0;
1740} 1555}
1741 1556
1742static int xen_netbk_kthread(void *data)
1743{
1744 struct xen_netbk *netbk = data;
1745 while (!kthread_should_stop()) {
1746 wait_event_interruptible(netbk->wq,
1747 rx_work_todo(netbk) ||
1748 tx_work_todo(netbk) ||
1749 kthread_should_stop());
1750 cond_resched();
1751
1752 if (kthread_should_stop())
1753 break;
1754
1755 if (rx_work_todo(netbk))
1756 xen_netbk_rx_action(netbk);
1757
1758 if (tx_work_todo(netbk))
1759 xen_netbk_tx_action(netbk);
1760 }
1761
1762 return 0;
1763}
1764
1765void xen_netbk_unmap_frontend_rings(struct xenvif *vif) 1557void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
1766{ 1558{
1767 if (vif->tx.sring) 1559 if (vif->tx.sring)
@@ -1807,11 +1599,29 @@ err:
1807 return err; 1599 return err;
1808} 1600}
1809 1601
1602int xen_netbk_kthread(void *data)
1603{
1604 struct xenvif *vif = data;
1605
1606 while (!kthread_should_stop()) {
1607 wait_event_interruptible(vif->wq,
1608 rx_work_todo(vif) ||
1609 kthread_should_stop());
1610 if (kthread_should_stop())
1611 break;
1612
1613 if (rx_work_todo(vif))
1614 xen_netbk_rx_action(vif);
1615
1616 cond_resched();
1617 }
1618
1619 return 0;
1620}
1621
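The RX kthread added above is per vif rather than per CPU group, so its lifetime follows the interface: in this series it is created when the vif connects and stopped when it disconnects, in interface.c, outside these hunks. A minimal sketch of that create/wake/stop pattern with the standard kthread API; the helper names below are hypothetical, and the exact naming and error handling in interface.c may differ:

#include <linux/kthread.h>
#include <linux/err.h>
#include "common.h"

/* Sketch: start the per-vif RX thread (normally done at connect time). */
static int start_rx_kthread(struct xenvif *vif)
{
        struct task_struct *task;

        task = kthread_create(xen_netbk_kthread, (void *)vif,
                              "%s", vif->dev->name);
        if (IS_ERR(task))
                return PTR_ERR(task);

        vif->task = task;
        wake_up_process(task);   /* thread parks in wait_event_interruptible() */
        return 0;
}

/* Sketch: tear it down (normally done at disconnect time). */
static void stop_rx_kthread(struct xenvif *vif)
{
        if (vif->task) {
                kthread_stop(vif->task);   /* wakes the wait; the loop exits */
                vif->task = NULL;
        }
}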
1810static int __init netback_init(void) 1622static int __init netback_init(void)
1811{ 1623{
1812 int i;
1813 int rc = 0; 1624 int rc = 0;
1814 int group;
1815 1625
1816 if (!xen_domain()) 1626 if (!xen_domain())
1817 return -ENODEV; 1627 return -ENODEV;
@@ -1822,48 +1632,6 @@ static int __init netback_init(void)
1822 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; 1632 fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1823 } 1633 }
1824 1634
1825 xen_netbk_group_nr = num_online_cpus();
1826 xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
1827 if (!xen_netbk)
1828 return -ENOMEM;
1829
1830 for (group = 0; group < xen_netbk_group_nr; group++) {
1831 struct xen_netbk *netbk = &xen_netbk[group];
1832 skb_queue_head_init(&netbk->rx_queue);
1833 skb_queue_head_init(&netbk->tx_queue);
1834
1835 init_timer(&netbk->net_timer);
1836 netbk->net_timer.data = (unsigned long)netbk;
1837 netbk->net_timer.function = xen_netbk_alarm;
1838
1839 netbk->pending_cons = 0;
1840 netbk->pending_prod = MAX_PENDING_REQS;
1841 for (i = 0; i < MAX_PENDING_REQS; i++)
1842 netbk->pending_ring[i] = i;
1843
1844 init_waitqueue_head(&netbk->wq);
1845 netbk->task = kthread_create(xen_netbk_kthread,
1846 (void *)netbk,
1847 "netback/%u", group);
1848
1849 if (IS_ERR(netbk->task)) {
1850 pr_alert("kthread_create() fails at netback\n");
1851 del_timer(&netbk->net_timer);
1852 rc = PTR_ERR(netbk->task);
1853 goto failed_init;
1854 }
1855
1856 kthread_bind(netbk->task, group);
1857
1858 INIT_LIST_HEAD(&netbk->net_schedule_list);
1859
1860 spin_lock_init(&netbk->net_schedule_list_lock);
1861
1862 atomic_set(&netbk->netfront_count, 0);
1863
1864 wake_up_process(netbk->task);
1865 }
1866
1867 rc = xenvif_xenbus_init(); 1635 rc = xenvif_xenbus_init();
1868 if (rc) 1636 if (rc)
1869 goto failed_init; 1637 goto failed_init;
@@ -1871,35 +1639,14 @@ static int __init netback_init(void)
1871 return 0; 1639 return 0;
1872 1640
1873failed_init: 1641failed_init:
1874 while (--group >= 0) {
1875 struct xen_netbk *netbk = &xen_netbk[group];
1876 del_timer(&netbk->net_timer);
1877 kthread_stop(netbk->task);
1878 }
1879 vfree(xen_netbk);
1880 return rc; 1642 return rc;
1881
1882} 1643}
1883 1644
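Reading the surviving context lines of the init/fini hunks together: with no global per-group state left, module init reduces to the xen_domain() check, the fatal_skb_slots clamp (context not shown in this hunk), and the xenbus registration, roughly:

static int __init netback_init(void)
{
        int rc = 0;

        if (!xen_domain())
                return -ENODEV;

        /* ... fatal_skb_slots sanity clamp, unchanged context not shown ... */

        rc = xenvif_xenbus_init();
        if (rc)
                goto failed_init;

        return 0;

failed_init:
        return rc;
}

Per-vif resources (queues, pending ring, mmap pages, kthread) are now set up and torn down with the interface itself, which is why the old per-group allocation and cleanup loops disappear from init/fini below.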
1884module_init(netback_init); 1645module_init(netback_init);
1885 1646
1886static void __exit netback_fini(void) 1647static void __exit netback_fini(void)
1887{ 1648{
1888 int i, j;
1889
1890 xenvif_xenbus_fini(); 1649 xenvif_xenbus_fini();
1891
1892 for (i = 0; i < xen_netbk_group_nr; i++) {
1893 struct xen_netbk *netbk = &xen_netbk[i];
1894 del_timer_sync(&netbk->net_timer);
1895 kthread_stop(netbk->task);
1896 for (j = 0; j < MAX_PENDING_REQS; j++) {
1897 if (netbk->mmap_pages[j])
1898 __free_page(netbk->mmap_pages[j]);
1899 }
1900 }
1901
1902 vfree(xen_netbk);
1903} 1650}
1904module_exit(netback_fini); 1651module_exit(netback_fini);
1905 1652