Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--  drivers/net/xen-netback/common.h    | 138
-rw-r--r--  drivers/net/xen-netback/interface.c | 544
-rw-r--r--  drivers/net/xen-netback/netback.c   | 920
-rw-r--r--  drivers/net/xen-netback/xenbus.c    | 360
4 files changed, 1300 insertions(+), 662 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 0d4a285cbd7e..ef3026f46a37 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -44,6 +44,7 @@
44#include <xen/interface/grant_table.h> 44#include <xen/interface/grant_table.h>
45#include <xen/grant_table.h> 45#include <xen/grant_table.h>
46#include <xen/xenbus.h> 46#include <xen/xenbus.h>
47#include <linux/debugfs.h>
47 48
48typedef unsigned int pending_ring_idx_t; 49typedef unsigned int pending_ring_idx_t;
49#define INVALID_PENDING_RING_IDX (~0U) 50#define INVALID_PENDING_RING_IDX (~0U)
@@ -99,22 +100,43 @@ struct xenvif_rx_meta {
99 */ 100 */
100#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN 101#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
101 102
102struct xenvif { 103/* Queue name is interface name with "-qNNN" appended */
103 /* Unique identifier for this interface. */ 104#define QUEUE_NAME_SIZE (IFNAMSIZ + 5)
104 domid_t domid;
105 unsigned int handle;
106 105
107 /* Is this interface disabled? True when backend discovers 106/* IRQ name is queue name with "-tx" or "-rx" appended */
108 * frontend is rogue. 107#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
108
109struct xenvif;
110
111struct xenvif_stats {
112 /* Stats fields to be updated per-queue.
113 * A subset of struct net_device_stats that contains only the
114 * fields that are updated in netback.c for each queue.
109 */ 115 */
110 bool disabled; 116 unsigned int rx_bytes;
117 unsigned int rx_packets;
118 unsigned int tx_bytes;
119 unsigned int tx_packets;
120
121 /* Additional stats used by xenvif */
122 unsigned long rx_gso_checksum_fixup;
123 unsigned long tx_zerocopy_sent;
124 unsigned long tx_zerocopy_success;
125 unsigned long tx_zerocopy_fail;
126 unsigned long tx_frag_overflow;
127};
128
129struct xenvif_queue { /* Per-queue data for xenvif */
130 unsigned int id; /* Queue ID, 0-based */
131 char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
132 struct xenvif *vif; /* Parent VIF */
111 133
112 /* Use NAPI for guest TX */ 134 /* Use NAPI for guest TX */
113 struct napi_struct napi; 135 struct napi_struct napi;
114 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ 136 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
115 unsigned int tx_irq; 137 unsigned int tx_irq;
116 /* Only used when feature-split-event-channels = 1 */ 138 /* Only used when feature-split-event-channels = 1 */
117 char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */ 139 char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
118 struct xen_netif_tx_back_ring tx; 140 struct xen_netif_tx_back_ring tx;
119 struct sk_buff_head tx_queue; 141 struct sk_buff_head tx_queue;
120 struct page *mmap_pages[MAX_PENDING_REQS]; 142 struct page *mmap_pages[MAX_PENDING_REQS];
@@ -150,22 +172,51 @@ struct xenvif {
150 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ 172 /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
151 unsigned int rx_irq; 173 unsigned int rx_irq;
152 /* Only used when feature-split-event-channels = 1 */ 174 /* Only used when feature-split-event-channels = 1 */
153 char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */ 175 char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
154 struct xen_netif_rx_back_ring rx; 176 struct xen_netif_rx_back_ring rx;
155 struct sk_buff_head rx_queue; 177 struct sk_buff_head rx_queue;
156 RING_IDX rx_last_skb_slots; 178 RING_IDX rx_last_skb_slots;
157 bool rx_queue_purge; 179 unsigned long status;
158 180
159 struct timer_list wake_queue; 181 struct timer_list rx_stalled;
160 182
161 /* This array is allocated seperately as it is large */ 183 struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
162 struct gnttab_copy *grant_copy_op;
163 184
164 /* We create one meta structure per ring request we consume, so 185 /* We create one meta structure per ring request we consume, so
165 * the maximum number is the same as the ring size. 186 * the maximum number is the same as the ring size.
166 */ 187 */
167 struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE]; 188 struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE];
168 189
190 /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
191 unsigned long credit_bytes;
192 unsigned long credit_usec;
193 unsigned long remaining_credit;
194 struct timer_list credit_timeout;
195 u64 credit_window_start;
196
197 /* Statistics */
198 struct xenvif_stats stats;
199};
200
201enum state_bit_shift {
202 /* This bit marks that the vif is connected */
203 VIF_STATUS_CONNECTED,
204 /* This bit signals the RX thread that queuing was stopped (in
205 * start_xmit), and either the timer fired or an RX interrupt came
206 */
207 QUEUE_STATUS_RX_PURGE_EVENT,
208 /* This bit tells the interrupt handler that this queue was the reason
209 * for the carrier off, so it should kick the thread. Only queues which
210 * brought it down can turn on the carrier.
211 */
212 QUEUE_STATUS_RX_STALLED
213};
214
215struct xenvif {
216 /* Unique identifier for this interface. */
217 domid_t domid;
218 unsigned int handle;
219
169 u8 fe_dev_addr[6]; 220 u8 fe_dev_addr[6];
170 221
171 /* Frontend feature information. */ 222 /* Frontend feature information. */
@@ -179,19 +230,19 @@ struct xenvif {
179 /* Internal feature information. */ 230 /* Internal feature information. */
180 u8 can_queue:1; /* can queue packets for receiver? */ 231 u8 can_queue:1; /* can queue packets for receiver? */
181 232
182 /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ 233 /* Is this interface disabled? True when backend discovers
183 unsigned long credit_bytes; 234 * frontend is rogue.
184 unsigned long credit_usec; 235 */
185 unsigned long remaining_credit; 236 bool disabled;
186 struct timer_list credit_timeout; 237 unsigned long status;
187 u64 credit_window_start;
188 238
189 /* Statistics */ 239 /* Queues */
190 unsigned long rx_gso_checksum_fixup; 240 struct xenvif_queue *queues;
191 unsigned long tx_zerocopy_sent; 241 unsigned int num_queues; /* active queues, resource allocated */
192 unsigned long tx_zerocopy_success; 242
193 unsigned long tx_zerocopy_fail; 243#ifdef CONFIG_DEBUG_FS
194 unsigned long tx_frag_overflow; 244 struct dentry *xenvif_dbg_root;
245#endif
195 246
196 /* Miscellaneous private stuff. */ 247 /* Miscellaneous private stuff. */
197 struct net_device *dev; 248 struct net_device *dev;
@@ -206,7 +257,10 @@ struct xenvif *xenvif_alloc(struct device *parent,
206 domid_t domid, 257 domid_t domid,
207 unsigned int handle); 258 unsigned int handle);
208 259
209int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, 260int xenvif_init_queue(struct xenvif_queue *queue);
261void xenvif_deinit_queue(struct xenvif_queue *queue);
262
263int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
210 unsigned long rx_ring_ref, unsigned int tx_evtchn, 264 unsigned long rx_ring_ref, unsigned int tx_evtchn,
211 unsigned int rx_evtchn); 265 unsigned int rx_evtchn);
212void xenvif_disconnect(struct xenvif *vif); 266void xenvif_disconnect(struct xenvif *vif);
@@ -217,52 +271,62 @@ void xenvif_xenbus_fini(void);
217 271
218int xenvif_schedulable(struct xenvif *vif); 272int xenvif_schedulable(struct xenvif *vif);
219 273
220int xenvif_must_stop_queue(struct xenvif *vif); 274int xenvif_must_stop_queue(struct xenvif_queue *queue);
275
276int xenvif_queue_stopped(struct xenvif_queue *queue);
277void xenvif_wake_queue(struct xenvif_queue *queue);
221 278
222/* (Un)Map communication rings. */ 279/* (Un)Map communication rings. */
223void xenvif_unmap_frontend_rings(struct xenvif *vif); 280void xenvif_unmap_frontend_rings(struct xenvif_queue *queue);
224int xenvif_map_frontend_rings(struct xenvif *vif, 281int xenvif_map_frontend_rings(struct xenvif_queue *queue,
225 grant_ref_t tx_ring_ref, 282 grant_ref_t tx_ring_ref,
226 grant_ref_t rx_ring_ref); 283 grant_ref_t rx_ring_ref);
227 284
228/* Check for SKBs from frontend and schedule backend processing */ 285/* Check for SKBs from frontend and schedule backend processing */
229void xenvif_napi_schedule_or_enable_events(struct xenvif *vif); 286void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue);
230 287
231/* Prevent the device from generating any further traffic. */ 288/* Prevent the device from generating any further traffic. */
232void xenvif_carrier_off(struct xenvif *vif); 289void xenvif_carrier_off(struct xenvif *vif);
233 290
234int xenvif_tx_action(struct xenvif *vif, int budget); 291int xenvif_tx_action(struct xenvif_queue *queue, int budget);
235 292
236int xenvif_kthread_guest_rx(void *data); 293int xenvif_kthread_guest_rx(void *data);
237void xenvif_kick_thread(struct xenvif *vif); 294void xenvif_kick_thread(struct xenvif_queue *queue);
238 295
239int xenvif_dealloc_kthread(void *data); 296int xenvif_dealloc_kthread(void *data);
240 297
241/* Determine whether the needed number of slots (req) are available, 298/* Determine whether the needed number of slots (req) are available,
242 * and set req_event if not. 299 * and set req_event if not.
243 */ 300 */
244bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed); 301bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed);
245 302
246void xenvif_stop_queue(struct xenvif *vif); 303void xenvif_carrier_on(struct xenvif *vif);
247 304
248/* Callback from stack when TX packet can be released */ 305/* Callback from stack when TX packet can be released */
249void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); 306void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
250 307
251/* Unmap a pending page and release it back to the guest */ 308/* Unmap a pending page and release it back to the guest */
252void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx); 309void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
253 310
254static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) 311static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
255{ 312{
256 return MAX_PENDING_REQS - 313 return MAX_PENDING_REQS -
257 vif->pending_prod + vif->pending_cons; 314 queue->pending_prod + queue->pending_cons;
258} 315}
259 316
260/* Callback from stack when TX packet can be released */ 317/* Callback from stack when TX packet can be released */
261void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); 318void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
262 319
320irqreturn_t xenvif_interrupt(int irq, void *dev_id);
321
263extern bool separate_tx_rx_irq; 322extern bool separate_tx_rx_irq;
264 323
265extern unsigned int rx_drain_timeout_msecs; 324extern unsigned int rx_drain_timeout_msecs;
266extern unsigned int rx_drain_timeout_jiffies; 325extern unsigned int rx_drain_timeout_jiffies;
326extern unsigned int xenvif_max_queues;
327
328#ifdef CONFIG_DEBUG_FS
329extern struct dentry *xen_netback_dbg_root;
330#endif
267 331
268#endif /* __XEN_NETBACK__COMMON_H__ */ 332#endif /* __XEN_NETBACK__COMMON_H__ */
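The QUEUE_NAME_SIZE and IRQ_NAME_SIZE bounds introduced in common.h are sized for a "-qNNN" queue suffix plus a "-tx"/"-rx" IRQ suffix. As a rough illustration of the naming scheme those buffers are meant to hold (not part of the patch: the IRQ names are formatted in xenvif_connect() further down, and the queue name itself is expected to be filled in by the xenbus code, whose diff is not shown in this excerpt; the helper name here is made up):

/* Illustrative sketch only -- not part of the patch.  It shows how the
 * name buffers declared in common.h are expected to be filled:
 * QUEUE_NAME_SIZE (IFNAMSIZ + 5) leaves room for the "-qNNN" suffix
 * (IFNAMSIZ already accounts for the terminating NUL), and IRQ_NAME_SIZE
 * adds three more bytes for the "-tx"/"-rx" suffix.
 */
static void example_format_queue_names(struct xenvif_queue *queue,
				       const char *devname)
{
	snprintf(queue->name, QUEUE_NAME_SIZE, "%s-q%u", devname, queue->id);
	snprintf(queue->tx_irq_name, IRQ_NAME_SIZE, "%s-tx", queue->name);
	snprintf(queue->rx_irq_name, IRQ_NAME_SIZE, "%s-rx", queue->name);
}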
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 20e9defa1060..48a55cda979b 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,40 +43,56 @@
43#define XENVIF_QUEUE_LENGTH 32 43#define XENVIF_QUEUE_LENGTH 32
44#define XENVIF_NAPI_WEIGHT 64 44#define XENVIF_NAPI_WEIGHT 64
45 45
46static inline void xenvif_stop_queue(struct xenvif_queue *queue)
47{
48 struct net_device *dev = queue->vif->dev;
49
50 if (!queue->vif->can_queue)
51 return;
52
53 netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
54}
55
46int xenvif_schedulable(struct xenvif *vif) 56int xenvif_schedulable(struct xenvif *vif)
47{ 57{
48 return netif_running(vif->dev) && netif_carrier_ok(vif->dev); 58 return netif_running(vif->dev) &&
59 test_bit(VIF_STATUS_CONNECTED, &vif->status);
49} 60}
50 61
51static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) 62static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
52{ 63{
53 struct xenvif *vif = dev_id; 64 struct xenvif_queue *queue = dev_id;
54 65
55 if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) 66 if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
56 napi_schedule(&vif->napi); 67 napi_schedule(&queue->napi);
57 68
58 return IRQ_HANDLED; 69 return IRQ_HANDLED;
59} 70}
60 71
61static int xenvif_poll(struct napi_struct *napi, int budget) 72int xenvif_poll(struct napi_struct *napi, int budget)
62{ 73{
63 struct xenvif *vif = container_of(napi, struct xenvif, napi); 74 struct xenvif_queue *queue =
75 container_of(napi, struct xenvif_queue, napi);
64 int work_done; 76 int work_done;
65 77
66 /* This vif is rogue, we pretend we've there is nothing to do 78 /* This vif is rogue, we pretend we've there is nothing to do
67 * for this vif to deschedule it from NAPI. But this interface 79 * for this vif to deschedule it from NAPI. But this interface
68 * will be turned off in thread context later. 80 * will be turned off in thread context later.
81 * Also, if a guest doesn't post enough slots to receive data on one of
82 * its queues, the carrier goes down and NAPI is descheduled here so
83 * the guest can't send more packets until it's ready to receive.
69 */ 84 */
70 if (unlikely(vif->disabled)) { 85 if (unlikely(queue->vif->disabled ||
86 !netif_carrier_ok(queue->vif->dev))) {
71 napi_complete(napi); 87 napi_complete(napi);
72 return 0; 88 return 0;
73 } 89 }
74 90
75 work_done = xenvif_tx_action(vif, budget); 91 work_done = xenvif_tx_action(queue, budget);
76 92
77 if (work_done < budget) { 93 if (work_done < budget) {
78 napi_complete(napi); 94 napi_complete(napi);
79 xenvif_napi_schedule_or_enable_events(vif); 95 xenvif_napi_schedule_or_enable_events(queue);
80 } 96 }
81 97
82 return work_done; 98 return work_done;
@@ -84,14 +100,23 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
84 100
85static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) 101static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
86{ 102{
87 struct xenvif *vif = dev_id; 103 struct xenvif_queue *queue = dev_id;
104 struct netdev_queue *net_queue =
105 netdev_get_tx_queue(queue->vif->dev, queue->id);
88 106
89 xenvif_kick_thread(vif); 107 /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
108 * the carrier went down and this queue was previously blocked
109 */
110 if (unlikely(netif_tx_queue_stopped(net_queue) ||
111 (!netif_carrier_ok(queue->vif->dev) &&
112 test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
113 set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
114 xenvif_kick_thread(queue);
90 115
91 return IRQ_HANDLED; 116 return IRQ_HANDLED;
92} 117}
93 118
94static irqreturn_t xenvif_interrupt(int irq, void *dev_id) 119irqreturn_t xenvif_interrupt(int irq, void *dev_id)
95{ 120{
96 xenvif_tx_interrupt(irq, dev_id); 121 xenvif_tx_interrupt(irq, dev_id);
97 xenvif_rx_interrupt(irq, dev_id); 122 xenvif_rx_interrupt(irq, dev_id);
@@ -99,28 +124,57 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
99 return IRQ_HANDLED; 124 return IRQ_HANDLED;
100} 125}
101 126
102static void xenvif_wake_queue(unsigned long data) 127int xenvif_queue_stopped(struct xenvif_queue *queue)
128{
129 struct net_device *dev = queue->vif->dev;
130 unsigned int id = queue->id;
131 return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
132}
133
134void xenvif_wake_queue(struct xenvif_queue *queue)
103{ 135{
104 struct xenvif *vif = (struct xenvif *)data; 136 struct net_device *dev = queue->vif->dev;
137 unsigned int id = queue->id;
138 netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
139}
140
141/* Callback to wake the queue's thread and turn the carrier off on timeout */
142static void xenvif_rx_stalled(unsigned long data)
143{
144 struct xenvif_queue *queue = (struct xenvif_queue *)data;
105 145
106 if (netif_queue_stopped(vif->dev)) { 146 if (xenvif_queue_stopped(queue)) {
107 netdev_err(vif->dev, "draining TX queue\n"); 147 set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
108 vif->rx_queue_purge = true; 148 xenvif_kick_thread(queue);
109 xenvif_kick_thread(vif);
110 netif_wake_queue(vif->dev);
111 } 149 }
112} 150}
113 151
114static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) 152static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
115{ 153{
116 struct xenvif *vif = netdev_priv(dev); 154 struct xenvif *vif = netdev_priv(dev);
155 struct xenvif_queue *queue = NULL;
156 unsigned int num_queues = vif->num_queues;
157 u16 index;
117 int min_slots_needed; 158 int min_slots_needed;
118 159
119 BUG_ON(skb->dev != dev); 160 BUG_ON(skb->dev != dev);
120 161
121 /* Drop the packet if vif is not ready */ 162 /* Drop the packet if queues are not set up */
122 if (vif->task == NULL || 163 if (num_queues < 1)
123 vif->dealloc_task == NULL || 164 goto drop;
165
166 /* Obtain the queue to be used to transmit this packet */
167 index = skb_get_queue_mapping(skb);
168 if (index >= num_queues) {
169 pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.",
170 index, vif->dev->name);
171 index %= num_queues;
172 }
173 queue = &vif->queues[index];
174
175 /* Drop the packet if queue is not ready */
176 if (queue->task == NULL ||
177 queue->dealloc_task == NULL ||
124 !xenvif_schedulable(vif)) 178 !xenvif_schedulable(vif))
125 goto drop; 179 goto drop;
126 180
@@ -139,16 +193,16 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
139 * then turn off the queue to give the ring a chance to 193 * then turn off the queue to give the ring a chance to
140 * drain. 194 * drain.
141 */ 195 */
142 if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) { 196 if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
143 vif->wake_queue.function = xenvif_wake_queue; 197 queue->rx_stalled.function = xenvif_rx_stalled;
144 vif->wake_queue.data = (unsigned long)vif; 198 queue->rx_stalled.data = (unsigned long)queue;
145 xenvif_stop_queue(vif); 199 xenvif_stop_queue(queue);
146 mod_timer(&vif->wake_queue, 200 mod_timer(&queue->rx_stalled,
147 jiffies + rx_drain_timeout_jiffies); 201 jiffies + rx_drain_timeout_jiffies);
148 } 202 }
149 203
150 skb_queue_tail(&vif->rx_queue, skb); 204 skb_queue_tail(&queue->rx_queue, skb);
151 xenvif_kick_thread(vif); 205 xenvif_kick_thread(queue);
152 206
153 return NETDEV_TX_OK; 207 return NETDEV_TX_OK;
154 208
@@ -161,42 +215,82 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
161static struct net_device_stats *xenvif_get_stats(struct net_device *dev) 215static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
162{ 216{
163 struct xenvif *vif = netdev_priv(dev); 217 struct xenvif *vif = netdev_priv(dev);
218 struct xenvif_queue *queue = NULL;
219 unsigned int num_queues = vif->num_queues;
220 unsigned long rx_bytes = 0;
221 unsigned long rx_packets = 0;
222 unsigned long tx_bytes = 0;
223 unsigned long tx_packets = 0;
224 unsigned int index;
225
226 if (vif->queues == NULL)
227 goto out;
228
229 /* Aggregate tx and rx stats from each queue */
230 for (index = 0; index < num_queues; ++index) {
231 queue = &vif->queues[index];
232 rx_bytes += queue->stats.rx_bytes;
233 rx_packets += queue->stats.rx_packets;
234 tx_bytes += queue->stats.tx_bytes;
235 tx_packets += queue->stats.tx_packets;
236 }
237
238out:
239 vif->dev->stats.rx_bytes = rx_bytes;
240 vif->dev->stats.rx_packets = rx_packets;
241 vif->dev->stats.tx_bytes = tx_bytes;
242 vif->dev->stats.tx_packets = tx_packets;
243
164 return &vif->dev->stats; 244 return &vif->dev->stats;
165} 245}
166 246
167static void xenvif_up(struct xenvif *vif) 247static void xenvif_up(struct xenvif *vif)
168{ 248{
169 napi_enable(&vif->napi); 249 struct xenvif_queue *queue = NULL;
170 enable_irq(vif->tx_irq); 250 unsigned int num_queues = vif->num_queues;
171 if (vif->tx_irq != vif->rx_irq) 251 unsigned int queue_index;
172 enable_irq(vif->rx_irq); 252
173 xenvif_napi_schedule_or_enable_events(vif); 253 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
254 queue = &vif->queues[queue_index];
255 napi_enable(&queue->napi);
256 enable_irq(queue->tx_irq);
257 if (queue->tx_irq != queue->rx_irq)
258 enable_irq(queue->rx_irq);
259 xenvif_napi_schedule_or_enable_events(queue);
260 }
174} 261}
175 262
176static void xenvif_down(struct xenvif *vif) 263static void xenvif_down(struct xenvif *vif)
177{ 264{
178 napi_disable(&vif->napi); 265 struct xenvif_queue *queue = NULL;
179 disable_irq(vif->tx_irq); 266 unsigned int num_queues = vif->num_queues;
180 if (vif->tx_irq != vif->rx_irq) 267 unsigned int queue_index;
181 disable_irq(vif->rx_irq); 268
182 del_timer_sync(&vif->credit_timeout); 269 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
270 queue = &vif->queues[queue_index];
271 napi_disable(&queue->napi);
272 disable_irq(queue->tx_irq);
273 if (queue->tx_irq != queue->rx_irq)
274 disable_irq(queue->rx_irq);
275 del_timer_sync(&queue->credit_timeout);
276 }
183} 277}
184 278
185static int xenvif_open(struct net_device *dev) 279static int xenvif_open(struct net_device *dev)
186{ 280{
187 struct xenvif *vif = netdev_priv(dev); 281 struct xenvif *vif = netdev_priv(dev);
188 if (netif_carrier_ok(dev)) 282 if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
189 xenvif_up(vif); 283 xenvif_up(vif);
190 netif_start_queue(dev); 284 netif_tx_start_all_queues(dev);
191 return 0; 285 return 0;
192} 286}
193 287
194static int xenvif_close(struct net_device *dev) 288static int xenvif_close(struct net_device *dev)
195{ 289{
196 struct xenvif *vif = netdev_priv(dev); 290 struct xenvif *vif = netdev_priv(dev);
197 if (netif_carrier_ok(dev)) 291 if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
198 xenvif_down(vif); 292 xenvif_down(vif);
199 netif_stop_queue(dev); 293 netif_tx_stop_all_queues(dev);
200 return 0; 294 return 0;
201} 295}
202 296
@@ -236,29 +330,29 @@ static const struct xenvif_stat {
236} xenvif_stats[] = { 330} xenvif_stats[] = {
237 { 331 {
238 "rx_gso_checksum_fixup", 332 "rx_gso_checksum_fixup",
239 offsetof(struct xenvif, rx_gso_checksum_fixup) 333 offsetof(struct xenvif_stats, rx_gso_checksum_fixup)
240 }, 334 },
241 /* If (sent != success + fail), there are probably packets never 335 /* If (sent != success + fail), there are probably packets never
242 * freed up properly! 336 * freed up properly!
243 */ 337 */
244 { 338 {
245 "tx_zerocopy_sent", 339 "tx_zerocopy_sent",
246 offsetof(struct xenvif, tx_zerocopy_sent), 340 offsetof(struct xenvif_stats, tx_zerocopy_sent),
247 }, 341 },
248 { 342 {
249 "tx_zerocopy_success", 343 "tx_zerocopy_success",
250 offsetof(struct xenvif, tx_zerocopy_success), 344 offsetof(struct xenvif_stats, tx_zerocopy_success),
251 }, 345 },
252 { 346 {
253 "tx_zerocopy_fail", 347 "tx_zerocopy_fail",
254 offsetof(struct xenvif, tx_zerocopy_fail) 348 offsetof(struct xenvif_stats, tx_zerocopy_fail)
255 }, 349 },
256 /* Number of packets exceeding MAX_SKB_FRAG slots. You should use 350 /* Number of packets exceeding MAX_SKB_FRAG slots. You should use
257 * a guest with the same MAX_SKB_FRAG 351 * a guest with the same MAX_SKB_FRAG
258 */ 352 */
259 { 353 {
260 "tx_frag_overflow", 354 "tx_frag_overflow",
261 offsetof(struct xenvif, tx_frag_overflow) 355 offsetof(struct xenvif_stats, tx_frag_overflow)
262 }, 356 },
263}; 357};
264 358
@@ -275,11 +369,20 @@ static int xenvif_get_sset_count(struct net_device *dev, int string_set)
275static void xenvif_get_ethtool_stats(struct net_device *dev, 369static void xenvif_get_ethtool_stats(struct net_device *dev,
276 struct ethtool_stats *stats, u64 * data) 370 struct ethtool_stats *stats, u64 * data)
277{ 371{
278 void *vif = netdev_priv(dev); 372 struct xenvif *vif = netdev_priv(dev);
373 unsigned int num_queues = vif->num_queues;
279 int i; 374 int i;
280 375 unsigned int queue_index;
281 for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) 376 struct xenvif_stats *vif_stats;
282 data[i] = *(unsigned long *)(vif + xenvif_stats[i].offset); 377
378 for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
379 unsigned long accum = 0;
380 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
381 vif_stats = &vif->queues[queue_index].stats;
382 accum += *(unsigned long *)(vif_stats + xenvif_stats[i].offset);
383 }
384 data[i] = accum;
385 }
283} 386}
284 387
285static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data) 388static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
@@ -321,10 +424,14 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
321 struct net_device *dev; 424 struct net_device *dev;
322 struct xenvif *vif; 425 struct xenvif *vif;
323 char name[IFNAMSIZ] = {}; 426 char name[IFNAMSIZ] = {};
324 int i;
325 427
326 snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); 428 snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
327 dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup); 429 /* Allocate a netdev with the max. supported number of queues.
430 * When the guest selects the desired number, it will be updated
431 * via netif_set_real_num_*_queues().
432 */
433 dev = alloc_netdev_mq(sizeof(struct xenvif), name, NET_NAME_UNKNOWN,
434 ether_setup, xenvif_max_queues);
328 if (dev == NULL) { 435 if (dev == NULL) {
329 pr_warn("Could not allocate netdev for %s\n", name); 436 pr_warn("Could not allocate netdev for %s\n", name);
330 return ERR_PTR(-ENOMEM); 437 return ERR_PTR(-ENOMEM);
@@ -334,66 +441,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
334 441
335 vif = netdev_priv(dev); 442 vif = netdev_priv(dev);
336 443
337 vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) *
338 MAX_GRANT_COPY_OPS);
339 if (vif->grant_copy_op == NULL) {
340 pr_warn("Could not allocate grant copy space for %s\n", name);
341 free_netdev(dev);
342 return ERR_PTR(-ENOMEM);
343 }
344
345 vif->domid = domid; 444 vif->domid = domid;
346 vif->handle = handle; 445 vif->handle = handle;
347 vif->can_sg = 1; 446 vif->can_sg = 1;
348 vif->ip_csum = 1; 447 vif->ip_csum = 1;
349 vif->dev = dev; 448 vif->dev = dev;
350
351 vif->disabled = false; 449 vif->disabled = false;
352 450
353 vif->credit_bytes = vif->remaining_credit = ~0UL; 451 /* Start out with no queues. */
354 vif->credit_usec = 0UL; 452 vif->queues = NULL;
355 init_timer(&vif->credit_timeout); 453 vif->num_queues = 0;
356 vif->credit_window_start = get_jiffies_64();
357
358 init_timer(&vif->wake_queue);
359 454
360 dev->netdev_ops = &xenvif_netdev_ops; 455 dev->netdev_ops = &xenvif_netdev_ops;
361 dev->hw_features = NETIF_F_SG | 456 dev->hw_features = NETIF_F_SG |
362 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 457 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
363 NETIF_F_TSO | NETIF_F_TSO6; 458 NETIF_F_TSO | NETIF_F_TSO6;
364 dev->features = dev->hw_features | NETIF_F_RXCSUM; 459 dev->features = dev->hw_features | NETIF_F_RXCSUM;
365 SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops); 460 dev->ethtool_ops = &xenvif_ethtool_ops;
366 461
367 dev->tx_queue_len = XENVIF_QUEUE_LENGTH; 462 dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
368 463
369 skb_queue_head_init(&vif->rx_queue);
370 skb_queue_head_init(&vif->tx_queue);
371
372 vif->pending_cons = 0;
373 vif->pending_prod = MAX_PENDING_REQS;
374 for (i = 0; i < MAX_PENDING_REQS; i++)
375 vif->pending_ring[i] = i;
376 spin_lock_init(&vif->callback_lock);
377 spin_lock_init(&vif->response_lock);
378 /* If ballooning is disabled, this will consume real memory, so you
379 * better enable it. The long term solution would be to use just a
380 * bunch of valid page descriptors, without dependency on ballooning
381 */
382 err = alloc_xenballooned_pages(MAX_PENDING_REQS,
383 vif->mmap_pages,
384 false);
385 if (err) {
386 netdev_err(dev, "Could not reserve mmap_pages\n");
387 return ERR_PTR(-ENOMEM);
388 }
389 for (i = 0; i < MAX_PENDING_REQS; i++) {
390 vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
391 { .callback = xenvif_zerocopy_callback,
392 .ctx = NULL,
393 .desc = i };
394 vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
395 }
396
397 /* 464 /*
398 * Initialise a dummy MAC address. We choose the numerically 465 * Initialise a dummy MAC address. We choose the numerically
399 * largest non-broadcast address to prevent the address getting 466 * largest non-broadcast address to prevent the address getting
@@ -403,8 +470,6 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
403 memset(dev->dev_addr, 0xFF, ETH_ALEN); 470 memset(dev->dev_addr, 0xFF, ETH_ALEN);
404 dev->dev_addr[0] &= ~0x01; 471 dev->dev_addr[0] &= ~0x01;
405 472
406 netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);
407
408 netif_carrier_off(dev); 473 netif_carrier_off(dev);
409 474
410 err = register_netdev(dev); 475 err = register_netdev(dev);
@@ -421,98 +486,148 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
421 return vif; 486 return vif;
422} 487}
423 488
424int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, 489int xenvif_init_queue(struct xenvif_queue *queue)
490{
491 int err, i;
492
493 queue->credit_bytes = queue->remaining_credit = ~0UL;
494 queue->credit_usec = 0UL;
495 init_timer(&queue->credit_timeout);
496 queue->credit_window_start = get_jiffies_64();
497
498 skb_queue_head_init(&queue->rx_queue);
499 skb_queue_head_init(&queue->tx_queue);
500
501 queue->pending_cons = 0;
502 queue->pending_prod = MAX_PENDING_REQS;
503 for (i = 0; i < MAX_PENDING_REQS; ++i)
504 queue->pending_ring[i] = i;
505
506 spin_lock_init(&queue->callback_lock);
507 spin_lock_init(&queue->response_lock);
508
509 /* If ballooning is disabled, this will consume real memory, so you
510 * better enable it. The long term solution would be to use just a
511 * bunch of valid page descriptors, without dependency on ballooning
512 */
513 err = alloc_xenballooned_pages(MAX_PENDING_REQS,
514 queue->mmap_pages,
515 false);
516 if (err) {
517 netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n");
518 return -ENOMEM;
519 }
520
521 for (i = 0; i < MAX_PENDING_REQS; i++) {
522 queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
523 { .callback = xenvif_zerocopy_callback,
524 .ctx = NULL,
525 .desc = i };
526 queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
527 }
528
529 init_timer(&queue->rx_stalled);
530
531 netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
532 XENVIF_NAPI_WEIGHT);
533
534 return 0;
535}
536
537void xenvif_carrier_on(struct xenvif *vif)
538{
539 rtnl_lock();
540 if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
541 dev_set_mtu(vif->dev, ETH_DATA_LEN);
542 netdev_update_features(vif->dev);
543 set_bit(VIF_STATUS_CONNECTED, &vif->status);
544 netif_carrier_on(vif->dev);
545 if (netif_running(vif->dev))
546 xenvif_up(vif);
547 rtnl_unlock();
548}
549
550int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
425 unsigned long rx_ring_ref, unsigned int tx_evtchn, 551 unsigned long rx_ring_ref, unsigned int tx_evtchn,
426 unsigned int rx_evtchn) 552 unsigned int rx_evtchn)
427{ 553{
428 struct task_struct *task; 554 struct task_struct *task;
429 int err = -ENOMEM; 555 int err = -ENOMEM;
430 556
431 BUG_ON(vif->tx_irq); 557 BUG_ON(queue->tx_irq);
432 BUG_ON(vif->task); 558 BUG_ON(queue->task);
433 BUG_ON(vif->dealloc_task); 559 BUG_ON(queue->dealloc_task);
434 560
435 err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref); 561 err = xenvif_map_frontend_rings(queue, tx_ring_ref, rx_ring_ref);
436 if (err < 0) 562 if (err < 0)
437 goto err; 563 goto err;
438 564
439 init_waitqueue_head(&vif->wq); 565 init_waitqueue_head(&queue->wq);
440 init_waitqueue_head(&vif->dealloc_wq); 566 init_waitqueue_head(&queue->dealloc_wq);
441 567
442 if (tx_evtchn == rx_evtchn) { 568 if (tx_evtchn == rx_evtchn) {
443 /* feature-split-event-channels == 0 */ 569 /* feature-split-event-channels == 0 */
444 err = bind_interdomain_evtchn_to_irqhandler( 570 err = bind_interdomain_evtchn_to_irqhandler(
445 vif->domid, tx_evtchn, xenvif_interrupt, 0, 571 queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
446 vif->dev->name, vif); 572 queue->name, queue);
447 if (err < 0) 573 if (err < 0)
448 goto err_unmap; 574 goto err_unmap;
449 vif->tx_irq = vif->rx_irq = err; 575 queue->tx_irq = queue->rx_irq = err;
450 disable_irq(vif->tx_irq); 576 disable_irq(queue->tx_irq);
451 } else { 577 } else {
452 /* feature-split-event-channels == 1 */ 578 /* feature-split-event-channels == 1 */
453 snprintf(vif->tx_irq_name, sizeof(vif->tx_irq_name), 579 snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
454 "%s-tx", vif->dev->name); 580 "%s-tx", queue->name);
455 err = bind_interdomain_evtchn_to_irqhandler( 581 err = bind_interdomain_evtchn_to_irqhandler(
456 vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, 582 queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
457 vif->tx_irq_name, vif); 583 queue->tx_irq_name, queue);
458 if (err < 0) 584 if (err < 0)
459 goto err_unmap; 585 goto err_unmap;
460 vif->tx_irq = err; 586 queue->tx_irq = err;
461 disable_irq(vif->tx_irq); 587 disable_irq(queue->tx_irq);
462 588
463 snprintf(vif->rx_irq_name, sizeof(vif->rx_irq_name), 589 snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
464 "%s-rx", vif->dev->name); 590 "%s-rx", queue->name);
465 err = bind_interdomain_evtchn_to_irqhandler( 591 err = bind_interdomain_evtchn_to_irqhandler(
466 vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, 592 queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
467 vif->rx_irq_name, vif); 593 queue->rx_irq_name, queue);
468 if (err < 0) 594 if (err < 0)
469 goto err_tx_unbind; 595 goto err_tx_unbind;
470 vif->rx_irq = err; 596 queue->rx_irq = err;
471 disable_irq(vif->rx_irq); 597 disable_irq(queue->rx_irq);
472 } 598 }
473 599
474 task = kthread_create(xenvif_kthread_guest_rx, 600 task = kthread_create(xenvif_kthread_guest_rx,
475 (void *)vif, "%s-guest-rx", vif->dev->name); 601 (void *)queue, "%s-guest-rx", queue->name);
476 if (IS_ERR(task)) { 602 if (IS_ERR(task)) {
477 pr_warn("Could not allocate kthread for %s\n", vif->dev->name); 603 pr_warn("Could not allocate kthread for %s\n", queue->name);
478 err = PTR_ERR(task); 604 err = PTR_ERR(task);
479 goto err_rx_unbind; 605 goto err_rx_unbind;
480 } 606 }
481 607 queue->task = task;
482 vif->task = task;
483 608
484 task = kthread_create(xenvif_dealloc_kthread, 609 task = kthread_create(xenvif_dealloc_kthread,
485 (void *)vif, "%s-dealloc", vif->dev->name); 610 (void *)queue, "%s-dealloc", queue->name);
486 if (IS_ERR(task)) { 611 if (IS_ERR(task)) {
487 pr_warn("Could not allocate kthread for %s\n", vif->dev->name); 612 pr_warn("Could not allocate kthread for %s\n", queue->name);
488 err = PTR_ERR(task); 613 err = PTR_ERR(task);
489 goto err_rx_unbind; 614 goto err_rx_unbind;
490 } 615 }
616 queue->dealloc_task = task;
491 617
492 vif->dealloc_task = task; 618 wake_up_process(queue->task);
493 619 wake_up_process(queue->dealloc_task);
494 rtnl_lock();
495 if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
496 dev_set_mtu(vif->dev, ETH_DATA_LEN);
497 netdev_update_features(vif->dev);
498 netif_carrier_on(vif->dev);
499 if (netif_running(vif->dev))
500 xenvif_up(vif);
501 rtnl_unlock();
502
503 wake_up_process(vif->task);
504 wake_up_process(vif->dealloc_task);
505 620
506 return 0; 621 return 0;
507 622
508err_rx_unbind: 623err_rx_unbind:
509 unbind_from_irqhandler(vif->rx_irq, vif); 624 unbind_from_irqhandler(queue->rx_irq, queue);
510 vif->rx_irq = 0; 625 queue->rx_irq = 0;
511err_tx_unbind: 626err_tx_unbind:
512 unbind_from_irqhandler(vif->tx_irq, vif); 627 unbind_from_irqhandler(queue->tx_irq, queue);
513 vif->tx_irq = 0; 628 queue->tx_irq = 0;
514err_unmap: 629err_unmap:
515 xenvif_unmap_frontend_rings(vif); 630 xenvif_unmap_frontend_rings(queue);
516err: 631err:
517 module_put(THIS_MODULE); 632 module_put(THIS_MODULE);
518 return err; 633 return err;
@@ -523,85 +638,106 @@ void xenvif_carrier_off(struct xenvif *vif)
523 struct net_device *dev = vif->dev; 638 struct net_device *dev = vif->dev;
524 639
525 rtnl_lock(); 640 rtnl_lock();
526 netif_carrier_off(dev); /* discard queued packets */ 641 if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) {
527 if (netif_running(dev)) 642 netif_carrier_off(dev); /* discard queued packets */
528 xenvif_down(vif); 643 if (netif_running(dev))
644 xenvif_down(vif);
645 }
529 rtnl_unlock(); 646 rtnl_unlock();
530} 647}
531 648
532void xenvif_disconnect(struct xenvif *vif) 649static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
650 unsigned int worst_case_skb_lifetime)
533{ 651{
534 if (netif_carrier_ok(vif->dev)) 652 int i, unmap_timeout = 0;
535 xenvif_carrier_off(vif);
536 653
537 if (vif->task) { 654 for (i = 0; i < MAX_PENDING_REQS; ++i) {
538 del_timer_sync(&vif->wake_queue); 655 if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
539 kthread_stop(vif->task); 656 unmap_timeout++;
540 vif->task = NULL; 657 schedule_timeout(msecs_to_jiffies(1000));
658 if (unmap_timeout > worst_case_skb_lifetime &&
659 net_ratelimit())
660 netdev_err(queue->vif->dev,
661 "Page still granted! Index: %x\n",
662 i);
663 i = -1;
664 }
541 } 665 }
666}
542 667
543 if (vif->dealloc_task) { 668void xenvif_disconnect(struct xenvif *vif)
544 kthread_stop(vif->dealloc_task); 669{
545 vif->dealloc_task = NULL; 670 struct xenvif_queue *queue = NULL;
546 } 671 unsigned int num_queues = vif->num_queues;
672 unsigned int queue_index;
673
674 xenvif_carrier_off(vif);
675
676 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
677 queue = &vif->queues[queue_index];
678
679 if (queue->task) {
680 del_timer_sync(&queue->rx_stalled);
681 kthread_stop(queue->task);
682 queue->task = NULL;
683 }
547 684
548 if (vif->tx_irq) { 685 if (queue->dealloc_task) {
549 if (vif->tx_irq == vif->rx_irq) 686 kthread_stop(queue->dealloc_task);
550 unbind_from_irqhandler(vif->tx_irq, vif); 687 queue->dealloc_task = NULL;
551 else {
552 unbind_from_irqhandler(vif->tx_irq, vif);
553 unbind_from_irqhandler(vif->rx_irq, vif);
554 } 688 }
555 vif->tx_irq = 0; 689
690 if (queue->tx_irq) {
691 if (queue->tx_irq == queue->rx_irq)
692 unbind_from_irqhandler(queue->tx_irq, queue);
693 else {
694 unbind_from_irqhandler(queue->tx_irq, queue);
695 unbind_from_irqhandler(queue->rx_irq, queue);
696 }
697 queue->tx_irq = 0;
698 }
699
700 xenvif_unmap_frontend_rings(queue);
556 } 701 }
702}
557 703
558 xenvif_unmap_frontend_rings(vif); 704/* Reverse the relevant parts of xenvif_init_queue().
705 * Used for queue teardown from xenvif_free(), and on the
706 * error handling paths in xenbus.c:connect().
707 */
708void xenvif_deinit_queue(struct xenvif_queue *queue)
709{
710 free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
711 netif_napi_del(&queue->napi);
559} 712}
560 713
561void xenvif_free(struct xenvif *vif) 714void xenvif_free(struct xenvif *vif)
562{ 715{
563 int i, unmap_timeout = 0; 716 struct xenvif_queue *queue = NULL;
717 unsigned int num_queues = vif->num_queues;
718 unsigned int queue_index;
564 /* Here we want to avoid timeout messages if an skb can be legitimately 719 /* Here we want to avoid timeout messages if an skb can be legitimately
565 * stuck somewhere else. Realistically this could be an another vif's 720 * stuck somewhere else. Realistically this could be an another vif's
566 * internal or QDisc queue. That another vif also has this 721 * internal or QDisc queue. That another vif also has this
567 * rx_drain_timeout_msecs timeout, but the timer only ditches the 722 * rx_drain_timeout_msecs timeout, so give it time to drain out.
568 * internal queue. After that, the QDisc queue can put in worst case 723 * Although if that other guest wakes up just before its timeout happens
569 * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's 724 * and takes only one skb from QDisc, it can hold onto other skbs for a
570 * internal queue, so we need several rounds of such timeouts until we 725 * longer period.
571 * can be sure that no another vif should have skb's from us. We are
572 * not sending more skb's, so newly stuck packets are not interesting
573 * for us here.
574 */ 726 */
575 unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) * 727 unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
576 DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
577 728
578 for (i = 0; i < MAX_PENDING_REQS; ++i) { 729 unregister_netdev(vif->dev);
579 if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
580 unmap_timeout++;
581 schedule_timeout(msecs_to_jiffies(1000));
582 if (unmap_timeout > worst_case_skb_lifetime &&
583 net_ratelimit())
584 netdev_err(vif->dev,
585 "Page still granted! Index: %x\n",
586 i);
587 /* If there are still unmapped pages, reset the loop to
588 * start checking again. We shouldn't exit here until
589 * dealloc thread and NAPI instance release all the
590 * pages. If a kernel bug causes the skbs to stall
591 * somewhere, the interface cannot be brought down
592 * properly.
593 */
594 i = -1;
595 }
596 }
597
598 free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
599 730
600 netif_napi_del(&vif->napi); 731 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
732 queue = &vif->queues[queue_index];
733 xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
734 xenvif_deinit_queue(queue);
735 }
601 736
602 unregister_netdev(vif->dev); 737 vfree(vif->queues);
738 vif->queues = NULL;
739 vif->num_queues = 0;
603 740
604 vfree(vif->grant_copy_op);
605 free_netdev(vif->dev); 741 free_netdev(vif->dev);
606 742
607 module_put(THIS_MODULE); 743 module_put(THIS_MODULE);
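With interface.c converted to per-queue state, bringing a vif up becomes a loop over queues rather than a single call. The sketch below is a hypothetical outline of how a caller such as the xenbus connect path (whose diff is not included in this excerpt) might drive the new API declared in common.h: allocate the queue array, initialise and connect each queue, then publish the real queue count and turn the carrier on. The helper name and the ring-reference/event-channel parameter arrays are assumptions for illustration, not code from the patch.

/* Hypothetical sketch, not the patch's xenbus.c code.  Ring references and
 * event channels would really be read from xenstore for each queue; error
 * unwinding is omitted for brevity.
 */
static int example_connect_queues(struct xenvif *vif, unsigned int num_queues,
				  const unsigned long *tx_ring_ref,
				  const unsigned long *rx_ring_ref,
				  const unsigned int *tx_evtchn,
				  const unsigned int *rx_evtchn)
{
	unsigned int i;
	int err;

	vif->queues = vzalloc(num_queues * sizeof(struct xenvif_queue));
	if (!vif->queues)
		return -ENOMEM;
	vif->num_queues = num_queues;

	for (i = 0; i < num_queues; i++) {
		struct xenvif_queue *queue = &vif->queues[i];

		queue->vif = vif;
		queue->id = i;
		snprintf(queue->name, sizeof(queue->name), "%s-q%u",
			 vif->dev->name, queue->id);

		err = xenvif_init_queue(queue);
		if (err)
			return err;

		err = xenvif_connect(queue, tx_ring_ref[i], rx_ring_ref[i],
				     tx_evtchn[i], rx_evtchn[i]);
		if (err)
			return err;
	}

	/* Tell the stack how many queues are really in use, then go live. */
	netif_set_real_num_tx_queues(vif->dev, num_queues);
	xenvif_carrier_on(vif);

	return 0;
}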
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 7367208ee8cd..aa2093325be1 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -62,6 +62,11 @@ unsigned int rx_drain_timeout_msecs = 10000;
62module_param(rx_drain_timeout_msecs, uint, 0444); 62module_param(rx_drain_timeout_msecs, uint, 0444);
63unsigned int rx_drain_timeout_jiffies; 63unsigned int rx_drain_timeout_jiffies;
64 64
65unsigned int xenvif_max_queues;
66module_param_named(max_queues, xenvif_max_queues, uint, 0644);
67MODULE_PARM_DESC(max_queues,
68 "Maximum number of queues per virtual interface");
69
65/* 70/*
66 * This is the maximum slots a skb can have. If a guest sends a skb 71 * This is the maximum slots a skb can have. If a guest sends a skb
67 * which exceeds this limit it is considered malicious. 72 * which exceeds this limit it is considered malicious.
@@ -70,33 +75,33 @@ unsigned int rx_drain_timeout_jiffies;
70static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; 75static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
71module_param(fatal_skb_slots, uint, 0444); 76module_param(fatal_skb_slots, uint, 0444);
72 77
73static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 78static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
74 u8 status); 79 u8 status);
75 80
76static void make_tx_response(struct xenvif *vif, 81static void make_tx_response(struct xenvif_queue *queue,
77 struct xen_netif_tx_request *txp, 82 struct xen_netif_tx_request *txp,
78 s8 st); 83 s8 st);
79 84
80static inline int tx_work_todo(struct xenvif *vif); 85static inline int tx_work_todo(struct xenvif_queue *queue);
81static inline int rx_work_todo(struct xenvif *vif); 86static inline int rx_work_todo(struct xenvif_queue *queue);
82 87
83static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 88static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
84 u16 id, 89 u16 id,
85 s8 st, 90 s8 st,
86 u16 offset, 91 u16 offset,
87 u16 size, 92 u16 size,
88 u16 flags); 93 u16 flags);
89 94
90static inline unsigned long idx_to_pfn(struct xenvif *vif, 95static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
91 u16 idx) 96 u16 idx)
92{ 97{
93 return page_to_pfn(vif->mmap_pages[idx]); 98 return page_to_pfn(queue->mmap_pages[idx]);
94} 99}
95 100
96static inline unsigned long idx_to_kaddr(struct xenvif *vif, 101static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
97 u16 idx) 102 u16 idx)
98{ 103{
99 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); 104 return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
100} 105}
101 106
102#define callback_param(vif, pending_idx) \ 107#define callback_param(vif, pending_idx) \
@@ -104,13 +109,13 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
104 109
105/* Find the containing VIF's structure from a pointer in pending_tx_info array 110/* Find the containing VIF's structure from a pointer in pending_tx_info array
106 */ 111 */
107static inline struct xenvif *ubuf_to_vif(const struct ubuf_info *ubuf) 112static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
108{ 113{
109 u16 pending_idx = ubuf->desc; 114 u16 pending_idx = ubuf->desc;
110 struct pending_tx_info *temp = 115 struct pending_tx_info *temp =
111 container_of(ubuf, struct pending_tx_info, callback_struct); 116 container_of(ubuf, struct pending_tx_info, callback_struct);
112 return container_of(temp - pending_idx, 117 return container_of(temp - pending_idx,
113 struct xenvif, 118 struct xenvif_queue,
114 pending_tx_info[0]); 119 pending_tx_info[0]);
115} 120}
116 121
@@ -136,24 +141,24 @@ static inline pending_ring_idx_t pending_index(unsigned i)
136 return i & (MAX_PENDING_REQS-1); 141 return i & (MAX_PENDING_REQS-1);
137} 142}
138 143
139bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed) 144bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
140{ 145{
141 RING_IDX prod, cons; 146 RING_IDX prod, cons;
142 147
143 do { 148 do {
144 prod = vif->rx.sring->req_prod; 149 prod = queue->rx.sring->req_prod;
145 cons = vif->rx.req_cons; 150 cons = queue->rx.req_cons;
146 151
147 if (prod - cons >= needed) 152 if (prod - cons >= needed)
148 return true; 153 return true;
149 154
150 vif->rx.sring->req_event = prod + 1; 155 queue->rx.sring->req_event = prod + 1;
151 156
152 /* Make sure event is visible before we check prod 157 /* Make sure event is visible before we check prod
153 * again. 158 * again.
154 */ 159 */
155 mb(); 160 mb();
156 } while (vif->rx.sring->req_prod != prod); 161 } while (queue->rx.sring->req_prod != prod);
157 162
158 return false; 163 return false;
159} 164}
@@ -163,7 +168,8 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
163 * adding 'size' bytes to a buffer which currently contains 'offset' 168 * adding 'size' bytes to a buffer which currently contains 'offset'
164 * bytes. 169 * bytes.
165 */ 170 */
166static bool start_new_rx_buffer(int offset, unsigned long size, int head) 171static bool start_new_rx_buffer(int offset, unsigned long size, int head,
172 bool full_coalesce)
167{ 173{
168 /* simple case: we have completely filled the current buffer. */ 174 /* simple case: we have completely filled the current buffer. */
169 if (offset == MAX_BUFFER_OFFSET) 175 if (offset == MAX_BUFFER_OFFSET)
@@ -175,6 +181,7 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
175 * (i) this frag would fit completely in the next buffer 181 * (i) this frag would fit completely in the next buffer
176 * and (ii) there is already some data in the current buffer 182 * and (ii) there is already some data in the current buffer
177 * and (iii) this is not the head buffer. 183 * and (iii) this is not the head buffer.
184 * and (iv) there is no need to fully utilize the buffers
178 * 185 *
179 * Where: 186 * Where:
180 * - (i) stops us splitting a frag into two copies 187 * - (i) stops us splitting a frag into two copies
@@ -185,6 +192,8 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
185 * by (ii) but is explicitly checked because 192 * by (ii) but is explicitly checked because
186 * netfront relies on the first buffer being 193 * netfront relies on the first buffer being
187 * non-empty and can crash otherwise. 194 * non-empty and can crash otherwise.
195 * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
196 * slot
188 * 197 *
189 * This means we will effectively linearise small 198 * This means we will effectively linearise small
190 * frags but do not needlessly split large buffers 199 * frags but do not needlessly split large buffers
@@ -192,7 +201,8 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
192 * own buffers as before. 201 * own buffers as before.
193 */ 202 */
194 BUG_ON(size > MAX_BUFFER_OFFSET); 203 BUG_ON(size > MAX_BUFFER_OFFSET);
195 if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head) 204 if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
205 !full_coalesce)
196 return true; 206 return true;
197 207
198 return false; 208 return false;
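To make the new full_coalesce case concrete, assume MAX_BUFFER_OFFSET is one 4096-byte page, copy_off is 3000 and the next chunk is 1500 bytes. Since offset + size exceeds 4096, without full_coalesce the old condition holds and a fresh buffer is started, leaving roughly 1096 bytes of the current one unused; with full_coalesce set (used for skbs that would otherwise need more than MAX_SKB_FRAGS ring slots) the chunk is instead truncated to the 1096 bytes that still fit and the remainder carries over into the next buffer, so every slot is filled completely.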
@@ -207,13 +217,13 @@ struct netrx_pending_operations {
207 grant_ref_t copy_gref; 217 grant_ref_t copy_gref;
208}; 218};
209 219
210static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, 220static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
211 struct netrx_pending_operations *npo) 221 struct netrx_pending_operations *npo)
212{ 222{
213 struct xenvif_rx_meta *meta; 223 struct xenvif_rx_meta *meta;
214 struct xen_netif_rx_request *req; 224 struct xen_netif_rx_request *req;
215 225
216 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 226 req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
217 227
218 meta = npo->meta + npo->meta_prod++; 228 meta = npo->meta + npo->meta_prod++;
219 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; 229 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
@@ -227,15 +237,22 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
227 return meta; 237 return meta;
228} 238}
229 239
240struct xenvif_rx_cb {
241 int meta_slots_used;
242 bool full_coalesce;
243};
244
245#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
246
230/* 247/*
231 * Set up the grant operations for this fragment. If it's a flipping 248 * Set up the grant operations for this fragment. If it's a flipping
232 * interface, we also set up the unmap request from here. 249 * interface, we also set up the unmap request from here.
233 */ 250 */
234static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, 251static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
235 struct netrx_pending_operations *npo, 252 struct netrx_pending_operations *npo,
236 struct page *page, unsigned long size, 253 struct page *page, unsigned long size,
237 unsigned long offset, int *head, 254 unsigned long offset, int *head,
238 struct xenvif *foreign_vif, 255 struct xenvif_queue *foreign_queue,
239 grant_ref_t foreign_gref) 256 grant_ref_t foreign_gref)
240{ 257{
241 struct gnttab_copy *copy_gop; 258 struct gnttab_copy *copy_gop;
@@ -261,14 +278,17 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
261 if (bytes > size) 278 if (bytes > size)
262 bytes = size; 279 bytes = size;
263 280
264 if (start_new_rx_buffer(npo->copy_off, bytes, *head)) { 281 if (start_new_rx_buffer(npo->copy_off,
282 bytes,
283 *head,
284 XENVIF_RX_CB(skb)->full_coalesce)) {
265 /* 285 /*
266 * Netfront requires there to be some data in the head 286 * Netfront requires there to be some data in the head
267 * buffer. 287 * buffer.
268 */ 288 */
269 BUG_ON(*head); 289 BUG_ON(*head);
270 290
271 meta = get_next_rx_buffer(vif, npo); 291 meta = get_next_rx_buffer(queue, npo);
272 } 292 }
273 293
274 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) 294 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
@@ -278,8 +298,8 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
278 copy_gop->flags = GNTCOPY_dest_gref; 298 copy_gop->flags = GNTCOPY_dest_gref;
279 copy_gop->len = bytes; 299 copy_gop->len = bytes;
280 300
281 if (foreign_vif) { 301 if (foreign_queue) {
282 copy_gop->source.domid = foreign_vif->domid; 302 copy_gop->source.domid = foreign_queue->vif->domid;
283 copy_gop->source.u.ref = foreign_gref; 303 copy_gop->source.u.ref = foreign_gref;
284 copy_gop->flags |= GNTCOPY_source_gref; 304 copy_gop->flags |= GNTCOPY_source_gref;
285 } else { 305 } else {
@@ -289,7 +309,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
289 } 309 }
290 copy_gop->source.offset = offset; 310 copy_gop->source.offset = offset;
291 311
292 copy_gop->dest.domid = vif->domid; 312 copy_gop->dest.domid = queue->vif->domid;
293 copy_gop->dest.offset = npo->copy_off; 313 copy_gop->dest.offset = npo->copy_off;
294 copy_gop->dest.u.ref = npo->copy_gref; 314 copy_gop->dest.u.ref = npo->copy_gref;
295 315
@@ -314,8 +334,8 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
314 gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 334 gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
315 } 335 }
316 336
317 if (*head && ((1 << gso_type) & vif->gso_mask)) 337 if (*head && ((1 << gso_type) & queue->vif->gso_mask))
318 vif->rx.req_cons++; 338 queue->rx.req_cons++;
319 339
320 *head = 0; /* There must be something in this buffer now. */ 340 *head = 0; /* There must be something in this buffer now. */
321 341
@@ -337,13 +357,13 @@ static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb,
337 const int i, 357 const int i,
338 const struct ubuf_info *ubuf) 358 const struct ubuf_info *ubuf)
339{ 359{
340 struct xenvif *foreign_vif = ubuf_to_vif(ubuf); 360 struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf);
341 361
342 do { 362 do {
343 u16 pending_idx = ubuf->desc; 363 u16 pending_idx = ubuf->desc;
344 364
345 if (skb_shinfo(skb)->frags[i].page.p == 365 if (skb_shinfo(skb)->frags[i].page.p ==
346 foreign_vif->mmap_pages[pending_idx]) 366 foreign_queue->mmap_pages[pending_idx])
347 break; 367 break;
348 ubuf = (struct ubuf_info *) ubuf->ctx; 368 ubuf = (struct ubuf_info *) ubuf->ctx;
349 } while (ubuf); 369 } while (ubuf);
@@ -364,7 +384,8 @@ static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb,
364 * frontend-side LRO). 384 * frontend-side LRO).
365 */ 385 */
366static int xenvif_gop_skb(struct sk_buff *skb, 386static int xenvif_gop_skb(struct sk_buff *skb,
367 struct netrx_pending_operations *npo) 387 struct netrx_pending_operations *npo,
388 struct xenvif_queue *queue)
368{ 389{
369 struct xenvif *vif = netdev_priv(skb->dev); 390 struct xenvif *vif = netdev_priv(skb->dev);
370 int nr_frags = skb_shinfo(skb)->nr_frags; 391 int nr_frags = skb_shinfo(skb)->nr_frags;
@@ -390,7 +411,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
390 411
391 /* Set up a GSO prefix descriptor, if necessary */ 412 /* Set up a GSO prefix descriptor, if necessary */
392 if ((1 << gso_type) & vif->gso_prefix_mask) { 413 if ((1 << gso_type) & vif->gso_prefix_mask) {
393 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 414 req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
394 meta = npo->meta + npo->meta_prod++; 415 meta = npo->meta + npo->meta_prod++;
395 meta->gso_type = gso_type; 416 meta->gso_type = gso_type;
396 meta->gso_size = skb_shinfo(skb)->gso_size; 417 meta->gso_size = skb_shinfo(skb)->gso_size;
@@ -398,7 +419,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
398 meta->id = req->id; 419 meta->id = req->id;
399 } 420 }
400 421
401 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); 422 req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
402 meta = npo->meta + npo->meta_prod++; 423 meta = npo->meta + npo->meta_prod++;
403 424
404 if ((1 << gso_type) & vif->gso_mask) { 425 if ((1 << gso_type) & vif->gso_mask) {
@@ -422,7 +443,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
422 if (data + len > skb_tail_pointer(skb)) 443 if (data + len > skb_tail_pointer(skb))
423 len = skb_tail_pointer(skb) - data; 444 len = skb_tail_pointer(skb) - data;
424 445
425 xenvif_gop_frag_copy(vif, skb, npo, 446 xenvif_gop_frag_copy(queue, skb, npo,
426 virt_to_page(data), len, offset, &head, 447 virt_to_page(data), len, offset, &head,
427 NULL, 448 NULL,
428 0); 449 0);
@@ -433,7 +454,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
433 /* This variable also signals whether foreign_gref has a real 454 /* This variable also signals whether foreign_gref has a real
434 * value or not. 455 * value or not.
435 */ 456 */
436 struct xenvif *foreign_vif = NULL; 457 struct xenvif_queue *foreign_queue = NULL;
437 grant_ref_t foreign_gref; 458 grant_ref_t foreign_gref;
438 459
439 if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && 460 if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
@@ -458,8 +479,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
458 if (likely(ubuf)) { 479 if (likely(ubuf)) {
459 u16 pending_idx = ubuf->desc; 480 u16 pending_idx = ubuf->desc;
460 481
461 foreign_vif = ubuf_to_vif(ubuf); 482 foreign_queue = ubuf_to_queue(ubuf);
462 foreign_gref = foreign_vif->pending_tx_info[pending_idx].req.gref; 483 foreign_gref =
484 foreign_queue->pending_tx_info[pending_idx].req.gref;
463 /* Just a safety measure. If this was the last 485 /* Just a safety measure. If this was the last
464 * element on the list, the for loop will 486 * element on the list, the for loop will
465 * iterate again if a local page were added to 487 * iterate again if a local page were added to
@@ -477,13 +499,13 @@ static int xenvif_gop_skb(struct sk_buff *skb,
477 */ 499 */
478 ubuf = head_ubuf; 500 ubuf = head_ubuf;
479 } 501 }
480 xenvif_gop_frag_copy(vif, skb, npo, 502 xenvif_gop_frag_copy(queue, skb, npo,
481 skb_frag_page(&skb_shinfo(skb)->frags[i]), 503 skb_frag_page(&skb_shinfo(skb)->frags[i]),
482 skb_frag_size(&skb_shinfo(skb)->frags[i]), 504 skb_frag_size(&skb_shinfo(skb)->frags[i]),
483 skb_shinfo(skb)->frags[i].page_offset, 505 skb_shinfo(skb)->frags[i].page_offset,
484 &head, 506 &head,
485 foreign_vif, 507 foreign_queue,
486 foreign_vif ? foreign_gref : UINT_MAX); 508 foreign_queue ? foreign_gref : UINT_MAX);
487 } 509 }
488 510
489 return npo->meta_prod - old_meta_prod; 511 return npo->meta_prod - old_meta_prod;
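ubuf_to_queue(), used in the hunk above to get from a zerocopy ubuf_info back to the queue that owns the corresponding pending slot, is defined outside this hunk. A plausible sketch, assuming the ubuf_info is the callback_struct member embedded in struct pending_tx_info and that pending_tx_info[] is indexed by pending_idx (both are assumptions about code not shown here):

static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);

	/* Step back pending_idx entries to reach element 0 of the
	 * per-queue pending_tx_info[] array, then recover the queue.
	 */
	return container_of(temp - pending_idx,
			    struct xenvif_queue,
			    pending_tx_info[0]);
}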
@@ -515,7 +537,7 @@ static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
515 return status; 537 return status;
516} 538}
517 539
518static void xenvif_add_frag_responses(struct xenvif *vif, int status, 540static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
519 struct xenvif_rx_meta *meta, 541 struct xenvif_rx_meta *meta,
520 int nr_meta_slots) 542 int nr_meta_slots)
521{ 543{
@@ -536,23 +558,17 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status,
536 flags = XEN_NETRXF_more_data; 558 flags = XEN_NETRXF_more_data;
537 559
538 offset = 0; 560 offset = 0;
539 make_rx_response(vif, meta[i].id, status, offset, 561 make_rx_response(queue, meta[i].id, status, offset,
540 meta[i].size, flags); 562 meta[i].size, flags);
541 } 563 }
542} 564}
543 565
544struct xenvif_rx_cb { 566void xenvif_kick_thread(struct xenvif_queue *queue)
545 int meta_slots_used;
546};
547
548#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
549
550void xenvif_kick_thread(struct xenvif *vif)
551{ 567{
552 wake_up(&vif->wq); 568 wake_up(&queue->wq);
553} 569}
554 570
555static void xenvif_rx_action(struct xenvif *vif) 571static void xenvif_rx_action(struct xenvif_queue *queue)
556{ 572{
557 s8 status; 573 s8 status;
558 u16 flags; 574 u16 flags;
@@ -565,13 +581,13 @@ static void xenvif_rx_action(struct xenvif *vif)
565 bool need_to_notify = false; 581 bool need_to_notify = false;
566 582
567 struct netrx_pending_operations npo = { 583 struct netrx_pending_operations npo = {
568 .copy = vif->grant_copy_op, 584 .copy = queue->grant_copy_op,
569 .meta = vif->meta, 585 .meta = queue->meta,
570 }; 586 };
571 587
572 skb_queue_head_init(&rxq); 588 skb_queue_head_init(&rxq);
573 589
574 while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { 590 while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
575 RING_IDX max_slots_needed; 591 RING_IDX max_slots_needed;
576 RING_IDX old_req_cons; 592 RING_IDX old_req_cons;
577 RING_IDX ring_slots_used; 593 RING_IDX ring_slots_used;
@@ -602,10 +618,15 @@ static void xenvif_rx_action(struct xenvif *vif)
602 618
603 /* To avoid the estimate becoming too pessimal for some 619 /* To avoid the estimate becoming too pessimal for some
604 * frontends that limit posted rx requests, cap the estimate 620 * frontends that limit posted rx requests, cap the estimate
605 * at MAX_SKB_FRAGS. 621 * at MAX_SKB_FRAGS. In this case netback will fully coalesce
622 * the skb into the provided slots.
606 */ 623 */
607 if (max_slots_needed > MAX_SKB_FRAGS) 624 if (max_slots_needed > MAX_SKB_FRAGS) {
608 max_slots_needed = MAX_SKB_FRAGS; 625 max_slots_needed = MAX_SKB_FRAGS;
626 XENVIF_RX_CB(skb)->full_coalesce = true;
627 } else {
628 XENVIF_RX_CB(skb)->full_coalesce = false;
629 }
609 630
610 /* We may need one more slot for GSO metadata */ 631 /* We may need one more slot for GSO metadata */
611 if (skb_is_gso(skb) && 632 if (skb_is_gso(skb) &&
@@ -614,42 +635,42 @@ static void xenvif_rx_action(struct xenvif *vif)
614 max_slots_needed++; 635 max_slots_needed++;
615 636
616 /* If the skb may not fit then bail out now */ 637 /* If the skb may not fit then bail out now */
617 if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) { 638 if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
618 skb_queue_head(&vif->rx_queue, skb); 639 skb_queue_head(&queue->rx_queue, skb);
619 need_to_notify = true; 640 need_to_notify = true;
620 vif->rx_last_skb_slots = max_slots_needed; 641 queue->rx_last_skb_slots = max_slots_needed;
621 break; 642 break;
622 } else 643 } else
623 vif->rx_last_skb_slots = 0; 644 queue->rx_last_skb_slots = 0;
624 645
625 old_req_cons = vif->rx.req_cons; 646 old_req_cons = queue->rx.req_cons;
626 XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo); 647 XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
627 ring_slots_used = vif->rx.req_cons - old_req_cons; 648 ring_slots_used = queue->rx.req_cons - old_req_cons;
628 649
629 BUG_ON(ring_slots_used > max_slots_needed); 650 BUG_ON(ring_slots_used > max_slots_needed);
630 651
631 __skb_queue_tail(&rxq, skb); 652 __skb_queue_tail(&rxq, skb);
632 } 653 }
633 654
634 BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); 655 BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
635 656
636 if (!npo.copy_prod) 657 if (!npo.copy_prod)
637 goto done; 658 goto done;
638 659
639 BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); 660 BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
640 gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); 661 gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
641 662
642 while ((skb = __skb_dequeue(&rxq)) != NULL) { 663 while ((skb = __skb_dequeue(&rxq)) != NULL) {
643 664
644 if ((1 << vif->meta[npo.meta_cons].gso_type) & 665 if ((1 << queue->meta[npo.meta_cons].gso_type) &
645 vif->gso_prefix_mask) { 666 queue->vif->gso_prefix_mask) {
646 resp = RING_GET_RESPONSE(&vif->rx, 667 resp = RING_GET_RESPONSE(&queue->rx,
647 vif->rx.rsp_prod_pvt++); 668 queue->rx.rsp_prod_pvt++);
648 669
649 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; 670 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
650 671
651 resp->offset = vif->meta[npo.meta_cons].gso_size; 672 resp->offset = queue->meta[npo.meta_cons].gso_size;
652 resp->id = vif->meta[npo.meta_cons].id; 673 resp->id = queue->meta[npo.meta_cons].id;
653 resp->status = XENVIF_RX_CB(skb)->meta_slots_used; 674 resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
654 675
655 npo.meta_cons++; 676 npo.meta_cons++;
@@ -657,10 +678,10 @@ static void xenvif_rx_action(struct xenvif *vif)
657 } 678 }
658 679
659 680
660 vif->dev->stats.tx_bytes += skb->len; 681 queue->stats.tx_bytes += skb->len;
661 vif->dev->stats.tx_packets++; 682 queue->stats.tx_packets++;
662 683
663 status = xenvif_check_gop(vif, 684 status = xenvif_check_gop(queue->vif,
664 XENVIF_RX_CB(skb)->meta_slots_used, 685 XENVIF_RX_CB(skb)->meta_slots_used,
665 &npo); 686 &npo);
666 687
@@ -676,22 +697,22 @@ static void xenvif_rx_action(struct xenvif *vif)
676 flags |= XEN_NETRXF_data_validated; 697 flags |= XEN_NETRXF_data_validated;
677 698
678 offset = 0; 699 offset = 0;
679 resp = make_rx_response(vif, vif->meta[npo.meta_cons].id, 700 resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
680 status, offset, 701 status, offset,
681 vif->meta[npo.meta_cons].size, 702 queue->meta[npo.meta_cons].size,
682 flags); 703 flags);
683 704
684 if ((1 << vif->meta[npo.meta_cons].gso_type) & 705 if ((1 << queue->meta[npo.meta_cons].gso_type) &
685 vif->gso_mask) { 706 queue->vif->gso_mask) {
686 struct xen_netif_extra_info *gso = 707 struct xen_netif_extra_info *gso =
687 (struct xen_netif_extra_info *) 708 (struct xen_netif_extra_info *)
688 RING_GET_RESPONSE(&vif->rx, 709 RING_GET_RESPONSE(&queue->rx,
689 vif->rx.rsp_prod_pvt++); 710 queue->rx.rsp_prod_pvt++);
690 711
691 resp->flags |= XEN_NETRXF_extra_info; 712 resp->flags |= XEN_NETRXF_extra_info;
692 713
693 gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; 714 gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
694 gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; 715 gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
695 gso->u.gso.pad = 0; 716 gso->u.gso.pad = 0;
696 gso->u.gso.features = 0; 717 gso->u.gso.features = 0;
697 718
@@ -699,11 +720,11 @@ static void xenvif_rx_action(struct xenvif *vif)
699 gso->flags = 0; 720 gso->flags = 0;
700 } 721 }
701 722
702 xenvif_add_frag_responses(vif, status, 723 xenvif_add_frag_responses(queue, status,
703 vif->meta + npo.meta_cons + 1, 724 queue->meta + npo.meta_cons + 1,
704 XENVIF_RX_CB(skb)->meta_slots_used); 725 XENVIF_RX_CB(skb)->meta_slots_used);
705 726
706 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); 727 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
707 728
708 need_to_notify |= !!ret; 729 need_to_notify |= !!ret;
709 730
@@ -713,20 +734,20 @@ static void xenvif_rx_action(struct xenvif *vif)
713 734
714done: 735done:
715 if (need_to_notify) 736 if (need_to_notify)
716 notify_remote_via_irq(vif->rx_irq); 737 notify_remote_via_irq(queue->rx_irq);
717} 738}
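The MAX_SKB_FRAGS cap and the full_coalesce flag set near the top of xenvif_rx_action() act on a per-skb ring-slot estimate computed just before them (that estimator sits in context lines not shown in this hunk). The arithmetic is roughly the following sketch, an approximation rather than the driver's literal code:

static unsigned int estimate_rx_slots(const struct sk_buff *skb)
{
	/* Linear area first; it may span several pages. */
	unsigned int slots = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
		unsigned int offset = skb_shinfo(skb)->frags[i].page_offset;

		/* A fragment can straddle page boundaries, so count pages
		 * from its offset to its end.
		 */
		slots += DIV_ROUND_UP(offset + size, PAGE_SIZE);
	}

	/* The caller caps this at MAX_SKB_FRAGS and, if the cap kicks in,
	 * marks the skb full_coalesce so it is squeezed into those slots.
	 */
	return slots;
}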
718 739
719void xenvif_napi_schedule_or_enable_events(struct xenvif *vif) 740void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
720{ 741{
721 int more_to_do; 742 int more_to_do;
722 743
723 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); 744 RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
724 745
725 if (more_to_do) 746 if (more_to_do)
726 napi_schedule(&vif->napi); 747 napi_schedule(&queue->napi);
727} 748}
728 749
729static void tx_add_credit(struct xenvif *vif) 750static void tx_add_credit(struct xenvif_queue *queue)
730{ 751{
731 unsigned long max_burst, max_credit; 752 unsigned long max_burst, max_credit;
732 753
@@ -734,55 +755,57 @@ static void tx_add_credit(struct xenvif *vif)
734 * Allow a burst big enough to transmit a jumbo packet of up to 128kB. 755 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
735 * Otherwise the interface can seize up due to insufficient credit. 756 * Otherwise the interface can seize up due to insufficient credit.
736 */ 757 */
737 max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size; 758 max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
738 max_burst = min(max_burst, 131072UL); 759 max_burst = min(max_burst, 131072UL);
739 max_burst = max(max_burst, vif->credit_bytes); 760 max_burst = max(max_burst, queue->credit_bytes);
740 761
741 /* Take care that adding a new chunk of credit doesn't wrap to zero. */ 762 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
742 max_credit = vif->remaining_credit + vif->credit_bytes; 763 max_credit = queue->remaining_credit + queue->credit_bytes;
743 if (max_credit < vif->remaining_credit) 764 if (max_credit < queue->remaining_credit)
744 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ 765 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
745 766
746 vif->remaining_credit = min(max_credit, max_burst); 767 queue->remaining_credit = min(max_credit, max_burst);
747} 768}
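The wrap check in tx_add_credit() is a saturating addition followed by a burst clamp; the same arithmetic in isolation (the values come from whatever the queue was configured with, nothing here is taken from a specific deployment):

static unsigned long replenish(unsigned long remaining,
			       unsigned long credit_bytes,
			       unsigned long max_burst)
{
	unsigned long max_credit = remaining + credit_bytes;

	if (max_credit < remaining)		/* wrapped: clamp */
		max_credit = ULONG_MAX;

	/* Never hand out more than one burst worth of credit. */
	return min(max_credit, max_burst);
}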
748 769
749static void tx_credit_callback(unsigned long data) 770static void tx_credit_callback(unsigned long data)
750{ 771{
751 struct xenvif *vif = (struct xenvif *)data; 772 struct xenvif_queue *queue = (struct xenvif_queue *)data;
752 tx_add_credit(vif); 773 tx_add_credit(queue);
753 xenvif_napi_schedule_or_enable_events(vif); 774 xenvif_napi_schedule_or_enable_events(queue);
754} 775}
755 776
756static void xenvif_tx_err(struct xenvif *vif, 777static void xenvif_tx_err(struct xenvif_queue *queue,
757 struct xen_netif_tx_request *txp, RING_IDX end) 778 struct xen_netif_tx_request *txp, RING_IDX end)
758{ 779{
759 RING_IDX cons = vif->tx.req_cons; 780 RING_IDX cons = queue->tx.req_cons;
760 unsigned long flags; 781 unsigned long flags;
761 782
762 do { 783 do {
763 spin_lock_irqsave(&vif->response_lock, flags); 784 spin_lock_irqsave(&queue->response_lock, flags);
764 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); 785 make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
765 spin_unlock_irqrestore(&vif->response_lock, flags); 786 spin_unlock_irqrestore(&queue->response_lock, flags);
766 if (cons == end) 787 if (cons == end)
767 break; 788 break;
768 txp = RING_GET_REQUEST(&vif->tx, cons++); 789 txp = RING_GET_REQUEST(&queue->tx, cons++);
769 } while (1); 790 } while (1);
770 vif->tx.req_cons = cons; 791 queue->tx.req_cons = cons;
771} 792}
772 793
773static void xenvif_fatal_tx_err(struct xenvif *vif) 794static void xenvif_fatal_tx_err(struct xenvif *vif)
774{ 795{
775 netdev_err(vif->dev, "fatal error; disabling device\n"); 796 netdev_err(vif->dev, "fatal error; disabling device\n");
776 vif->disabled = true; 797 vif->disabled = true;
777 xenvif_kick_thread(vif); 798 /* Disable the vif from queue 0's kthread */
799 if (vif->queues)
800 xenvif_kick_thread(&vif->queues[0]);
778} 801}
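Setting vif->disabled and kicking queue 0's kthread only has an effect because the RX kthread re-checks the flag when it wakes up; that loop lives outside this hunk, so the following is a sketch of the assumed behaviour (rx_work_todo() stands in for the driver's own wake-up condition), not the literal code:

static int xenvif_kthread_guest_rx_sketch(void *data)
{
	struct xenvif_queue *queue = data;

	for (;;) {
		wait_event_interruptible(queue->wq,
					 rx_work_todo(queue) ||
					 queue->vif->disabled ||
					 kthread_should_stop());

		if (kthread_should_stop())
			break;

		/* A rogue frontend was detected: only queue 0 turns the
		 * carrier off, but every queue stops doing RX work.
		 */
		if (unlikely(queue->vif->disabled)) {
			if (queue->id == 0)
				xenvif_carrier_off(queue->vif);
			continue;
		}

		xenvif_rx_action(queue);
		cond_resched();
	}

	return 0;
}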
779 802
780static int xenvif_count_requests(struct xenvif *vif, 803static int xenvif_count_requests(struct xenvif_queue *queue,
781 struct xen_netif_tx_request *first, 804 struct xen_netif_tx_request *first,
782 struct xen_netif_tx_request *txp, 805 struct xen_netif_tx_request *txp,
783 int work_to_do) 806 int work_to_do)
784{ 807{
785 RING_IDX cons = vif->tx.req_cons; 808 RING_IDX cons = queue->tx.req_cons;
786 int slots = 0; 809 int slots = 0;
787 int drop_err = 0; 810 int drop_err = 0;
788 int more_data; 811 int more_data;
@@ -794,10 +817,10 @@ static int xenvif_count_requests(struct xenvif *vif,
794 struct xen_netif_tx_request dropped_tx = { 0 }; 817 struct xen_netif_tx_request dropped_tx = { 0 };
795 818
796 if (slots >= work_to_do) { 819 if (slots >= work_to_do) {
797 netdev_err(vif->dev, 820 netdev_err(queue->vif->dev,
798 "Asked for %d slots but exceeds this limit\n", 821 "Asked for %d slots but exceeds this limit\n",
799 work_to_do); 822 work_to_do);
800 xenvif_fatal_tx_err(vif); 823 xenvif_fatal_tx_err(queue->vif);
801 return -ENODATA; 824 return -ENODATA;
802 } 825 }
803 826
@@ -805,10 +828,10 @@ static int xenvif_count_requests(struct xenvif *vif,
805 * considered malicious. 828 * considered malicious.
806 */ 829 */
807 if (unlikely(slots >= fatal_skb_slots)) { 830 if (unlikely(slots >= fatal_skb_slots)) {
808 netdev_err(vif->dev, 831 netdev_err(queue->vif->dev,
809 "Malicious frontend using %d slots, threshold %u\n", 832 "Malicious frontend using %d slots, threshold %u\n",
810 slots, fatal_skb_slots); 833 slots, fatal_skb_slots);
811 xenvif_fatal_tx_err(vif); 834 xenvif_fatal_tx_err(queue->vif);
812 return -E2BIG; 835 return -E2BIG;
813 } 836 }
814 837
@@ -821,7 +844,7 @@ static int xenvif_count_requests(struct xenvif *vif,
821 */ 844 */
822 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { 845 if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
823 if (net_ratelimit()) 846 if (net_ratelimit())
824 netdev_dbg(vif->dev, 847 netdev_dbg(queue->vif->dev,
825 "Too many slots (%d) exceeding limit (%d), dropping packet\n", 848 "Too many slots (%d) exceeding limit (%d), dropping packet\n",
826 slots, XEN_NETBK_LEGACY_SLOTS_MAX); 849 slots, XEN_NETBK_LEGACY_SLOTS_MAX);
827 drop_err = -E2BIG; 850 drop_err = -E2BIG;
@@ -830,7 +853,7 @@ static int xenvif_count_requests(struct xenvif *vif,
830 if (drop_err) 853 if (drop_err)
831 txp = &dropped_tx; 854 txp = &dropped_tx;
832 855
833 memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), 856 memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
834 sizeof(*txp)); 857 sizeof(*txp));
835 858
836 /* If the guest submitted a frame >= 64 KiB then 859 /* If the guest submitted a frame >= 64 KiB then
@@ -844,7 +867,7 @@ static int xenvif_count_requests(struct xenvif *vif,
844 */ 867 */
845 if (!drop_err && txp->size > first->size) { 868 if (!drop_err && txp->size > first->size) {
846 if (net_ratelimit()) 869 if (net_ratelimit())
847 netdev_dbg(vif->dev, 870 netdev_dbg(queue->vif->dev,
848 "Invalid tx request, slot size %u > remaining size %u\n", 871 "Invalid tx request, slot size %u > remaining size %u\n",
849 txp->size, first->size); 872 txp->size, first->size);
850 drop_err = -EIO; 873 drop_err = -EIO;
@@ -854,9 +877,9 @@ static int xenvif_count_requests(struct xenvif *vif,
854 slots++; 877 slots++;
855 878
856 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { 879 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
857 netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", 880 netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
858 txp->offset, txp->size); 881 txp->offset, txp->size);
859 xenvif_fatal_tx_err(vif); 882 xenvif_fatal_tx_err(queue->vif);
860 return -EINVAL; 883 return -EINVAL;
861 } 884 }
862 885
@@ -868,7 +891,7 @@ static int xenvif_count_requests(struct xenvif *vif,
868 } while (more_data); 891 } while (more_data);
869 892
870 if (drop_err) { 893 if (drop_err) {
871 xenvif_tx_err(vif, first, cons + slots); 894 xenvif_tx_err(queue, first, cons + slots);
872 return drop_err; 895 return drop_err;
873 } 896 }
874 897
@@ -882,17 +905,17 @@ struct xenvif_tx_cb {
882 905
883#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) 906#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
884 907
885static inline void xenvif_tx_create_map_op(struct xenvif *vif, 908static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
886 u16 pending_idx, 909 u16 pending_idx,
887 struct xen_netif_tx_request *txp, 910 struct xen_netif_tx_request *txp,
888 struct gnttab_map_grant_ref *mop) 911 struct gnttab_map_grant_ref *mop)
889{ 912{
890 vif->pages_to_map[mop-vif->tx_map_ops] = vif->mmap_pages[pending_idx]; 913 queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
891 gnttab_set_map_op(mop, idx_to_kaddr(vif, pending_idx), 914 gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
892 GNTMAP_host_map | GNTMAP_readonly, 915 GNTMAP_host_map | GNTMAP_readonly,
893 txp->gref, vif->domid); 916 txp->gref, queue->vif->domid);
894 917
895 memcpy(&vif->pending_tx_info[pending_idx].req, txp, 918 memcpy(&queue->pending_tx_info[pending_idx].req, txp,
896 sizeof(*txp)); 919 sizeof(*txp));
897} 920}
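idx_to_kaddr() above now takes the queue instead of the vif; its only job is to turn a pending slot index into the kernel virtual address of the page backing that slot. Under that assumption the helper pair looks like:

static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, u16 idx)
{
	return page_to_pfn(queue->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue, u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}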
898 921
@@ -913,7 +936,7 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
913 return skb; 936 return skb;
914} 937}
915 938
916static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif, 939static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
917 struct sk_buff *skb, 940 struct sk_buff *skb,
918 struct xen_netif_tx_request *txp, 941 struct xen_netif_tx_request *txp,
919 struct gnttab_map_grant_ref *gop) 942 struct gnttab_map_grant_ref *gop)
@@ -940,9 +963,9 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
940 963
941 for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; 964 for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
942 shinfo->nr_frags++, txp++, gop++) { 965 shinfo->nr_frags++, txp++, gop++) {
943 index = pending_index(vif->pending_cons++); 966 index = pending_index(queue->pending_cons++);
944 pending_idx = vif->pending_ring[index]; 967 pending_idx = queue->pending_ring[index];
945 xenvif_tx_create_map_op(vif, pending_idx, txp, gop); 968 xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
946 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); 969 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
947 } 970 }
948 971
@@ -950,7 +973,7 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
950 struct sk_buff *nskb = xenvif_alloc_skb(0); 973 struct sk_buff *nskb = xenvif_alloc_skb(0);
951 if (unlikely(nskb == NULL)) { 974 if (unlikely(nskb == NULL)) {
952 if (net_ratelimit()) 975 if (net_ratelimit())
953 netdev_err(vif->dev, 976 netdev_err(queue->vif->dev,
954 "Can't allocate the frag_list skb.\n"); 977 "Can't allocate the frag_list skb.\n");
955 return NULL; 978 return NULL;
956 } 979 }
@@ -960,9 +983,9 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
960 983
961 for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow; 984 for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
962 shinfo->nr_frags++, txp++, gop++) { 985 shinfo->nr_frags++, txp++, gop++) {
963 index = pending_index(vif->pending_cons++); 986 index = pending_index(queue->pending_cons++);
964 pending_idx = vif->pending_ring[index]; 987 pending_idx = queue->pending_ring[index];
965 xenvif_tx_create_map_op(vif, pending_idx, txp, gop); 988 xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
966 frag_set_pending_idx(&frags[shinfo->nr_frags], 989 frag_set_pending_idx(&frags[shinfo->nr_frags],
967 pending_idx); 990 pending_idx);
968 } 991 }
@@ -973,57 +996,68 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
973 return gop; 996 return gop;
974} 997}
975 998
976static inline void xenvif_grant_handle_set(struct xenvif *vif, 999static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
977 u16 pending_idx, 1000 u16 pending_idx,
978 grant_handle_t handle) 1001 grant_handle_t handle)
979{ 1002{
980 if (unlikely(vif->grant_tx_handle[pending_idx] != 1003 if (unlikely(queue->grant_tx_handle[pending_idx] !=
981 NETBACK_INVALID_HANDLE)) { 1004 NETBACK_INVALID_HANDLE)) {
982 netdev_err(vif->dev, 1005 netdev_err(queue->vif->dev,
983 "Trying to overwrite active handle! pending_idx: %x\n", 1006 "Trying to overwrite active handle! pending_idx: %x\n",
984 pending_idx); 1007 pending_idx);
985 BUG(); 1008 BUG();
986 } 1009 }
987 vif->grant_tx_handle[pending_idx] = handle; 1010 queue->grant_tx_handle[pending_idx] = handle;
988} 1011}
989 1012
990static inline void xenvif_grant_handle_reset(struct xenvif *vif, 1013static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
991 u16 pending_idx) 1014 u16 pending_idx)
992{ 1015{
993 if (unlikely(vif->grant_tx_handle[pending_idx] == 1016 if (unlikely(queue->grant_tx_handle[pending_idx] ==
994 NETBACK_INVALID_HANDLE)) { 1017 NETBACK_INVALID_HANDLE)) {
995 netdev_err(vif->dev, 1018 netdev_err(queue->vif->dev,
996 "Trying to unmap invalid handle! pending_idx: %x\n", 1019 "Trying to unmap invalid handle! pending_idx: %x\n",
997 pending_idx); 1020 pending_idx);
998 BUG(); 1021 BUG();
999 } 1022 }
1000 vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE; 1023 queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
1001} 1024}
1002 1025
1003static int xenvif_tx_check_gop(struct xenvif *vif, 1026static int xenvif_tx_check_gop(struct xenvif_queue *queue,
1004 struct sk_buff *skb, 1027 struct sk_buff *skb,
1005 struct gnttab_map_grant_ref **gopp_map, 1028 struct gnttab_map_grant_ref **gopp_map,
1006 struct gnttab_copy **gopp_copy) 1029 struct gnttab_copy **gopp_copy)
1007{ 1030{
1008 struct gnttab_map_grant_ref *gop_map = *gopp_map; 1031 struct gnttab_map_grant_ref *gop_map = *gopp_map;
1009 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 1032 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
1033 /* This always points to the shinfo of the skb being checked, which
1034 * could be either the first or the one on the frag_list
1035 */
1010 struct skb_shared_info *shinfo = skb_shinfo(skb); 1036 struct skb_shared_info *shinfo = skb_shinfo(skb);
1037 /* If this is non-NULL, we are currently checking the frag_list skb, and
1038 * this points to the shinfo of the first one
1039 */
1040 struct skb_shared_info *first_shinfo = NULL;
1011 int nr_frags = shinfo->nr_frags; 1041 int nr_frags = shinfo->nr_frags;
1042 const bool sharedslot = nr_frags &&
1043 frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
1012 int i, err; 1044 int i, err;
1013 struct sk_buff *first_skb = NULL;
1014 1045
1015 /* Check status of header. */ 1046 /* Check status of header. */
1016 err = (*gopp_copy)->status; 1047 err = (*gopp_copy)->status;
1017 (*gopp_copy)++;
1018 if (unlikely(err)) { 1048 if (unlikely(err)) {
1019 if (net_ratelimit()) 1049 if (net_ratelimit())
1020 netdev_dbg(vif->dev, 1050 netdev_dbg(queue->vif->dev,
1021 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", 1051 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
1022 (*gopp_copy)->status, 1052 (*gopp_copy)->status,
1023 pending_idx, 1053 pending_idx,
1024 (*gopp_copy)->source.u.ref); 1054 (*gopp_copy)->source.u.ref);
1025 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1055 /* The first frag might still have this slot mapped */
1056 if (!sharedslot)
1057 xenvif_idx_release(queue, pending_idx,
1058 XEN_NETIF_RSP_ERROR);
1026 } 1059 }
1060 (*gopp_copy)++;
1027 1061
1028check_frags: 1062check_frags:
1029 for (i = 0; i < nr_frags; i++, gop_map++) { 1063 for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -1035,64 +1069,86 @@ check_frags:
1035 newerr = gop_map->status; 1069 newerr = gop_map->status;
1036 1070
1037 if (likely(!newerr)) { 1071 if (likely(!newerr)) {
1038 xenvif_grant_handle_set(vif, 1072 xenvif_grant_handle_set(queue,
1039 pending_idx, 1073 pending_idx,
1040 gop_map->handle); 1074 gop_map->handle);
1041 /* Had a previous error? Invalidate this fragment. */ 1075 /* Had a previous error? Invalidate this fragment. */
1042 if (unlikely(err)) 1076 if (unlikely(err)) {
1043 xenvif_idx_unmap(vif, pending_idx); 1077 xenvif_idx_unmap(queue, pending_idx);
1078 /* If the mapping of the first frag was OK, but
1079 * the header's copy failed, and they are
1080 * sharing a slot, send an error
1081 */
1082 if (i == 0 && sharedslot)
1083 xenvif_idx_release(queue, pending_idx,
1084 XEN_NETIF_RSP_ERROR);
1085 else
1086 xenvif_idx_release(queue, pending_idx,
1087 XEN_NETIF_RSP_OKAY);
1088 }
1044 continue; 1089 continue;
1045 } 1090 }
1046 1091
1047 /* Error on this fragment: respond to client with an error. */ 1092 /* Error on this fragment: respond to client with an error. */
1048 if (net_ratelimit()) 1093 if (net_ratelimit())
1049 netdev_dbg(vif->dev, 1094 netdev_dbg(queue->vif->dev,
1050 "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n", 1095 "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
1051 i, 1096 i,
1052 gop_map->status, 1097 gop_map->status,
1053 pending_idx, 1098 pending_idx,
1054 gop_map->ref); 1099 gop_map->ref);
1055 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 1100
1101 xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
1056 1102
1057 /* Not the first error? Preceding frags already invalidated. */ 1103 /* Not the first error? Preceding frags already invalidated. */
1058 if (err) 1104 if (err)
1059 continue; 1105 continue;
1060 /* First error: invalidate preceding fragments. */ 1106
 1107 /* First error: if the header hasn't shared a slot with the
1108 * first frag, release it as well.
1109 */
1110 if (!sharedslot)
1111 xenvif_idx_release(queue,
1112 XENVIF_TX_CB(skb)->pending_idx,
1113 XEN_NETIF_RSP_OKAY);
1114
1115 /* Invalidate preceding fragments of this skb. */
1061 for (j = 0; j < i; j++) { 1116 for (j = 0; j < i; j++) {
1062 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 1117 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1063 xenvif_idx_unmap(vif, pending_idx); 1118 xenvif_idx_unmap(queue, pending_idx);
1119 xenvif_idx_release(queue, pending_idx,
1120 XEN_NETIF_RSP_OKAY);
1121 }
1122
1123 /* And if we found the error while checking the frag_list, unmap
1124 * the first skb's frags
1125 */
1126 if (first_shinfo) {
1127 for (j = 0; j < first_shinfo->nr_frags; j++) {
1128 pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
1129 xenvif_idx_unmap(queue, pending_idx);
1130 xenvif_idx_release(queue, pending_idx,
1131 XEN_NETIF_RSP_OKAY);
1132 }
1064 } 1133 }
1065 1134
1066 /* Remember the error: invalidate all subsequent fragments. */ 1135 /* Remember the error: invalidate all subsequent fragments. */
1067 err = newerr; 1136 err = newerr;
1068 } 1137 }
1069 1138
1070 if (skb_has_frag_list(skb)) { 1139 if (skb_has_frag_list(skb) && !first_shinfo) {
1071 first_skb = skb; 1140 first_shinfo = skb_shinfo(skb);
1072 skb = shinfo->frag_list; 1141 shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
1073 shinfo = skb_shinfo(skb);
1074 nr_frags = shinfo->nr_frags; 1142 nr_frags = shinfo->nr_frags;
1075 1143
1076 goto check_frags; 1144 goto check_frags;
1077 } 1145 }
1078 1146
1079 /* There was a mapping error in the frag_list skb. We have to unmap
1080 * the first skb's frags
1081 */
1082 if (first_skb && err) {
1083 int j;
1084 shinfo = skb_shinfo(first_skb);
1085 for (j = 0; j < shinfo->nr_frags; j++) {
1086 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1087 xenvif_idx_unmap(vif, pending_idx);
1088 }
1089 }
1090
1091 *gopp_map = gop_map; 1147 *gopp_map = gop_map;
1092 return err; 1148 return err;
1093} 1149}
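The sharedslot handling is the subtle part of the rewritten error path: when the header's grant copy and the first fragment's grant map refer to the same pending slot, that slot must get exactly one response. Condensed into a small illustration (not driver code, just the decision the branches above implement):

/* Which response the shared header/first-frag slot eventually gets,
 * given the outcome of the two grant operations on it.
 */
static u8 shared_slot_response(bool hdr_copy_ok, bool first_frag_map_ok)
{
	if (hdr_copy_ok && first_frag_map_ok)
		return XEN_NETIF_RSP_OKAY;   /* released later, on completion */

	/* Either failure: the slot is released exactly once with an error,
	 * after the frag mapping (if any) has been torn down.
	 */
	return XEN_NETIF_RSP_ERROR;
}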
1094 1150
1095static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) 1151static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
1096{ 1152{
1097 struct skb_shared_info *shinfo = skb_shinfo(skb); 1153 struct skb_shared_info *shinfo = skb_shinfo(skb);
1098 int nr_frags = shinfo->nr_frags; 1154 int nr_frags = shinfo->nr_frags;
@@ -1110,23 +1166,23 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
 1110 /* If this is not the first frag, chain it to the previous */ 1166 /* If this is not the first frag, chain it to the previous */
1111 if (prev_pending_idx == INVALID_PENDING_IDX) 1167 if (prev_pending_idx == INVALID_PENDING_IDX)
1112 skb_shinfo(skb)->destructor_arg = 1168 skb_shinfo(skb)->destructor_arg =
1113 &callback_param(vif, pending_idx); 1169 &callback_param(queue, pending_idx);
1114 else 1170 else
1115 callback_param(vif, prev_pending_idx).ctx = 1171 callback_param(queue, prev_pending_idx).ctx =
1116 &callback_param(vif, pending_idx); 1172 &callback_param(queue, pending_idx);
1117 1173
1118 callback_param(vif, pending_idx).ctx = NULL; 1174 callback_param(queue, pending_idx).ctx = NULL;
1119 prev_pending_idx = pending_idx; 1175 prev_pending_idx = pending_idx;
1120 1176
1121 txp = &vif->pending_tx_info[pending_idx].req; 1177 txp = &queue->pending_tx_info[pending_idx].req;
1122 page = virt_to_page(idx_to_kaddr(vif, pending_idx)); 1178 page = virt_to_page(idx_to_kaddr(queue, pending_idx));
1123 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 1179 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
1124 skb->len += txp->size; 1180 skb->len += txp->size;
1125 skb->data_len += txp->size; 1181 skb->data_len += txp->size;
1126 skb->truesize += txp->size; 1182 skb->truesize += txp->size;
1127 1183
1128 /* Take an extra reference to offset network stack's put_page */ 1184 /* Take an extra reference to offset network stack's put_page */
1129 get_page(vif->mmap_pages[pending_idx]); 1185 get_page(queue->mmap_pages[pending_idx]);
1130 } 1186 }
1131 /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc 1187 /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
1132 * overlaps with "index", and "mapping" is not set. I think mapping 1188 * overlaps with "index", and "mapping" is not set. I think mapping
@@ -1136,33 +1192,33 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
1136 skb->pfmemalloc = false; 1192 skb->pfmemalloc = false;
1137} 1193}
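xenvif_fill_frags() links the per-slot callback structures through their ctx pointers, so one skb destructor can reach every slot the skb consumed. Walking that chain is the pattern the zerocopy callback uses later; as a sketch (release_one_slot() is a hypothetical stand-in for the per-slot work):

static void release_one_slot(u16 pending_idx);	/* hypothetical helper */

static void walk_zerocopy_chain(struct ubuf_info *ubuf)
{
	/* skb_shinfo(skb)->destructor_arg points at the first element;
	 * each ->ctx points at the next slot's callback structure.
	 */
	while (ubuf) {
		u16 pending_idx = ubuf->desc;

		release_one_slot(pending_idx);
		ubuf = (struct ubuf_info *)ubuf->ctx;
	}
}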
1138 1194
1139static int xenvif_get_extras(struct xenvif *vif, 1195static int xenvif_get_extras(struct xenvif_queue *queue,
1140 struct xen_netif_extra_info *extras, 1196 struct xen_netif_extra_info *extras,
1141 int work_to_do) 1197 int work_to_do)
1142{ 1198{
1143 struct xen_netif_extra_info extra; 1199 struct xen_netif_extra_info extra;
1144 RING_IDX cons = vif->tx.req_cons; 1200 RING_IDX cons = queue->tx.req_cons;
1145 1201
1146 do { 1202 do {
1147 if (unlikely(work_to_do-- <= 0)) { 1203 if (unlikely(work_to_do-- <= 0)) {
1148 netdev_err(vif->dev, "Missing extra info\n"); 1204 netdev_err(queue->vif->dev, "Missing extra info\n");
1149 xenvif_fatal_tx_err(vif); 1205 xenvif_fatal_tx_err(queue->vif);
1150 return -EBADR; 1206 return -EBADR;
1151 } 1207 }
1152 1208
1153 memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons), 1209 memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
1154 sizeof(extra)); 1210 sizeof(extra));
1155 if (unlikely(!extra.type || 1211 if (unlikely(!extra.type ||
1156 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { 1212 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1157 vif->tx.req_cons = ++cons; 1213 queue->tx.req_cons = ++cons;
1158 netdev_err(vif->dev, 1214 netdev_err(queue->vif->dev,
1159 "Invalid extra type: %d\n", extra.type); 1215 "Invalid extra type: %d\n", extra.type);
1160 xenvif_fatal_tx_err(vif); 1216 xenvif_fatal_tx_err(queue->vif);
1161 return -EINVAL; 1217 return -EINVAL;
1162 } 1218 }
1163 1219
1164 memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); 1220 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1165 vif->tx.req_cons = ++cons; 1221 queue->tx.req_cons = ++cons;
1166 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); 1222 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1167 1223
1168 return work_to_do; 1224 return work_to_do;
@@ -1197,7 +1253,7 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
1197 return 0; 1253 return 0;
1198} 1254}
1199 1255
1200static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) 1256static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
1201{ 1257{
1202 bool recalculate_partial_csum = false; 1258 bool recalculate_partial_csum = false;
1203 1259
@@ -1207,7 +1263,7 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
1207 * recalculate the partial checksum. 1263 * recalculate the partial checksum.
1208 */ 1264 */
1209 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { 1265 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1210 vif->rx_gso_checksum_fixup++; 1266 queue->stats.rx_gso_checksum_fixup++;
1211 skb->ip_summed = CHECKSUM_PARTIAL; 1267 skb->ip_summed = CHECKSUM_PARTIAL;
1212 recalculate_partial_csum = true; 1268 recalculate_partial_csum = true;
1213 } 1269 }
@@ -1219,31 +1275,31 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
1219 return skb_checksum_setup(skb, recalculate_partial_csum); 1275 return skb_checksum_setup(skb, recalculate_partial_csum);
1220} 1276}
1221 1277
1222static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) 1278static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
1223{ 1279{
1224 u64 now = get_jiffies_64(); 1280 u64 now = get_jiffies_64();
1225 u64 next_credit = vif->credit_window_start + 1281 u64 next_credit = queue->credit_window_start +
1226 msecs_to_jiffies(vif->credit_usec / 1000); 1282 msecs_to_jiffies(queue->credit_usec / 1000);
1227 1283
1228 /* Timer could already be pending in rare cases. */ 1284 /* Timer could already be pending in rare cases. */
1229 if (timer_pending(&vif->credit_timeout)) 1285 if (timer_pending(&queue->credit_timeout))
1230 return true; 1286 return true;
1231 1287
1232 /* Passed the point where we can replenish credit? */ 1288 /* Passed the point where we can replenish credit? */
1233 if (time_after_eq64(now, next_credit)) { 1289 if (time_after_eq64(now, next_credit)) {
1234 vif->credit_window_start = now; 1290 queue->credit_window_start = now;
1235 tx_add_credit(vif); 1291 tx_add_credit(queue);
1236 } 1292 }
1237 1293
1238 /* Still too big to send right now? Set a callback. */ 1294 /* Still too big to send right now? Set a callback. */
1239 if (size > vif->remaining_credit) { 1295 if (size > queue->remaining_credit) {
1240 vif->credit_timeout.data = 1296 queue->credit_timeout.data =
1241 (unsigned long)vif; 1297 (unsigned long)queue;
1242 vif->credit_timeout.function = 1298 queue->credit_timeout.function =
1243 tx_credit_callback; 1299 tx_credit_callback;
1244 mod_timer(&vif->credit_timeout, 1300 mod_timer(&queue->credit_timeout,
1245 next_credit); 1301 next_credit);
1246 vif->credit_window_start = next_credit; 1302 queue->credit_window_start = next_credit;
1247 1303
1248 return true; 1304 return true;
1249 } 1305 }
@@ -1251,16 +1307,16 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1251 return false; 1307 return false;
1252} 1308}
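Putting the two credit paths together: a request is sent immediately if it fits in remaining_credit; otherwise the queue either replenishes (when the window has expired) or arms credit_timeout for the start of the next window and defers. The decision, reusing the replenish() sketch shown after tx_add_credit():

static bool would_defer(u64 now, u64 next_credit,
			unsigned long remaining,
			unsigned long credit_bytes,
			unsigned long max_burst,
			unsigned int size)
{
	if (time_after_eq64(now, next_credit))
		/* Window expired: refill before deciding. */
		remaining = replenish(remaining, credit_bytes, max_burst);

	return size > remaining;	/* still too big: arm credit_timeout */
}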
1253 1309
1254static void xenvif_tx_build_gops(struct xenvif *vif, 1310static void xenvif_tx_build_gops(struct xenvif_queue *queue,
1255 int budget, 1311 int budget,
1256 unsigned *copy_ops, 1312 unsigned *copy_ops,
1257 unsigned *map_ops) 1313 unsigned *map_ops)
1258{ 1314{
1259 struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop; 1315 struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop;
1260 struct sk_buff *skb; 1316 struct sk_buff *skb;
1261 int ret; 1317 int ret;
1262 1318
1263 while (skb_queue_len(&vif->tx_queue) < budget) { 1319 while (skb_queue_len(&queue->tx_queue) < budget) {
1264 struct xen_netif_tx_request txreq; 1320 struct xen_netif_tx_request txreq;
1265 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; 1321 struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
1266 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; 1322 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
@@ -1270,69 +1326,69 @@ static void xenvif_tx_build_gops(struct xenvif *vif,
1270 unsigned int data_len; 1326 unsigned int data_len;
1271 pending_ring_idx_t index; 1327 pending_ring_idx_t index;
1272 1328
1273 if (vif->tx.sring->req_prod - vif->tx.req_cons > 1329 if (queue->tx.sring->req_prod - queue->tx.req_cons >
1274 XEN_NETIF_TX_RING_SIZE) { 1330 XEN_NETIF_TX_RING_SIZE) {
1275 netdev_err(vif->dev, 1331 netdev_err(queue->vif->dev,
1276 "Impossible number of requests. " 1332 "Impossible number of requests. "
1277 "req_prod %d, req_cons %d, size %ld\n", 1333 "req_prod %d, req_cons %d, size %ld\n",
1278 vif->tx.sring->req_prod, vif->tx.req_cons, 1334 queue->tx.sring->req_prod, queue->tx.req_cons,
1279 XEN_NETIF_TX_RING_SIZE); 1335 XEN_NETIF_TX_RING_SIZE);
1280 xenvif_fatal_tx_err(vif); 1336 xenvif_fatal_tx_err(queue->vif);
1281 break; 1337 break;
1282 } 1338 }
1283 1339
1284 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); 1340 work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
1285 if (!work_to_do) 1341 if (!work_to_do)
1286 break; 1342 break;
1287 1343
1288 idx = vif->tx.req_cons; 1344 idx = queue->tx.req_cons;
1289 rmb(); /* Ensure that we see the request before we copy it. */ 1345 rmb(); /* Ensure that we see the request before we copy it. */
1290 memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); 1346 memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
1291 1347
1292 /* Credit-based scheduling. */ 1348 /* Credit-based scheduling. */
1293 if (txreq.size > vif->remaining_credit && 1349 if (txreq.size > queue->remaining_credit &&
1294 tx_credit_exceeded(vif, txreq.size)) 1350 tx_credit_exceeded(queue, txreq.size))
1295 break; 1351 break;
1296 1352
1297 vif->remaining_credit -= txreq.size; 1353 queue->remaining_credit -= txreq.size;
1298 1354
1299 work_to_do--; 1355 work_to_do--;
1300 vif->tx.req_cons = ++idx; 1356 queue->tx.req_cons = ++idx;
1301 1357
1302 memset(extras, 0, sizeof(extras)); 1358 memset(extras, 0, sizeof(extras));
1303 if (txreq.flags & XEN_NETTXF_extra_info) { 1359 if (txreq.flags & XEN_NETTXF_extra_info) {
1304 work_to_do = xenvif_get_extras(vif, extras, 1360 work_to_do = xenvif_get_extras(queue, extras,
1305 work_to_do); 1361 work_to_do);
1306 idx = vif->tx.req_cons; 1362 idx = queue->tx.req_cons;
1307 if (unlikely(work_to_do < 0)) 1363 if (unlikely(work_to_do < 0))
1308 break; 1364 break;
1309 } 1365 }
1310 1366
1311 ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do); 1367 ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do);
1312 if (unlikely(ret < 0)) 1368 if (unlikely(ret < 0))
1313 break; 1369 break;
1314 1370
1315 idx += ret; 1371 idx += ret;
1316 1372
1317 if (unlikely(txreq.size < ETH_HLEN)) { 1373 if (unlikely(txreq.size < ETH_HLEN)) {
1318 netdev_dbg(vif->dev, 1374 netdev_dbg(queue->vif->dev,
1319 "Bad packet size: %d\n", txreq.size); 1375 "Bad packet size: %d\n", txreq.size);
1320 xenvif_tx_err(vif, &txreq, idx); 1376 xenvif_tx_err(queue, &txreq, idx);
1321 break; 1377 break;
1322 } 1378 }
1323 1379
1324 /* No crossing a page as the payload mustn't fragment. */ 1380 /* No crossing a page as the payload mustn't fragment. */
1325 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { 1381 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1326 netdev_err(vif->dev, 1382 netdev_err(queue->vif->dev,
1327 "txreq.offset: %x, size: %u, end: %lu\n", 1383 "txreq.offset: %x, size: %u, end: %lu\n",
1328 txreq.offset, txreq.size, 1384 txreq.offset, txreq.size,
1329 (txreq.offset&~PAGE_MASK) + txreq.size); 1385 (txreq.offset&~PAGE_MASK) + txreq.size);
1330 xenvif_fatal_tx_err(vif); 1386 xenvif_fatal_tx_err(queue->vif);
1331 break; 1387 break;
1332 } 1388 }
1333 1389
1334 index = pending_index(vif->pending_cons); 1390 index = pending_index(queue->pending_cons);
1335 pending_idx = vif->pending_ring[index]; 1391 pending_idx = queue->pending_ring[index];
1336 1392
1337 data_len = (txreq.size > PKT_PROT_LEN && 1393 data_len = (txreq.size > PKT_PROT_LEN &&
1338 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 1394 ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
@@ -1340,9 +1396,9 @@ static void xenvif_tx_build_gops(struct xenvif *vif,
1340 1396
1341 skb = xenvif_alloc_skb(data_len); 1397 skb = xenvif_alloc_skb(data_len);
1342 if (unlikely(skb == NULL)) { 1398 if (unlikely(skb == NULL)) {
1343 netdev_dbg(vif->dev, 1399 netdev_dbg(queue->vif->dev,
1344 "Can't allocate a skb in start_xmit.\n"); 1400 "Can't allocate a skb in start_xmit.\n");
1345 xenvif_tx_err(vif, &txreq, idx); 1401 xenvif_tx_err(queue, &txreq, idx);
1346 break; 1402 break;
1347 } 1403 }
1348 1404
@@ -1350,7 +1406,7 @@ static void xenvif_tx_build_gops(struct xenvif *vif,
1350 struct xen_netif_extra_info *gso; 1406 struct xen_netif_extra_info *gso;
1351 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; 1407 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1352 1408
1353 if (xenvif_set_skb_gso(vif, skb, gso)) { 1409 if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
1354 /* Failure in xenvif_set_skb_gso is fatal. */ 1410 /* Failure in xenvif_set_skb_gso is fatal. */
1355 kfree_skb(skb); 1411 kfree_skb(skb);
1356 break; 1412 break;
@@ -1360,18 +1416,18 @@ static void xenvif_tx_build_gops(struct xenvif *vif,
1360 XENVIF_TX_CB(skb)->pending_idx = pending_idx; 1416 XENVIF_TX_CB(skb)->pending_idx = pending_idx;
1361 1417
1362 __skb_put(skb, data_len); 1418 __skb_put(skb, data_len);
1363 vif->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; 1419 queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
1364 vif->tx_copy_ops[*copy_ops].source.domid = vif->domid; 1420 queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
1365 vif->tx_copy_ops[*copy_ops].source.offset = txreq.offset; 1421 queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
1366 1422
1367 vif->tx_copy_ops[*copy_ops].dest.u.gmfn = 1423 queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
1368 virt_to_mfn(skb->data); 1424 virt_to_mfn(skb->data);
1369 vif->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; 1425 queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
1370 vif->tx_copy_ops[*copy_ops].dest.offset = 1426 queue->tx_copy_ops[*copy_ops].dest.offset =
1371 offset_in_page(skb->data); 1427 offset_in_page(skb->data);
1372 1428
1373 vif->tx_copy_ops[*copy_ops].len = data_len; 1429 queue->tx_copy_ops[*copy_ops].len = data_len;
1374 vif->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; 1430 queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
1375 1431
1376 (*copy_ops)++; 1432 (*copy_ops)++;
1377 1433
@@ -1380,42 +1436,42 @@ static void xenvif_tx_build_gops(struct xenvif *vif,
1380 skb_shinfo(skb)->nr_frags++; 1436 skb_shinfo(skb)->nr_frags++;
1381 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1437 frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1382 pending_idx); 1438 pending_idx);
1383 xenvif_tx_create_map_op(vif, pending_idx, &txreq, gop); 1439 xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop);
1384 gop++; 1440 gop++;
1385 } else { 1441 } else {
1386 frag_set_pending_idx(&skb_shinfo(skb)->frags[0], 1442 frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1387 INVALID_PENDING_IDX); 1443 INVALID_PENDING_IDX);
1388 memcpy(&vif->pending_tx_info[pending_idx].req, &txreq, 1444 memcpy(&queue->pending_tx_info[pending_idx].req, &txreq,
1389 sizeof(txreq)); 1445 sizeof(txreq));
1390 } 1446 }
1391 1447
1392 vif->pending_cons++; 1448 queue->pending_cons++;
1393 1449
1394 request_gop = xenvif_get_requests(vif, skb, txfrags, gop); 1450 request_gop = xenvif_get_requests(queue, skb, txfrags, gop);
1395 if (request_gop == NULL) { 1451 if (request_gop == NULL) {
1396 kfree_skb(skb); 1452 kfree_skb(skb);
1397 xenvif_tx_err(vif, &txreq, idx); 1453 xenvif_tx_err(queue, &txreq, idx);
1398 break; 1454 break;
1399 } 1455 }
1400 gop = request_gop; 1456 gop = request_gop;
1401 1457
1402 __skb_queue_tail(&vif->tx_queue, skb); 1458 __skb_queue_tail(&queue->tx_queue, skb);
1403 1459
1404 vif->tx.req_cons = idx; 1460 queue->tx.req_cons = idx;
1405 1461
1406 if (((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops)) || 1462 if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
1407 (*copy_ops >= ARRAY_SIZE(vif->tx_copy_ops))) 1463 (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
1408 break; 1464 break;
1409 } 1465 }
1410 1466
1411 (*map_ops) = gop - vif->tx_map_ops; 1467 (*map_ops) = gop - queue->tx_map_ops;
1412 return; 1468 return;
1413} 1469}
1414 1470
1415/* Consolidate skb with a frag_list into a brand new one with local pages on 1471/* Consolidate skb with a frag_list into a brand new one with local pages on
1416 * frags. Returns 0 or -ENOMEM if can't allocate new pages. 1472 * frags. Returns 0 or -ENOMEM if can't allocate new pages.
1417 */ 1473 */
1418static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb) 1474static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
1419{ 1475{
1420 unsigned int offset = skb_headlen(skb); 1476 unsigned int offset = skb_headlen(skb);
1421 skb_frag_t frags[MAX_SKB_FRAGS]; 1477 skb_frag_t frags[MAX_SKB_FRAGS];
@@ -1423,10 +1479,10 @@ static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
1423 struct ubuf_info *uarg; 1479 struct ubuf_info *uarg;
1424 struct sk_buff *nskb = skb_shinfo(skb)->frag_list; 1480 struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1425 1481
1426 vif->tx_zerocopy_sent += 2; 1482 queue->stats.tx_zerocopy_sent += 2;
1427 vif->tx_frag_overflow++; 1483 queue->stats.tx_frag_overflow++;
1428 1484
1429 xenvif_fill_frags(vif, nskb); 1485 xenvif_fill_frags(queue, nskb);
1430 /* Subtract frags size, we will correct it later */ 1486 /* Subtract frags size, we will correct it later */
1431 skb->truesize -= skb->data_len; 1487 skb->truesize -= skb->data_len;
1432 skb->len += nskb->len; 1488 skb->len += nskb->len;
@@ -1478,37 +1534,46 @@ static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
1478 return 0; 1534 return 0;
1479} 1535}
1480 1536
1481static int xenvif_tx_submit(struct xenvif *vif) 1537static int xenvif_tx_submit(struct xenvif_queue *queue)
1482{ 1538{
1483 struct gnttab_map_grant_ref *gop_map = vif->tx_map_ops; 1539 struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
1484 struct gnttab_copy *gop_copy = vif->tx_copy_ops; 1540 struct gnttab_copy *gop_copy = queue->tx_copy_ops;
1485 struct sk_buff *skb; 1541 struct sk_buff *skb;
1486 int work_done = 0; 1542 int work_done = 0;
1487 1543
1488 while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) { 1544 while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
1489 struct xen_netif_tx_request *txp; 1545 struct xen_netif_tx_request *txp;
1490 u16 pending_idx; 1546 u16 pending_idx;
1491 unsigned data_len; 1547 unsigned data_len;
1492 1548
1493 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 1549 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
1494 txp = &vif->pending_tx_info[pending_idx].req; 1550 txp = &queue->pending_tx_info[pending_idx].req;
1495 1551
1496 /* Check the remap error code. */ 1552 /* Check the remap error code. */
1497 if (unlikely(xenvif_tx_check_gop(vif, skb, &gop_map, &gop_copy))) { 1553 if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
1554 /* If there was an error, xenvif_tx_check_gop is
1555 * expected to release all the frags which were mapped,
1556 * so kfree_skb shouldn't do it again
1557 */
1498 skb_shinfo(skb)->nr_frags = 0; 1558 skb_shinfo(skb)->nr_frags = 0;
1559 if (skb_has_frag_list(skb)) {
1560 struct sk_buff *nskb =
1561 skb_shinfo(skb)->frag_list;
1562 skb_shinfo(nskb)->nr_frags = 0;
1563 }
1499 kfree_skb(skb); 1564 kfree_skb(skb);
1500 continue; 1565 continue;
1501 } 1566 }
1502 1567
1503 data_len = skb->len; 1568 data_len = skb->len;
1504 callback_param(vif, pending_idx).ctx = NULL; 1569 callback_param(queue, pending_idx).ctx = NULL;
1505 if (data_len < txp->size) { 1570 if (data_len < txp->size) {
1506 /* Append the packet payload as a fragment. */ 1571 /* Append the packet payload as a fragment. */
1507 txp->offset += data_len; 1572 txp->offset += data_len;
1508 txp->size -= data_len; 1573 txp->size -= data_len;
1509 } else { 1574 } else {
1510 /* Schedule a response immediately. */ 1575 /* Schedule a response immediately. */
1511 xenvif_idx_release(vif, pending_idx, 1576 xenvif_idx_release(queue, pending_idx,
1512 XEN_NETIF_RSP_OKAY); 1577 XEN_NETIF_RSP_OKAY);
1513 } 1578 }
1514 1579
@@ -1517,12 +1582,12 @@ static int xenvif_tx_submit(struct xenvif *vif)
1517 else if (txp->flags & XEN_NETTXF_data_validated) 1582 else if (txp->flags & XEN_NETTXF_data_validated)
1518 skb->ip_summed = CHECKSUM_UNNECESSARY; 1583 skb->ip_summed = CHECKSUM_UNNECESSARY;
1519 1584
1520 xenvif_fill_frags(vif, skb); 1585 xenvif_fill_frags(queue, skb);
1521 1586
1522 if (unlikely(skb_has_frag_list(skb))) { 1587 if (unlikely(skb_has_frag_list(skb))) {
1523 if (xenvif_handle_frag_list(vif, skb)) { 1588 if (xenvif_handle_frag_list(queue, skb)) {
1524 if (net_ratelimit()) 1589 if (net_ratelimit())
1525 netdev_err(vif->dev, 1590 netdev_err(queue->vif->dev,
1526 "Not enough memory to consolidate frag_list!\n"); 1591 "Not enough memory to consolidate frag_list!\n");
1527 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1592 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1528 kfree_skb(skb); 1593 kfree_skb(skb);
@@ -1535,12 +1600,12 @@ static int xenvif_tx_submit(struct xenvif *vif)
1535 __pskb_pull_tail(skb, target - skb_headlen(skb)); 1600 __pskb_pull_tail(skb, target - skb_headlen(skb));
1536 } 1601 }
1537 1602
1538 skb->dev = vif->dev; 1603 skb->dev = queue->vif->dev;
1539 skb->protocol = eth_type_trans(skb, skb->dev); 1604 skb->protocol = eth_type_trans(skb, skb->dev);
1540 skb_reset_network_header(skb); 1605 skb_reset_network_header(skb);
1541 1606
1542 if (checksum_setup(vif, skb)) { 1607 if (checksum_setup(queue, skb)) {
1543 netdev_dbg(vif->dev, 1608 netdev_dbg(queue->vif->dev,
1544 "Can't setup checksum in net_tx_action\n"); 1609 "Can't setup checksum in net_tx_action\n");
1545 /* We have to set this flag to trigger the callback */ 1610 /* We have to set this flag to trigger the callback */
1546 if (skb_shinfo(skb)->destructor_arg) 1611 if (skb_shinfo(skb)->destructor_arg)
@@ -1565,8 +1630,8 @@ static int xenvif_tx_submit(struct xenvif *vif)
1565 DIV_ROUND_UP(skb->len - hdrlen, mss); 1630 DIV_ROUND_UP(skb->len - hdrlen, mss);
1566 } 1631 }
1567 1632
1568 vif->dev->stats.rx_bytes += skb->len; 1633 queue->stats.rx_bytes += skb->len;
1569 vif->dev->stats.rx_packets++; 1634 queue->stats.rx_packets++;
1570 1635
1571 work_done++; 1636 work_done++;
1572 1637
@@ -1577,7 +1642,7 @@ static int xenvif_tx_submit(struct xenvif *vif)
1577 */ 1642 */
1578 if (skb_shinfo(skb)->destructor_arg) { 1643 if (skb_shinfo(skb)->destructor_arg) {
1579 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1644 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1580 vif->tx_zerocopy_sent++; 1645 queue->stats.tx_zerocopy_sent++;
1581 } 1646 }
1582 1647
1583 netif_receive_skb(skb); 1648 netif_receive_skb(skb);
@@ -1590,47 +1655,47 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
1590{ 1655{
1591 unsigned long flags; 1656 unsigned long flags;
1592 pending_ring_idx_t index; 1657 pending_ring_idx_t index;
1593 struct xenvif *vif = ubuf_to_vif(ubuf); 1658 struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1594 1659
1595 /* This is the only place where we grab this lock, to protect callbacks 1660 /* This is the only place where we grab this lock, to protect callbacks
1596 * from each other. 1661 * from each other.
1597 */ 1662 */
1598 spin_lock_irqsave(&vif->callback_lock, flags); 1663 spin_lock_irqsave(&queue->callback_lock, flags);
1599 do { 1664 do {
1600 u16 pending_idx = ubuf->desc; 1665 u16 pending_idx = ubuf->desc;
1601 ubuf = (struct ubuf_info *) ubuf->ctx; 1666 ubuf = (struct ubuf_info *) ubuf->ctx;
1602 BUG_ON(vif->dealloc_prod - vif->dealloc_cons >= 1667 BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1603 MAX_PENDING_REQS); 1668 MAX_PENDING_REQS);
1604 index = pending_index(vif->dealloc_prod); 1669 index = pending_index(queue->dealloc_prod);
1605 vif->dealloc_ring[index] = pending_idx; 1670 queue->dealloc_ring[index] = pending_idx;
1606 /* Sync with xenvif_tx_dealloc_action: 1671 /* Sync with xenvif_tx_dealloc_action:
1607 * insert idx then incr producer. 1672 * insert idx then incr producer.
1608 */ 1673 */
1609 smp_wmb(); 1674 smp_wmb();
1610 vif->dealloc_prod++; 1675 queue->dealloc_prod++;
1611 } while (ubuf); 1676 } while (ubuf);
1612 wake_up(&vif->dealloc_wq); 1677 wake_up(&queue->dealloc_wq);
1613 spin_unlock_irqrestore(&vif->callback_lock, flags); 1678 spin_unlock_irqrestore(&queue->callback_lock, flags);
1614 1679
1615 if (likely(zerocopy_success)) 1680 if (likely(zerocopy_success))
1616 vif->tx_zerocopy_success++; 1681 queue->stats.tx_zerocopy_success++;
1617 else 1682 else
1618 vif->tx_zerocopy_fail++; 1683 queue->stats.tx_zerocopy_fail++;
1619} 1684}
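The dealloc ring is a single-producer/single-consumer pair: the zerocopy callback stores a pending_idx and then advances dealloc_prod behind smp_wmb(), while the dealloc side reads dealloc_prod and then consumes entries behind smp_rmb() (the matching barrier is visible in xenvif_tx_dealloc_action() below). The pairing in isolation, with handle_slot() as a hypothetical stand-in for the unmap work:

static void handle_slot(u16 pending_idx);	/* hypothetical helper */

/* Producer (callback context): publish the slot, then the index. */
static void dealloc_publish(struct xenvif_queue *queue, u16 pending_idx)
{
	queue->dealloc_ring[pending_index(queue->dealloc_prod)] = pending_idx;
	smp_wmb();		/* slot contents visible before the index */
	queue->dealloc_prod++;
}

/* Consumer (dealloc thread): read the index, then the slots. */
static void dealloc_consume(struct xenvif_queue *queue)
{
	pending_ring_idx_t dp = queue->dealloc_prod;

	smp_rmb();		/* see every slot enqueued before dp was read */
	while (queue->dealloc_cons != dp)
		handle_slot(queue->dealloc_ring[
				pending_index(queue->dealloc_cons++)]);
}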
1620 1685
1621static inline void xenvif_tx_dealloc_action(struct xenvif *vif) 1686static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1622{ 1687{
1623 struct gnttab_unmap_grant_ref *gop; 1688 struct gnttab_unmap_grant_ref *gop;
1624 pending_ring_idx_t dc, dp; 1689 pending_ring_idx_t dc, dp;
1625 u16 pending_idx, pending_idx_release[MAX_PENDING_REQS]; 1690 u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1626 unsigned int i = 0; 1691 unsigned int i = 0;
1627 1692
1628 dc = vif->dealloc_cons; 1693 dc = queue->dealloc_cons;
1629 gop = vif->tx_unmap_ops; 1694 gop = queue->tx_unmap_ops;
1630 1695
1631 /* Free up any grants we have finished using */ 1696 /* Free up any grants we have finished using */
1632 do { 1697 do {
1633 dp = vif->dealloc_prod; 1698 dp = queue->dealloc_prod;
1634 1699
1635 /* Ensure we see all indices enqueued by all 1700 /* Ensure we see all indices enqueued by all
1636 * xenvif_zerocopy_callback(). 1701 * xenvif_zerocopy_callback().
@@ -1638,38 +1703,38 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
1638 smp_rmb(); 1703 smp_rmb();
1639 1704
1640 while (dc != dp) { 1705 while (dc != dp) {
1641 BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS); 1706 BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS);
1642 pending_idx = 1707 pending_idx =
1643 vif->dealloc_ring[pending_index(dc++)]; 1708 queue->dealloc_ring[pending_index(dc++)];
1644 1709
1645 pending_idx_release[gop-vif->tx_unmap_ops] = 1710 pending_idx_release[gop-queue->tx_unmap_ops] =
1646 pending_idx; 1711 pending_idx;
1647 vif->pages_to_unmap[gop-vif->tx_unmap_ops] = 1712 queue->pages_to_unmap[gop-queue->tx_unmap_ops] =
1648 vif->mmap_pages[pending_idx]; 1713 queue->mmap_pages[pending_idx];
1649 gnttab_set_unmap_op(gop, 1714 gnttab_set_unmap_op(gop,
1650 idx_to_kaddr(vif, pending_idx), 1715 idx_to_kaddr(queue, pending_idx),
1651 GNTMAP_host_map, 1716 GNTMAP_host_map,
1652 vif->grant_tx_handle[pending_idx]); 1717 queue->grant_tx_handle[pending_idx]);
1653 xenvif_grant_handle_reset(vif, pending_idx); 1718 xenvif_grant_handle_reset(queue, pending_idx);
1654 ++gop; 1719 ++gop;
1655 } 1720 }
1656 1721
1657 } while (dp != vif->dealloc_prod); 1722 } while (dp != queue->dealloc_prod);
1658 1723
1659 vif->dealloc_cons = dc; 1724 queue->dealloc_cons = dc;
1660 1725
1661 if (gop - vif->tx_unmap_ops > 0) { 1726 if (gop - queue->tx_unmap_ops > 0) {
1662 int ret; 1727 int ret;
1663 ret = gnttab_unmap_refs(vif->tx_unmap_ops, 1728 ret = gnttab_unmap_refs(queue->tx_unmap_ops,
1664 NULL, 1729 NULL,
1665 vif->pages_to_unmap, 1730 queue->pages_to_unmap,
1666 gop - vif->tx_unmap_ops); 1731 gop - queue->tx_unmap_ops);
1667 if (ret) { 1732 if (ret) {
1668 netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n", 1733 netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
1669 gop - vif->tx_unmap_ops, ret); 1734 gop - queue->tx_unmap_ops, ret);
1670 for (i = 0; i < gop - vif->tx_unmap_ops; ++i) { 1735 for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
1671 if (gop[i].status != GNTST_okay) 1736 if (gop[i].status != GNTST_okay)
1672 netdev_err(vif->dev, 1737 netdev_err(queue->vif->dev,
1673 " host_addr: %llx handle: %x status: %d\n", 1738 " host_addr: %llx handle: %x status: %d\n",
1674 gop[i].host_addr, 1739 gop[i].host_addr,
1675 gop[i].handle, 1740 gop[i].handle,
@@ -1679,91 +1744,91 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
1679 } 1744 }
1680 } 1745 }
1681 1746
1682 for (i = 0; i < gop - vif->tx_unmap_ops; ++i) 1747 for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
1683 xenvif_idx_release(vif, pending_idx_release[i], 1748 xenvif_idx_release(queue, pending_idx_release[i],
1684 XEN_NETIF_RSP_OKAY); 1749 XEN_NETIF_RSP_OKAY);
1685} 1750}
1686 1751
1687 1752
1688/* Called after netfront has transmitted */ 1753/* Called after netfront has transmitted */
1689int xenvif_tx_action(struct xenvif *vif, int budget) 1754int xenvif_tx_action(struct xenvif_queue *queue, int budget)
1690{ 1755{
1691 unsigned nr_mops, nr_cops = 0; 1756 unsigned nr_mops, nr_cops = 0;
1692 int work_done, ret; 1757 int work_done, ret;
1693 1758
1694 if (unlikely(!tx_work_todo(vif))) 1759 if (unlikely(!tx_work_todo(queue)))
1695 return 0; 1760 return 0;
1696 1761
1697 xenvif_tx_build_gops(vif, budget, &nr_cops, &nr_mops); 1762 xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
1698 1763
1699 if (nr_cops == 0) 1764 if (nr_cops == 0)
1700 return 0; 1765 return 0;
1701 1766
1702 gnttab_batch_copy(vif->tx_copy_ops, nr_cops); 1767 gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
1703 if (nr_mops != 0) { 1768 if (nr_mops != 0) {
1704 ret = gnttab_map_refs(vif->tx_map_ops, 1769 ret = gnttab_map_refs(queue->tx_map_ops,
1705 NULL, 1770 NULL,
1706 vif->pages_to_map, 1771 queue->pages_to_map,
1707 nr_mops); 1772 nr_mops);
1708 BUG_ON(ret); 1773 BUG_ON(ret);
1709 } 1774 }
1710 1775
1711 work_done = xenvif_tx_submit(vif); 1776 work_done = xenvif_tx_submit(queue);
1712 1777
1713 return work_done; 1778 return work_done;
1714} 1779}
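
xenvif_tx_action() is driven from the driver's NAPI poll function, which lives in interface.c and is not part of this hunk. The sketch below is only an illustration of how the budget is typically consumed: example_xenvif_poll is a made-up name, and the event re-enable step is assumed to be xenvif_napi_schedule_or_enable_events(), a helper this series does provide.

/* Illustrative NAPI poll wrapper; the driver's real poll function is in
 * interface.c. Only xenvif_tx_action() is taken from the hunk above.
 */
static int example_xenvif_poll(struct napi_struct *napi, int budget)
{
	struct xenvif_queue *queue =
		container_of(napi, struct xenvif_queue, napi);
	int work_done;

	work_done = xenvif_tx_action(queue, budget);

	if (work_done < budget) {
		/* All pending work done: leave polling mode and let the
		 * event channel interrupt re-schedule us when needed.
		 */
		napi_complete(napi);
		xenvif_napi_schedule_or_enable_events(queue);
	}

	return work_done;
}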
1715 1780
1716static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, 1781static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
1717 u8 status) 1782 u8 status)
1718{ 1783{
1719 struct pending_tx_info *pending_tx_info; 1784 struct pending_tx_info *pending_tx_info;
1720 pending_ring_idx_t index; 1785 pending_ring_idx_t index;
1721 unsigned long flags; 1786 unsigned long flags;
1722 1787
1723 pending_tx_info = &vif->pending_tx_info[pending_idx]; 1788 pending_tx_info = &queue->pending_tx_info[pending_idx];
1724 spin_lock_irqsave(&vif->response_lock, flags); 1789 spin_lock_irqsave(&queue->response_lock, flags);
1725 make_tx_response(vif, &pending_tx_info->req, status); 1790 make_tx_response(queue, &pending_tx_info->req, status);
1726 index = pending_index(vif->pending_prod); 1791 index = pending_index(queue->pending_prod);
1727 vif->pending_ring[index] = pending_idx; 1792 queue->pending_ring[index] = pending_idx;
1728 /* TX shouldn't use the index before we give it back here */ 1793 /* TX shouldn't use the index before we give it back here */
1729 mb(); 1794 mb();
1730 vif->pending_prod++; 1795 queue->pending_prod++;
1731 spin_unlock_irqrestore(&vif->response_lock, flags); 1796 spin_unlock_irqrestore(&queue->response_lock, flags);
1732} 1797}
1733 1798
1734 1799
1735static void make_tx_response(struct xenvif *vif, 1800static void make_tx_response(struct xenvif_queue *queue,
1736 struct xen_netif_tx_request *txp, 1801 struct xen_netif_tx_request *txp,
1737 s8 st) 1802 s8 st)
1738{ 1803{
1739 RING_IDX i = vif->tx.rsp_prod_pvt; 1804 RING_IDX i = queue->tx.rsp_prod_pvt;
1740 struct xen_netif_tx_response *resp; 1805 struct xen_netif_tx_response *resp;
1741 int notify; 1806 int notify;
1742 1807
1743 resp = RING_GET_RESPONSE(&vif->tx, i); 1808 resp = RING_GET_RESPONSE(&queue->tx, i);
1744 resp->id = txp->id; 1809 resp->id = txp->id;
1745 resp->status = st; 1810 resp->status = st;
1746 1811
1747 if (txp->flags & XEN_NETTXF_extra_info) 1812 if (txp->flags & XEN_NETTXF_extra_info)
1748 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL; 1813 RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1749 1814
1750 vif->tx.rsp_prod_pvt = ++i; 1815 queue->tx.rsp_prod_pvt = ++i;
1751 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); 1816 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
1752 if (notify) 1817 if (notify)
1753 notify_remote_via_irq(vif->tx_irq); 1818 notify_remote_via_irq(queue->tx_irq);
1754} 1819}
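
make_tx_response() only fires the event channel when RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() says so. The macro implements Xen's event-index scheme: the frontend stores in rsp_event the response index at which it next wants to be woken, and the backend notifies only when the freshly pushed responses cross that mark. A simplified version of the check (not the exact macro from xen/interface/io/ring.h) looks like this:

/* Notify only if the frontend's requested wake-up index (`want`, taken
 * from sring->rsp_event) falls inside the batch of responses just pushed,
 * i.e. inside the half-open interval (old_prod, new_prod]. All indices
 * are free-running and compared with unsigned wrap-around.
 */
static int needs_notify(unsigned int old_prod, unsigned int new_prod,
			unsigned int want)
{
	return (unsigned int)(new_prod - want) < (unsigned int)(new_prod - old_prod);
}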
1755 1820
1756static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, 1821static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
1757 u16 id, 1822 u16 id,
1758 s8 st, 1823 s8 st,
1759 u16 offset, 1824 u16 offset,
1760 u16 size, 1825 u16 size,
1761 u16 flags) 1826 u16 flags)
1762{ 1827{
1763 RING_IDX i = vif->rx.rsp_prod_pvt; 1828 RING_IDX i = queue->rx.rsp_prod_pvt;
1764 struct xen_netif_rx_response *resp; 1829 struct xen_netif_rx_response *resp;
1765 1830
1766 resp = RING_GET_RESPONSE(&vif->rx, i); 1831 resp = RING_GET_RESPONSE(&queue->rx, i);
1767 resp->offset = offset; 1832 resp->offset = offset;
1768 resp->flags = flags; 1833 resp->flags = flags;
1769 resp->id = id; 1834 resp->id = id;
@@ -1771,26 +1836,26 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1771 if (st < 0) 1836 if (st < 0)
1772 resp->status = (s16)st; 1837 resp->status = (s16)st;
1773 1838
1774 vif->rx.rsp_prod_pvt = ++i; 1839 queue->rx.rsp_prod_pvt = ++i;
1775 1840
1776 return resp; 1841 return resp;
1777} 1842}
1778 1843
1779void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx) 1844void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
1780{ 1845{
1781 int ret; 1846 int ret;
1782 struct gnttab_unmap_grant_ref tx_unmap_op; 1847 struct gnttab_unmap_grant_ref tx_unmap_op;
1783 1848
1784 gnttab_set_unmap_op(&tx_unmap_op, 1849 gnttab_set_unmap_op(&tx_unmap_op,
1785 idx_to_kaddr(vif, pending_idx), 1850 idx_to_kaddr(queue, pending_idx),
1786 GNTMAP_host_map, 1851 GNTMAP_host_map,
1787 vif->grant_tx_handle[pending_idx]); 1852 queue->grant_tx_handle[pending_idx]);
1788 xenvif_grant_handle_reset(vif, pending_idx); 1853 xenvif_grant_handle_reset(queue, pending_idx);
1789 1854
1790 ret = gnttab_unmap_refs(&tx_unmap_op, NULL, 1855 ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
1791 &vif->mmap_pages[pending_idx], 1); 1856 &queue->mmap_pages[pending_idx], 1);
1792 if (ret) { 1857 if (ret) {
1793 netdev_err(vif->dev, 1858 netdev_err(queue->vif->dev,
1794 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n", 1859 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
1795 ret, 1860 ret,
1796 pending_idx, 1861 pending_idx,
@@ -1799,42 +1864,38 @@ void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
1799 tx_unmap_op.status); 1864 tx_unmap_op.status);
1800 BUG(); 1865 BUG();
1801 } 1866 }
1802
1803 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
1804} 1867}
1805 1868
1806static inline int rx_work_todo(struct xenvif *vif) 1869static inline int rx_work_todo(struct xenvif_queue *queue)
1807{ 1870{
1808 return (!skb_queue_empty(&vif->rx_queue) && 1871 return (!skb_queue_empty(&queue->rx_queue) &&
1809 xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) || 1872 xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
1810 vif->rx_queue_purge;
1811} 1873}
1812 1874
1813static inline int tx_work_todo(struct xenvif *vif) 1875static inline int tx_work_todo(struct xenvif_queue *queue)
1814{ 1876{
1815 1877 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
1816 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
1817 return 1; 1878 return 1;
1818 1879
1819 return 0; 1880 return 0;
1820} 1881}
1821 1882
1822static inline bool tx_dealloc_work_todo(struct xenvif *vif) 1883static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
1823{ 1884{
1824 return vif->dealloc_cons != vif->dealloc_prod; 1885 return queue->dealloc_cons != queue->dealloc_prod;
1825} 1886}
1826 1887
1827void xenvif_unmap_frontend_rings(struct xenvif *vif) 1888void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
1828{ 1889{
1829 if (vif->tx.sring) 1890 if (queue->tx.sring)
1830 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1891 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1831 vif->tx.sring); 1892 queue->tx.sring);
1832 if (vif->rx.sring) 1893 if (queue->rx.sring)
1833 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), 1894 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
1834 vif->rx.sring); 1895 queue->rx.sring);
1835} 1896}
1836 1897
1837int xenvif_map_frontend_rings(struct xenvif *vif, 1898int xenvif_map_frontend_rings(struct xenvif_queue *queue,
1838 grant_ref_t tx_ring_ref, 1899 grant_ref_t tx_ring_ref,
1839 grant_ref_t rx_ring_ref) 1900 grant_ref_t rx_ring_ref)
1840{ 1901{
@@ -1844,85 +1905,148 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
1844 1905
1845 int err = -ENOMEM; 1906 int err = -ENOMEM;
1846 1907
1847 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1908 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1848 tx_ring_ref, &addr); 1909 tx_ring_ref, &addr);
1849 if (err) 1910 if (err)
1850 goto err; 1911 goto err;
1851 1912
1852 txs = (struct xen_netif_tx_sring *)addr; 1913 txs = (struct xen_netif_tx_sring *)addr;
1853 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); 1914 BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
1854 1915
1855 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), 1916 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
1856 rx_ring_ref, &addr); 1917 rx_ring_ref, &addr);
1857 if (err) 1918 if (err)
1858 goto err; 1919 goto err;
1859 1920
1860 rxs = (struct xen_netif_rx_sring *)addr; 1921 rxs = (struct xen_netif_rx_sring *)addr;
1861 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE); 1922 BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
1862 1923
1863 return 0; 1924 return 0;
1864 1925
1865err: 1926err:
1866 xenvif_unmap_frontend_rings(vif); 1927 xenvif_unmap_frontend_rings(queue);
1867 return err; 1928 return err;
1868} 1929}
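
BACK_RING_INIT() attaches the backend's private ring state to the freshly mapped shared page. The helper below is a simplified rendering of what the macro does for the TX ring, written out as a function for readability; it is not the actual macro from xen/interface/io/ring.h.

/* Simplified view of BACK_RING_INIT() for the TX ring: reset the private
 * consumer/producer cursors and size the ring to fit the shared page.
 */
static void example_back_ring_init(struct xen_netif_tx_back_ring *ring,
				   struct xen_netif_tx_sring *sring,
				   unsigned int size)
{
	ring->rsp_prod_pvt = 0;               /* private response producer */
	ring->req_cons     = 0;               /* private request consumer  */
	ring->nr_ents      = __RING_SIZE(sring, size);
	ring->sring        = sring;           /* shared page mapped above  */
}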
1869 1930
1870void xenvif_stop_queue(struct xenvif *vif) 1931static void xenvif_start_queue(struct xenvif_queue *queue)
1871{ 1932{
1872 if (!vif->can_queue) 1933 if (xenvif_schedulable(queue->vif))
1873 return; 1934 xenvif_wake_queue(queue);
1874
1875 netif_stop_queue(vif->dev);
1876} 1935}
1877 1936
1878static void xenvif_start_queue(struct xenvif *vif) 1937/* Only called from the queue's thread, it handles the situation when the guest
1938 * doesn't post enough requests on the receiving ring.
 1939 * First xenvif_start_xmit disables QDisc and starts a timer, and then either the
 1940 * timer fires, or the guest sends an interrupt after posting new requests. If it
 1941 * is the timer, the carrier is turned off here.
 1942 */
1943static void xenvif_rx_purge_event(struct xenvif_queue *queue)
1879{ 1944{
 1880	if (xenvif_schedulable(vif))	 1945	/* Either the last unsuccessful skb or at least 1 slot should fit */
1881 netif_wake_queue(vif->dev); 1946 int needed = queue->rx_last_skb_slots ?
1947 queue->rx_last_skb_slots : 1;
1948
 1949	/* It is assumed that if the guest posts new slots after this, the RX
1950 * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
1951 * the thread again
1952 */
1953 set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
1954 if (!xenvif_rx_ring_slots_available(queue, needed)) {
1955 rtnl_lock();
1956 if (netif_carrier_ok(queue->vif->dev)) {
1957 /* Timer fired and there are still no slots. Turn off
1958 * everything except the interrupts
1959 */
1960 netif_carrier_off(queue->vif->dev);
1961 skb_queue_purge(&queue->rx_queue);
1962 queue->rx_last_skb_slots = 0;
1963 if (net_ratelimit())
1964 netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
1965 } else {
 1966			/* Probably another queue already turned the carrier
 1967			 * off, make sure nothing is stuck in the internal
1968 * queue of this queue
1969 */
1970 skb_queue_purge(&queue->rx_queue);
1971 queue->rx_last_skb_slots = 0;
1972 }
1973 rtnl_unlock();
1974 } else if (!netif_carrier_ok(queue->vif->dev)) {
1975 unsigned int num_queues = queue->vif->num_queues;
1976 unsigned int i;
1977 /* The carrier was down, but an interrupt kicked
1978 * the thread again after new requests were
1979 * posted
1980 */
1981 clear_bit(QUEUE_STATUS_RX_STALLED,
1982 &queue->status);
1983 rtnl_lock();
1984 netif_carrier_on(queue->vif->dev);
1985 netif_tx_wake_all_queues(queue->vif->dev);
1986 rtnl_unlock();
1987
1988 for (i = 0; i < num_queues; i++) {
1989 struct xenvif_queue *temp = &queue->vif->queues[i];
1990
1991 xenvif_napi_schedule_or_enable_events(temp);
1992 }
1993 if (net_ratelimit())
1994 netdev_err(queue->vif->dev, "Carrier on again\n");
1995 } else {
 1996		/* Queueing was stopped, but the guest posted
1997 * new requests and sent an interrupt
1998 */
1999 clear_bit(QUEUE_STATUS_RX_STALLED,
2000 &queue->status);
2001 del_timer_sync(&queue->rx_stalled);
2002 xenvif_start_queue(queue);
2003 }
1882} 2004}
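
The purge path is only ever entered from the queue's kthread; the producers of QUEUE_STATUS_RX_PURGE_EVENT are the RX interrupt and the rx_stalled timer armed by xenvif_start_xmit(), both of which live in interface.c. A plausible shape for that timer callback is sketched below purely for orientation: the name and exact body are assumptions, though the queue fields used do appear in this series.

/* Hypothetical rx_stalled timer callback (the real one is in interface.c).
 * Old-style timer prototype, matching kernels of this vintage: flag the
 * purge event and wake the per-queue RX thread so xenvif_rx_purge_event()
 * runs in kthread context.
 */
static void example_rx_stalled(unsigned long data)
{
	struct xenvif_queue *queue = (struct xenvif_queue *)data;

	set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
	wake_up(&queue->wq);
}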
1883 2005
1884int xenvif_kthread_guest_rx(void *data) 2006int xenvif_kthread_guest_rx(void *data)
1885{ 2007{
1886 struct xenvif *vif = data; 2008 struct xenvif_queue *queue = data;
1887 struct sk_buff *skb; 2009 struct sk_buff *skb;
1888 2010
1889 while (!kthread_should_stop()) { 2011 while (!kthread_should_stop()) {
1890 wait_event_interruptible(vif->wq, 2012 wait_event_interruptible(queue->wq,
1891 rx_work_todo(vif) || 2013 rx_work_todo(queue) ||
1892 vif->disabled || 2014 queue->vif->disabled ||
2015 test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
1893 kthread_should_stop()); 2016 kthread_should_stop());
1894 2017
2018 if (kthread_should_stop())
2019 break;
2020
1895 /* This frontend is found to be rogue, disable it in 2021 /* This frontend is found to be rogue, disable it in
1896 * kthread context. Currently this is only set when 2022 * kthread context. Currently this is only set when
1897 * netback finds out frontend sends malformed packet, 2023 * netback finds out frontend sends malformed packet,
1898 * but we cannot disable the interface in softirq 2024 * but we cannot disable the interface in softirq
1899 * context so we defer it here. 2025 * context so we defer it here, if this thread is
2026 * associated with queue 0.
1900 */ 2027 */
1901 if (unlikely(vif->disabled && netif_carrier_ok(vif->dev))) 2028 if (unlikely(queue->vif->disabled && queue->id == 0))
1902 xenvif_carrier_off(vif); 2029 xenvif_carrier_off(queue->vif);
1903 2030 else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
1904 if (kthread_should_stop()) 2031 &queue->status))) {
1905 break; 2032 xenvif_rx_purge_event(queue);
1906 2033 } else if (!netif_carrier_ok(queue->vif->dev)) {
1907 if (vif->rx_queue_purge) { 2034 /* Another queue stalled and turned the carrier off, so
1908 skb_queue_purge(&vif->rx_queue); 2035 * purge the internal queue of queues which were not
1909 vif->rx_queue_purge = false; 2036 * blocked
2037 */
2038 skb_queue_purge(&queue->rx_queue);
2039 queue->rx_last_skb_slots = 0;
1910 } 2040 }
1911 2041
1912 if (!skb_queue_empty(&vif->rx_queue)) 2042 if (!skb_queue_empty(&queue->rx_queue))
1913 xenvif_rx_action(vif); 2043 xenvif_rx_action(queue);
1914
1915 if (skb_queue_empty(&vif->rx_queue) &&
1916 netif_queue_stopped(vif->dev)) {
1917 del_timer_sync(&vif->wake_queue);
1918 xenvif_start_queue(vif);
1919 }
1920 2044
1921 cond_resched(); 2045 cond_resched();
1922 } 2046 }
1923 2047
1924 /* Bin any remaining skbs */ 2048 /* Bin any remaining skbs */
1925 while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) 2049 while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
1926 dev_kfree_skb(skb); 2050 dev_kfree_skb(skb);
1927 2051
1928 return 0; 2052 return 0;
@@ -1930,22 +2054,22 @@ int xenvif_kthread_guest_rx(void *data)
1930 2054
1931int xenvif_dealloc_kthread(void *data) 2055int xenvif_dealloc_kthread(void *data)
1932{ 2056{
1933 struct xenvif *vif = data; 2057 struct xenvif_queue *queue = data;
1934 2058
1935 while (!kthread_should_stop()) { 2059 while (!kthread_should_stop()) {
1936 wait_event_interruptible(vif->dealloc_wq, 2060 wait_event_interruptible(queue->dealloc_wq,
1937 tx_dealloc_work_todo(vif) || 2061 tx_dealloc_work_todo(queue) ||
1938 kthread_should_stop()); 2062 kthread_should_stop());
1939 if (kthread_should_stop()) 2063 if (kthread_should_stop())
1940 break; 2064 break;
1941 2065
1942 xenvif_tx_dealloc_action(vif); 2066 xenvif_tx_dealloc_action(queue);
1943 cond_resched(); 2067 cond_resched();
1944 } 2068 }
1945 2069
 1946	/* Unmap anything remaining */	 2070
1947 if (tx_dealloc_work_todo(vif)) 2071 if (tx_dealloc_work_todo(queue))
1948 xenvif_tx_dealloc_action(vif); 2072 xenvif_tx_dealloc_action(queue);
1949 2073
1950 return 0; 2074 return 0;
1951} 2075}
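
Each queue runs two kernel threads: xenvif_kthread_guest_rx() and xenvif_dealloc_kthread() above. Their creation is handled by xenvif_connect() in interface.c, outside this hunk; a rough sketch of that step follows, with the task/dealloc_task field names being assumptions.

/* Rough sketch of per-queue thread start-up (the real code is in
 * interface.c; queue->task and queue->dealloc_task are assumed names).
 */
static int example_start_queue_threads(struct xenvif_queue *queue)
{
	struct task_struct *task;

	task = kthread_create(xenvif_kthread_guest_rx, queue,
			      "%s-guest-rx", queue->name);
	if (IS_ERR(task))
		return PTR_ERR(task);
	queue->task = task;

	task = kthread_create(xenvif_dealloc_kthread, queue,
			      "%s-dealloc", queue->name);
	if (IS_ERR(task)) {
		kthread_stop(queue->task);
		return PTR_ERR(task);
	}
	queue->dealloc_task = task;

	wake_up_process(queue->task);
	wake_up_process(queue->dealloc_task);
	return 0;
}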
@@ -1957,6 +2081,9 @@ static int __init netback_init(void)
1957 if (!xen_domain()) 2081 if (!xen_domain())
1958 return -ENODEV; 2082 return -ENODEV;
1959 2083
2084 /* Allow as many queues as there are CPUs, by default */
2085 xenvif_max_queues = num_online_cpus();
2086
1960 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { 2087 if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1961 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", 2088 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1962 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); 2089 fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
@@ -1969,6 +2096,13 @@ static int __init netback_init(void)
1969 2096
1970 rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); 2097 rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
1971 2098
2099#ifdef CONFIG_DEBUG_FS
2100 xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
2101 if (IS_ERR_OR_NULL(xen_netback_dbg_root))
2102 pr_warn("Init of debugfs returned %ld!\n",
2103 PTR_ERR(xen_netback_dbg_root));
2104#endif /* CONFIG_DEBUG_FS */
2105
1972 return 0; 2106 return 0;
1973 2107
1974failed_init: 2108failed_init:
@@ -1979,6 +2113,10 @@ module_init(netback_init);
1979 2113
1980static void __exit netback_fini(void) 2114static void __exit netback_fini(void)
1981{ 2115{
2116#ifdef CONFIG_DEBUG_FS
2117 if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
2118 debugfs_remove_recursive(xen_netback_dbg_root);
2119#endif /* CONFIG_DEBUG_FS */
1982 xenvif_xenbus_fini(); 2120 xenvif_xenbus_fini();
1983} 2121}
1984module_exit(netback_fini); 2122module_exit(netback_fini);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 7a206cffb062..580517d857bf 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -19,6 +19,8 @@
19*/ 19*/
20 20
21#include "common.h" 21#include "common.h"
22#include <linux/vmalloc.h>
23#include <linux/rtnetlink.h>
22 24
23struct backend_info { 25struct backend_info {
24 struct xenbus_device *dev; 26 struct xenbus_device *dev;
@@ -34,13 +36,183 @@ struct backend_info {
34 u8 have_hotplug_status_watch:1; 36 u8 have_hotplug_status_watch:1;
35}; 37};
36 38
37static int connect_rings(struct backend_info *); 39static int connect_rings(struct backend_info *be, struct xenvif_queue *queue);
38static void connect(struct backend_info *); 40static void connect(struct backend_info *be);
41static int read_xenbus_vif_flags(struct backend_info *be);
39static void backend_create_xenvif(struct backend_info *be); 42static void backend_create_xenvif(struct backend_info *be);
40static void unregister_hotplug_status_watch(struct backend_info *be); 43static void unregister_hotplug_status_watch(struct backend_info *be);
41static void set_backend_state(struct backend_info *be, 44static void set_backend_state(struct backend_info *be,
42 enum xenbus_state state); 45 enum xenbus_state state);
43 46
47#ifdef CONFIG_DEBUG_FS
48struct dentry *xen_netback_dbg_root = NULL;
49
50static int xenvif_read_io_ring(struct seq_file *m, void *v)
51{
52 struct xenvif_queue *queue = m->private;
53 struct xen_netif_tx_back_ring *tx_ring = &queue->tx;
54 struct xen_netif_rx_back_ring *rx_ring = &queue->rx;
55
56 if (tx_ring->sring) {
57 struct xen_netif_tx_sring *sring = tx_ring->sring;
58
59 seq_printf(m, "Queue %d\nTX: nr_ents %u\n", queue->id,
60 tx_ring->nr_ents);
61 seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n",
62 sring->req_prod,
63 sring->req_prod - sring->rsp_prod,
64 tx_ring->req_cons,
65 tx_ring->req_cons - sring->rsp_prod,
66 sring->req_event,
67 sring->req_event - sring->rsp_prod);
68 seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n",
69 sring->rsp_prod,
70 tx_ring->rsp_prod_pvt,
71 tx_ring->rsp_prod_pvt - sring->rsp_prod,
72 sring->rsp_event,
73 sring->rsp_event - sring->rsp_prod);
74 seq_printf(m, "pending prod %u pending cons %u nr_pending_reqs %u\n",
75 queue->pending_prod,
76 queue->pending_cons,
77 nr_pending_reqs(queue));
78 seq_printf(m, "dealloc prod %u dealloc cons %u dealloc_queue %u\n\n",
79 queue->dealloc_prod,
80 queue->dealloc_cons,
81 queue->dealloc_prod - queue->dealloc_cons);
82 }
83
84 if (rx_ring->sring) {
85 struct xen_netif_rx_sring *sring = rx_ring->sring;
86
87 seq_printf(m, "RX: nr_ents %u\n", rx_ring->nr_ents);
88 seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n",
89 sring->req_prod,
90 sring->req_prod - sring->rsp_prod,
91 rx_ring->req_cons,
92 rx_ring->req_cons - sring->rsp_prod,
93 sring->req_event,
94 sring->req_event - sring->rsp_prod);
95 seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n\n",
96 sring->rsp_prod,
97 rx_ring->rsp_prod_pvt,
98 rx_ring->rsp_prod_pvt - sring->rsp_prod,
99 sring->rsp_event,
100 sring->rsp_event - sring->rsp_prod);
101 }
102
103 seq_printf(m, "NAPI state: %lx NAPI weight: %d TX queue len %u\n"
104 "Credit timer_pending: %d, credit: %lu, usec: %lu\n"
105 "remaining: %lu, expires: %lu, now: %lu\n",
106 queue->napi.state, queue->napi.weight,
107 skb_queue_len(&queue->tx_queue),
108 timer_pending(&queue->credit_timeout),
109 queue->credit_bytes,
110 queue->credit_usec,
111 queue->remaining_credit,
112 queue->credit_timeout.expires,
113 jiffies);
114
115 return 0;
116}
117
118#define XENVIF_KICK_STR "kick"
119
120static ssize_t
121xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
122 loff_t *ppos)
123{
124 struct xenvif_queue *queue =
125 ((struct seq_file *)filp->private_data)->private;
126 int len;
127 char write[sizeof(XENVIF_KICK_STR)];
128
129 /* don't allow partial writes and check the length */
130 if (*ppos != 0)
131 return 0;
132 if (count < sizeof(XENVIF_KICK_STR) - 1)
133 return -ENOSPC;
134
135 len = simple_write_to_buffer(write,
136 sizeof(write),
137 ppos,
138 buf,
139 count);
140 if (len < 0)
141 return len;
142
143 if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
144 xenvif_interrupt(0, (void *)queue);
145 else {
146 pr_warn("Unknown command to io_ring_q%d. Available: kick\n",
147 queue->id);
148 count = -EINVAL;
149 }
150 return count;
151}
152
153static int xenvif_dump_open(struct inode *inode, struct file *filp)
154{
155 int ret;
156 void *queue = NULL;
157
158 if (inode->i_private)
159 queue = inode->i_private;
160 ret = single_open(filp, xenvif_read_io_ring, queue);
161 filp->f_mode |= FMODE_PWRITE;
162 return ret;
163}
164
165static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
166 .owner = THIS_MODULE,
167 .open = xenvif_dump_open,
168 .read = seq_read,
169 .llseek = seq_lseek,
170 .release = single_release,
171 .write = xenvif_write_io_ring,
172};
173
174static void xenvif_debugfs_addif(struct xenvif_queue *queue)
175{
176 struct dentry *pfile;
177 struct xenvif *vif = queue->vif;
178 int i;
179
180 if (IS_ERR_OR_NULL(xen_netback_dbg_root))
181 return;
182
183 vif->xenvif_dbg_root = debugfs_create_dir(vif->dev->name,
184 xen_netback_dbg_root);
185 if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root)) {
186 for (i = 0; i < vif->num_queues; ++i) {
187 char filename[sizeof("io_ring_q") + 4];
188
189 snprintf(filename, sizeof(filename), "io_ring_q%d", i);
190 pfile = debugfs_create_file(filename,
191 S_IRUSR | S_IWUSR,
192 vif->xenvif_dbg_root,
193 &vif->queues[i],
194 &xenvif_dbg_io_ring_ops_fops);
195 if (IS_ERR_OR_NULL(pfile))
196 pr_warn("Creation of io_ring file returned %ld!\n",
197 PTR_ERR(pfile));
198 }
199 } else
200 netdev_warn(vif->dev,
201 "Creation of vif debugfs dir returned %ld!\n",
202 PTR_ERR(vif->xenvif_dbg_root));
203}
204
205static void xenvif_debugfs_delif(struct xenvif *vif)
206{
207 if (IS_ERR_OR_NULL(xen_netback_dbg_root))
208 return;
209
210 if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root))
211 debugfs_remove_recursive(vif->xenvif_dbg_root);
212 vif->xenvif_dbg_root = NULL;
213}
214#endif /* CONFIG_DEBUG_FS */
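
With CONFIG_DEBUG_FS enabled, each vif gets a directory under the xen-netback debugfs root containing one io_ring_qN file per queue: reading it dumps the ring and credit state via xenvif_read_io_ring(), and writing the string "kick" invokes the queue's interrupt handler. The small userspace program below shows one way to exercise both; the path assumes debugfs is mounted at /sys/kernel/debug and that the interface is named vif1.0 with queue 0, so adjust as needed.

/* Example only: dump queue 0 of vif1.0 and then "kick" its interrupt. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/xen-netback/vif1.0/io_ring_q0";
	char buf[8192];
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);            /* file is root-only (0600) */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	n = read(fd, buf, sizeof(buf) - 1); /* ring/credit dump */
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}

	/* The write handler only accepts writes at offset 0. */
	lseek(fd, 0, SEEK_SET);
	if (write(fd, "kick", 4) != 4)
		perror("write");

	close(fd);
	return 0;
}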
215
44static int netback_remove(struct xenbus_device *dev) 216static int netback_remove(struct xenbus_device *dev)
45{ 217{
46 struct backend_info *be = dev_get_drvdata(&dev->dev); 218 struct backend_info *be = dev_get_drvdata(&dev->dev);
@@ -157,6 +329,12 @@ static int netback_probe(struct xenbus_device *dev,
157 if (err) 329 if (err)
158 pr_debug("Error writing feature-split-event-channels\n"); 330 pr_debug("Error writing feature-split-event-channels\n");
159 331
332 /* Multi-queue support: This is an optional feature. */
333 err = xenbus_printf(XBT_NIL, dev->nodename,
334 "multi-queue-max-queues", "%u", xenvif_max_queues);
335 if (err)
336 pr_debug("Error writing multi-queue-max-queues\n");
337
160 err = xenbus_switch_state(dev, XenbusStateInitWait); 338 err = xenbus_switch_state(dev, XenbusStateInitWait);
161 if (err) 339 if (err)
162 goto fail; 340 goto fail;
@@ -237,8 +415,12 @@ static void backend_create_xenvif(struct backend_info *be)
237 415
238static void backend_disconnect(struct backend_info *be) 416static void backend_disconnect(struct backend_info *be)
239{ 417{
240 if (be->vif) 418 if (be->vif) {
419#ifdef CONFIG_DEBUG_FS
420 xenvif_debugfs_delif(be->vif);
421#endif /* CONFIG_DEBUG_FS */
241 xenvif_disconnect(be->vif); 422 xenvif_disconnect(be->vif);
423 }
242} 424}
243 425
244static void backend_connect(struct backend_info *be) 426static void backend_connect(struct backend_info *be)
@@ -485,10 +667,26 @@ static void connect(struct backend_info *be)
485{ 667{
486 int err; 668 int err;
487 struct xenbus_device *dev = be->dev; 669 struct xenbus_device *dev = be->dev;
670 unsigned long credit_bytes, credit_usec;
671 unsigned int queue_index;
672 unsigned int requested_num_queues;
673 struct xenvif_queue *queue;
488 674
489 err = connect_rings(be); 675 /* Check whether the frontend requested multiple queues
490 if (err) 676 * and read the number requested.
677 */
678 err = xenbus_scanf(XBT_NIL, dev->otherend,
679 "multi-queue-num-queues",
680 "%u", &requested_num_queues);
681 if (err < 0) {
682 requested_num_queues = 1; /* Fall back to single queue */
683 } else if (requested_num_queues > xenvif_max_queues) {
684 /* buggy or malicious guest */
685 xenbus_dev_fatal(dev, err,
686 "guest requested %u queues, exceeding the maximum of %u.",
687 requested_num_queues, xenvif_max_queues);
491 return; 688 return;
689 }
492 690
493 err = xen_net_read_mac(dev, be->vif->fe_dev_addr); 691 err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
494 if (err) { 692 if (err) {
@@ -496,9 +694,59 @@ static void connect(struct backend_info *be)
496 return; 694 return;
497 } 695 }
498 696
499 xen_net_read_rate(dev, &be->vif->credit_bytes, 697 xen_net_read_rate(dev, &credit_bytes, &credit_usec);
500 &be->vif->credit_usec); 698 read_xenbus_vif_flags(be);
501 be->vif->remaining_credit = be->vif->credit_bytes; 699
700 /* Use the number of queues requested by the frontend */
701 be->vif->queues = vzalloc(requested_num_queues *
702 sizeof(struct xenvif_queue));
703 be->vif->num_queues = requested_num_queues;
704
705 for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
706 queue = &be->vif->queues[queue_index];
707 queue->vif = be->vif;
708 queue->id = queue_index;
709 snprintf(queue->name, sizeof(queue->name), "%s-q%u",
710 be->vif->dev->name, queue->id);
711
712 err = xenvif_init_queue(queue);
713 if (err) {
714 /* xenvif_init_queue() cleans up after itself on
715 * failure, but we need to clean up any previously
 716			 * initialised queues. Set num_queues to queue_index so that
717 * earlier queues can be destroyed using the regular
718 * disconnect logic.
719 */
720 be->vif->num_queues = queue_index;
721 goto err;
722 }
723
724 queue->remaining_credit = credit_bytes;
725
726 err = connect_rings(be, queue);
727 if (err) {
728 /* connect_rings() cleans up after itself on failure,
729 * but we need to clean up after xenvif_init_queue() here,
730 * and also clean up any previously initialised queues.
731 */
732 xenvif_deinit_queue(queue);
733 be->vif->num_queues = queue_index;
734 goto err;
735 }
736#ifdef CONFIG_DEBUG_FS
737 xenvif_debugfs_addif(queue);
738#endif /* CONFIG_DEBUG_FS */
739 }
740
741 /* Initialisation completed, tell core driver the number of
742 * active queues.
743 */
744 rtnl_lock();
745 netif_set_real_num_tx_queues(be->vif->dev, requested_num_queues);
746 netif_set_real_num_rx_queues(be->vif->dev, requested_num_queues);
747 rtnl_unlock();
748
749 xenvif_carrier_on(be->vif);
502 750
503 unregister_hotplug_status_watch(be); 751 unregister_hotplug_status_watch(be);
504 err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, 752 err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
@@ -507,45 +755,107 @@ static void connect(struct backend_info *be)
507 if (!err) 755 if (!err)
508 be->have_hotplug_status_watch = 1; 756 be->have_hotplug_status_watch = 1;
509 757
510 netif_wake_queue(be->vif->dev); 758 netif_tx_wake_all_queues(be->vif->dev);
759
760 return;
761
762err:
763 if (be->vif->num_queues > 0)
764 xenvif_disconnect(be->vif); /* Clean up existing queues */
765 vfree(be->vif->queues);
766 be->vif->queues = NULL;
767 be->vif->num_queues = 0;
768 return;
511} 769}
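
One thing worth noting in the hunk above is that the vzalloc() of the queue array is not checked for NULL before the per-queue loop dereferences it. A defensive variant of just that allocation step might look like the sketch below (illustrative only; example_alloc_queues is not part of the patch).

/* Illustrative helper: allocate the per-vif queue array with an explicit
 * out-of-memory check before any queue is touched.
 */
static int example_alloc_queues(struct xenvif *vif, unsigned int nr)
{
	vif->queues = vzalloc(nr * sizeof(struct xenvif_queue));
	if (!vif->queues)
		return -ENOMEM;
	vif->num_queues = nr;
	return 0;
}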
512 770
513 771
514static int connect_rings(struct backend_info *be) 772static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
515{ 773{
516 struct xenvif *vif = be->vif;
517 struct xenbus_device *dev = be->dev; 774 struct xenbus_device *dev = be->dev;
775 unsigned int num_queues = queue->vif->num_queues;
518 unsigned long tx_ring_ref, rx_ring_ref; 776 unsigned long tx_ring_ref, rx_ring_ref;
519 unsigned int tx_evtchn, rx_evtchn, rx_copy; 777 unsigned int tx_evtchn, rx_evtchn;
520 int err; 778 int err;
521 int val; 779 char *xspath;
780 size_t xspathsize;
781 const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
782
783 /* If the frontend requested 1 queue, or we have fallen back
784 * to single queue due to lack of frontend support for multi-
785 * queue, expect the remaining XenStore keys in the toplevel
786 * directory. Otherwise, expect them in a subdirectory called
787 * queue-N.
788 */
789 if (num_queues == 1) {
790 xspath = kzalloc(strlen(dev->otherend) + 1, GFP_KERNEL);
791 if (!xspath) {
792 xenbus_dev_fatal(dev, -ENOMEM,
793 "reading ring references");
794 return -ENOMEM;
795 }
796 strcpy(xspath, dev->otherend);
797 } else {
798 xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
799 xspath = kzalloc(xspathsize, GFP_KERNEL);
800 if (!xspath) {
801 xenbus_dev_fatal(dev, -ENOMEM,
802 "reading ring references");
803 return -ENOMEM;
804 }
805 snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend,
806 queue->id);
807 }
522 808
523 err = xenbus_gather(XBT_NIL, dev->otherend, 809 err = xenbus_gather(XBT_NIL, xspath,
524 "tx-ring-ref", "%lu", &tx_ring_ref, 810 "tx-ring-ref", "%lu", &tx_ring_ref,
525 "rx-ring-ref", "%lu", &rx_ring_ref, NULL); 811 "rx-ring-ref", "%lu", &rx_ring_ref, NULL);
526 if (err) { 812 if (err) {
527 xenbus_dev_fatal(dev, err, 813 xenbus_dev_fatal(dev, err,
528 "reading %s/ring-ref", 814 "reading %s/ring-ref",
529 dev->otherend); 815 xspath);
530 return err; 816 goto err;
531 } 817 }
532 818
533 /* Try split event channels first, then single event channel. */ 819 /* Try split event channels first, then single event channel. */
534 err = xenbus_gather(XBT_NIL, dev->otherend, 820 err = xenbus_gather(XBT_NIL, xspath,
535 "event-channel-tx", "%u", &tx_evtchn, 821 "event-channel-tx", "%u", &tx_evtchn,
536 "event-channel-rx", "%u", &rx_evtchn, NULL); 822 "event-channel-rx", "%u", &rx_evtchn, NULL);
537 if (err < 0) { 823 if (err < 0) {
538 err = xenbus_scanf(XBT_NIL, dev->otherend, 824 err = xenbus_scanf(XBT_NIL, xspath,
539 "event-channel", "%u", &tx_evtchn); 825 "event-channel", "%u", &tx_evtchn);
540 if (err < 0) { 826 if (err < 0) {
541 xenbus_dev_fatal(dev, err, 827 xenbus_dev_fatal(dev, err,
542 "reading %s/event-channel(-tx/rx)", 828 "reading %s/event-channel(-tx/rx)",
543 dev->otherend); 829 xspath);
544 return err; 830 goto err;
545 } 831 }
546 rx_evtchn = tx_evtchn; 832 rx_evtchn = tx_evtchn;
547 } 833 }
548 834
835 /* Map the shared frame, irq etc. */
836 err = xenvif_connect(queue, tx_ring_ref, rx_ring_ref,
837 tx_evtchn, rx_evtchn);
838 if (err) {
839 xenbus_dev_fatal(dev, err,
840 "mapping shared-frames %lu/%lu port tx %u rx %u",
841 tx_ring_ref, rx_ring_ref,
842 tx_evtchn, rx_evtchn);
843 goto err;
844 }
845
846 err = 0;
847err: /* Regular return falls through with err == 0 */
848 kfree(xspath);
849 return err;
850}
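
Putting the xenbus pieces together: the backend advertises multi-queue-max-queues in its own node at probe time, the frontend replies with multi-queue-num-queues, and for more than one queue each queue's ring references and event channels move into a queue-N subdirectory of the frontend node, while shared keys (MAC, request-rx-copy, feature flags) stay at the top level. An illustrative layout for a two-queue vif of domain 3 follows; all paths and values are examples, not output captured from a real system.

/local/domain/0/backend/vif/3/0/multi-queue-max-queues = "8"
/local/domain/3/device/vif/0/multi-queue-num-queues    = "2"
/local/domain/3/device/vif/0/request-rx-copy           = "1"
/local/domain/3/device/vif/0/queue-0/tx-ring-ref       = "768"
/local/domain/3/device/vif/0/queue-0/rx-ring-ref       = "769"
/local/domain/3/device/vif/0/queue-0/event-channel-tx  = "21"
/local/domain/3/device/vif/0/queue-0/event-channel-rx  = "22"
/local/domain/3/device/vif/0/queue-1/tx-ring-ref       = "770"
/local/domain/3/device/vif/0/queue-1/rx-ring-ref       = "771"
/local/domain/3/device/vif/0/queue-1/event-channel-tx  = "23"
/local/domain/3/device/vif/0/queue-1/event-channel-rx  = "24"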
851
852static int read_xenbus_vif_flags(struct backend_info *be)
853{
854 struct xenvif *vif = be->vif;
855 struct xenbus_device *dev = be->dev;
856 unsigned int rx_copy;
857 int err, val;
858
549 err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", 859 err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
550 &rx_copy); 860 &rx_copy);
551 if (err == -ENOENT) { 861 if (err == -ENOENT) {
@@ -621,16 +931,6 @@ static int connect_rings(struct backend_info *be)
621 val = 0; 931 val = 0;
622 vif->ipv6_csum = !!val; 932 vif->ipv6_csum = !!val;
623 933
624 /* Map the shared frame, irq etc. */
625 err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
626 tx_evtchn, rx_evtchn);
627 if (err) {
628 xenbus_dev_fatal(dev, err,
629 "mapping shared-frames %lu/%lu port tx %u rx %u",
630 tx_ring_ref, rx_ring_ref,
631 tx_evtchn, rx_evtchn);
632 return err;
633 }
634 return 0; 934 return 0;
635} 935}
636 936