aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Campbell <Ian.Campbell@citrix.com>2011-03-14 20:06:18 -0400
committerDavid S. Miller <davem@davemloft.net>2011-03-15 22:38:03 -0400
commitf942dc2552b8bfdee607be867b12a8971bb9cd85 (patch)
treeba7d264f94d9e6938ef4e36f93e179162e12cf20
parente0da2481fc00e031c04480b9dc88fae9eff39a19 (diff)
xen network backend driver
netback is the host side counterpart to the frontend driver in drivers/net/xen-netfront.c. The PV protocol is also implemented by frontend drivers in other OSes, such as the BSDs and even Windows. The patch is based on the driver from the xen.git pvops kernel tree but has been put through the checkpatch.pl wringer plus several manual cleanup passes and review iterations. The driver has been moved from drivers/xen/netback to drivers/net/xen-netback. One major change from xen.git is that the guest transmit path (i.e. what looks like receive to netback) has been significantly reworked to remove the dependency on the out of tree PageForeign page flag (a core kernel patch which enables a per page destructor callback on the final put_page). This page flag was used in order to implement a grant map based transmit path (where guest pages are mapped directly into SKB frags). Instead this version of netback uses grant copy operations into regular memory belonging to the backend domain. Reinstating the grant map functionality is something which I would like to revisit in the future. Note that this driver depends on 2e820f58f7ad "xen/irq: implement bind_interdomain_evtchn_to_irqhandler for backend drivers" which is in linux-next via the "xen-two" tree and is intended for the 2.6.39 merge window: git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git stable/backends this branch has only that single commit since 2.6.38-rc2 and is safe for cross merging into the net branch. Signed-off-by: Ian Campbell <ian.campbell@citrix.com> Reviewed-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/Kconfig38
-rw-r--r--drivers/net/Makefile1
-rw-r--r--drivers/net/xen-netback/Makefile3
-rw-r--r--drivers/net/xen-netback/common.h161
-rw-r--r--drivers/net/xen-netback/interface.c424
-rw-r--r--drivers/net/xen-netback/netback.c1745
-rw-r--r--drivers/net/xen-netback/xenbus.c490
-rw-r--r--drivers/net/xen-netfront.c20
-rw-r--r--include/xen/interface/io/netif.h80
9 files changed, 2908 insertions, 54 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 46e1b1a28b80..797d68196728 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2953,12 +2953,38 @@ config XEN_NETDEV_FRONTEND
2953 select XEN_XENBUS_FRONTEND 2953 select XEN_XENBUS_FRONTEND
2954 default y 2954 default y
2955 help 2955 help
2956 The network device frontend driver allows the kernel to 2956 This driver provides support for Xen paravirtual network
2957 access network devices exported exported by a virtual 2957 devices exported by a Xen network driver domain (often
2958 machine containing a physical network device driver. The 2958 domain 0).
2959 frontend driver is intended for unprivileged guest domains; 2959
2960 if you are compiling a kernel for a Xen guest, you almost 2960 The corresponding Linux backend driver is enabled by the
2961 certainly want to enable this. 2961 CONFIG_XEN_NETDEV_BACKEND option.
2962
2963 If you are compiling a kernel for use as Xen guest, you
2964 should say Y here. To compile this driver as a module, choose
2965 M here: the module will be called xen-netfront.
2966
2967config XEN_NETDEV_BACKEND
2968 tristate "Xen backend network device"
2969 depends on XEN_BACKEND
2970 help
2971 This driver allows the kernel to act as a Xen network driver
2972 domain which exports paravirtual network devices to other
2973 Xen domains. These devices can be accessed by any operating
2974 system that implements a compatible front end.
2975
2976 The corresponding Linux frontend driver is enabled by the
2977 CONFIG_XEN_NETDEV_FRONTEND configuration option.
2978
2979 The backend driver presents a standard network device
2980 endpoint for each paravirtual network device to the driver
2981 domain network stack. These can then be bridged or routed
2982 etc in order to provide full network connectivity.
2983
2984 If you are compiling a kernel to run in a Xen network driver
2985 domain (often this is domain 0) you should say Y here. To
2986 compile this driver as a module, choose M here: the module
2987 will be called xen-netback.
2962 2988
2963config ISERIES_VETH 2989config ISERIES_VETH
2964 tristate "iSeries Virtual Ethernet driver support" 2990 tristate "iSeries Virtual Ethernet driver support"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 7c2171179f97..01b604ad155e 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -172,6 +172,7 @@ obj-$(CONFIG_SLIP) += slip.o
172obj-$(CONFIG_SLHC) += slhc.o 172obj-$(CONFIG_SLHC) += slhc.o
173 173
174obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o 174obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
175obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
175 176
176obj-$(CONFIG_DUMMY) += dummy.o 177obj-$(CONFIG_DUMMY) += dummy.o
177obj-$(CONFIG_IFB) += ifb.o 178obj-$(CONFIG_IFB) += ifb.o
diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
new file mode 100644
index 000000000000..e346e8125ef5
--- /dev/null
+++ b/drivers/net/xen-netback/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
2
3xen-netback-y := netback.o xenbus.o interface.o
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
new file mode 100644
index 000000000000..5d7bbf2b2ee7
--- /dev/null
+++ b/drivers/net/xen-netback/common.h
@@ -0,0 +1,161 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License version 2
4 * as published by the Free Software Foundation; or, when distributed
5 * separately from the Linux kernel or incorporated into other
6 * software packages, subject to the following license:
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this source file (the "Software"), to deal in the Software without
10 * restriction, including without limitation the rights to use, copy, modify,
11 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 */
26
27#ifndef __XEN_NETBACK__COMMON_H__
28#define __XEN_NETBACK__COMMON_H__
29
30#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
31
32#include <linux/module.h>
33#include <linux/interrupt.h>
34#include <linux/slab.h>
35#include <linux/ip.h>
36#include <linux/in.h>
37#include <linux/io.h>
38#include <linux/netdevice.h>
39#include <linux/etherdevice.h>
40#include <linux/wait.h>
41#include <linux/sched.h>
42
43#include <xen/interface/io/netif.h>
44#include <xen/interface/grant_table.h>
45#include <xen/grant_table.h>
46#include <xen/xenbus.h>
47
48struct xen_netbk;
49
50struct xenvif {
51 /* Unique identifier for this interface. */
52 domid_t domid;
53 unsigned int handle;
54
55 /* Reference to netback processing backend. */
56 struct xen_netbk *netbk;
57
58 u8 fe_dev_addr[6];
59
60 /* Physical parameters of the comms window. */
61 grant_handle_t tx_shmem_handle;
62 grant_ref_t tx_shmem_ref;
63 grant_handle_t rx_shmem_handle;
64 grant_ref_t rx_shmem_ref;
65 unsigned int irq;
66
67 /* List of frontends to notify after a batch of frames sent. */
68 struct list_head notify_list;
69
70 /* The shared rings and indexes. */
71 struct xen_netif_tx_back_ring tx;
72 struct xen_netif_rx_back_ring rx;
73 struct vm_struct *tx_comms_area;
74 struct vm_struct *rx_comms_area;
75
76 /* Flags that must not be set in dev->features */
77 u32 features_disabled;
78
79 /* Frontend feature information. */
80 u8 can_sg:1;
81 u8 gso:1;
82 u8 gso_prefix:1;
83 u8 csum:1;
84
85 /* Internal feature information. */
86 u8 can_queue:1; /* can queue packets for receiver? */
87
88 /*
89 * Allow xenvif_start_xmit() to peek ahead in the rx request
90 * ring. This is a prediction of what rx_req_cons will be
91 * once all queued skbs are put on the ring.
92 */
93 RING_IDX rx_req_cons_peek;
94
95 /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
96 unsigned long credit_bytes;
97 unsigned long credit_usec;
98 unsigned long remaining_credit;
99 struct timer_list credit_timeout;
100
101 /* Statistics */
102 unsigned long rx_gso_checksum_fixup;
103
104 /* Miscellaneous private stuff. */
105 struct list_head schedule_list;
106 atomic_t refcnt;
107 struct net_device *dev;
108
109 wait_queue_head_t waiting_to_free;
110};
111
112#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
113#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
114
115struct xenvif *xenvif_alloc(struct device *parent,
116 domid_t domid,
117 unsigned int handle);
118
119int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
120 unsigned long rx_ring_ref, unsigned int evtchn);
121void xenvif_disconnect(struct xenvif *vif);
122
123void xenvif_get(struct xenvif *vif);
124void xenvif_put(struct xenvif *vif);
125
126int xenvif_xenbus_init(void);
127
128int xenvif_schedulable(struct xenvif *vif);
129
130int xen_netbk_rx_ring_full(struct xenvif *vif);
131
132int xen_netbk_must_stop_queue(struct xenvif *vif);
133
134/* (Un)Map communication rings. */
135void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
136int xen_netbk_map_frontend_rings(struct xenvif *vif,
137 grant_ref_t tx_ring_ref,
138 grant_ref_t rx_ring_ref);
139
140/* (De)Register a xenvif with the netback backend. */
141void xen_netbk_add_xenvif(struct xenvif *vif);
142void xen_netbk_remove_xenvif(struct xenvif *vif);
143
144/* (De)Schedule backend processing for a xenvif */
145void xen_netbk_schedule_xenvif(struct xenvif *vif);
146void xen_netbk_deschedule_xenvif(struct xenvif *vif);
147
148/* Check for SKBs from frontend and schedule backend processing */
149void xen_netbk_check_rx_xenvif(struct xenvif *vif);
150/* Receive an SKB from the frontend */
151void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
152
153/* Queue an SKB for transmission to the frontend */
154void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
155/* Notify xenvif that ring now has space to send an skb to the frontend */
156void xenvif_notify_tx_completion(struct xenvif *vif);
157
158/* Returns number of ring slots required to send an skb to the frontend */
159unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
160
161#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
new file mode 100644
index 000000000000..de569cc19da4
--- /dev/null
+++ b/drivers/net/xen-netback/interface.c
@@ -0,0 +1,424 @@
1/*
2 * Network-device interface management.
3 *
4 * Copyright (c) 2004-2005, Keir Fraser
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version 2
8 * as published by the Free Software Foundation; or, when distributed
9 * separately from the Linux kernel or incorporated into other
10 * software packages, subject to the following license:
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30
31#include "common.h"
32
33#include <linux/ethtool.h>
34#include <linux/rtnetlink.h>
35#include <linux/if_vlan.h>
36
37#include <xen/events.h>
38#include <asm/xen/hypercall.h>
39
40#define XENVIF_QUEUE_LENGTH 32
41
42void xenvif_get(struct xenvif *vif)
43{
44 atomic_inc(&vif->refcnt);
45}
46
47void xenvif_put(struct xenvif *vif)
48{
49 if (atomic_dec_and_test(&vif->refcnt))
50 wake_up(&vif->waiting_to_free);
51}
52
53int xenvif_schedulable(struct xenvif *vif)
54{
55 return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
56}
57
/* Non-zero when @vif is schedulable and its rx ring still has room. */
static int xenvif_rx_schedulable(struct xenvif *vif)
{
	if (!xenvif_schedulable(vif))
		return 0;

	return !xen_netbk_rx_ring_full(vif);
}
62
63static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
64{
65 struct xenvif *vif = dev_id;
66
67 if (vif->netbk == NULL)
68 return IRQ_NONE;
69
70 xen_netbk_schedule_xenvif(vif);
71
72 if (xenvif_rx_schedulable(vif))
73 netif_wake_queue(vif->dev);
74
75 return IRQ_HANDLED;
76}
77
78static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
79{
80 struct xenvif *vif = netdev_priv(dev);
81
82 BUG_ON(skb->dev != dev);
83
84 if (vif->netbk == NULL)
85 goto drop;
86
87 /* Drop the packet if the target domain has no receive buffers. */
88 if (!xenvif_rx_schedulable(vif))
89 goto drop;
90
91 /* Reserve ring slots for the worst-case number of fragments. */
92 vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
93 xenvif_get(vif);
94
95 if (vif->can_queue && xen_netbk_must_stop_queue(vif))
96 netif_stop_queue(dev);
97
98 xen_netbk_queue_tx_skb(vif, skb);
99
100 return NETDEV_TX_OK;
101
102 drop:
103 vif->dev->stats.tx_dropped++;
104 dev_kfree_skb(skb);
105 return NETDEV_TX_OK;
106}
107
/* Pass an skb received from the frontend up the stack; @vif is unused here. */
void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb)
{
	netif_rx_ni(skb);
}
112
113void xenvif_notify_tx_completion(struct xenvif *vif)
114{
115 if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
116 netif_wake_queue(vif->dev);
117}
118
119static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
120{
121 struct xenvif *vif = netdev_priv(dev);
122 return &vif->dev->stats;
123}
124
125static void xenvif_up(struct xenvif *vif)
126{
127 xen_netbk_add_xenvif(vif);
128 enable_irq(vif->irq);
129 xen_netbk_check_rx_xenvif(vif);
130}
131
132static void xenvif_down(struct xenvif *vif)
133{
134 disable_irq(vif->irq);
135 xen_netbk_deschedule_xenvif(vif);
136 xen_netbk_remove_xenvif(vif);
137}
138
/* ndo_open: activate the vif if it already has carrier, then start the queue. */
static int xenvif_open(struct net_device *dev)
{
	if (netif_carrier_ok(dev)) {
		struct xenvif *vif = netdev_priv(dev);

		xenvif_up(vif);
	}

	netif_start_queue(dev);
	return 0;
}
147
/* ndo_stop: deactivate the vif if it has carrier, then stop the queue. */
static int xenvif_close(struct net_device *dev)
{
	if (netif_carrier_ok(dev)) {
		struct xenvif *vif = netdev_priv(dev);

		xenvif_down(vif);
	}

	netif_stop_queue(dev);
	return 0;
}
156
157static int xenvif_change_mtu(struct net_device *dev, int mtu)
158{
159 struct xenvif *vif = netdev_priv(dev);
160 int max = vif->can_sg ? 65535 - VLAN_ETH_HLEN : ETH_DATA_LEN;
161
162 if (mtu > max)
163 return -EINVAL;
164 dev->mtu = mtu;
165 return 0;
166}
167
168static void xenvif_set_features(struct xenvif *vif)
169{
170 struct net_device *dev = vif->dev;
171 u32 features = dev->features;
172
173 if (vif->can_sg)
174 features |= NETIF_F_SG;
175 if (vif->gso || vif->gso_prefix)
176 features |= NETIF_F_TSO;
177 if (vif->csum)
178 features |= NETIF_F_IP_CSUM;
179
180 features &= ~(vif->features_disabled);
181
182 if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
183 dev->mtu = ETH_DATA_LEN;
184
185 dev->features = features;
186}
187
188static int xenvif_set_tx_csum(struct net_device *dev, u32 data)
189{
190 struct xenvif *vif = netdev_priv(dev);
191 if (data) {
192 if (!vif->csum)
193 return -EOPNOTSUPP;
194 vif->features_disabled &= ~NETIF_F_IP_CSUM;
195 } else {
196 vif->features_disabled |= NETIF_F_IP_CSUM;
197 }
198
199 xenvif_set_features(vif);
200 return 0;
201}
202
203static int xenvif_set_sg(struct net_device *dev, u32 data)
204{
205 struct xenvif *vif = netdev_priv(dev);
206 if (data) {
207 if (!vif->can_sg)
208 return -EOPNOTSUPP;
209 vif->features_disabled &= ~NETIF_F_SG;
210 } else {
211 vif->features_disabled |= NETIF_F_SG;
212 }
213
214 xenvif_set_features(vif);
215 return 0;
216}
217
218static int xenvif_set_tso(struct net_device *dev, u32 data)
219{
220 struct xenvif *vif = netdev_priv(dev);
221 if (data) {
222 if (!vif->gso && !vif->gso_prefix)
223 return -EOPNOTSUPP;
224 vif->features_disabled &= ~NETIF_F_TSO;
225 } else {
226 vif->features_disabled |= NETIF_F_TSO;
227 }
228
229 xenvif_set_features(vif);
230 return 0;
231}
232
233static const struct xenvif_stat {
234 char name[ETH_GSTRING_LEN];
235 u16 offset;
236} xenvif_stats[] = {
237 {
238 "rx_gso_checksum_fixup",
239 offsetof(struct xenvif, rx_gso_checksum_fixup)
240 },
241};
242
243static int xenvif_get_sset_count(struct net_device *dev, int string_set)
244{
245 switch (string_set) {
246 case ETH_SS_STATS:
247 return ARRAY_SIZE(xenvif_stats);
248 default:
249 return -EINVAL;
250 }
251}
252
253static void xenvif_get_ethtool_stats(struct net_device *dev,
254 struct ethtool_stats *stats, u64 * data)
255{
256 void *vif = netdev_priv(dev);
257 int i;
258
259 for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
260 data[i] = *(unsigned long *)(vif + xenvif_stats[i].offset);
261}
262
263static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
264{
265 int i;
266
267 switch (stringset) {
268 case ETH_SS_STATS:
269 for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
270 memcpy(data + i * ETH_GSTRING_LEN,
271 xenvif_stats[i].name, ETH_GSTRING_LEN);
272 break;
273 }
274}
275
276static struct ethtool_ops xenvif_ethtool_ops = {
277 .get_tx_csum = ethtool_op_get_tx_csum,
278 .set_tx_csum = xenvif_set_tx_csum,
279 .get_sg = ethtool_op_get_sg,
280 .set_sg = xenvif_set_sg,
281 .get_tso = ethtool_op_get_tso,
282 .set_tso = xenvif_set_tso,
283 .get_link = ethtool_op_get_link,
284
285 .get_sset_count = xenvif_get_sset_count,
286 .get_ethtool_stats = xenvif_get_ethtool_stats,
287 .get_strings = xenvif_get_strings,
288};
289
290static struct net_device_ops xenvif_netdev_ops = {
291 .ndo_start_xmit = xenvif_start_xmit,
292 .ndo_get_stats = xenvif_get_stats,
293 .ndo_open = xenvif_open,
294 .ndo_stop = xenvif_close,
295 .ndo_change_mtu = xenvif_change_mtu,
296};
297
298struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
299 unsigned int handle)
300{
301 int err;
302 struct net_device *dev;
303 struct xenvif *vif;
304 char name[IFNAMSIZ] = {};
305
306 snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
307 dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
308 if (dev == NULL) {
309 pr_warn("Could not allocate netdev\n");
310 return ERR_PTR(-ENOMEM);
311 }
312
313 SET_NETDEV_DEV(dev, parent);
314
315 vif = netdev_priv(dev);
316 vif->domid = domid;
317 vif->handle = handle;
318 vif->netbk = NULL;
319 vif->can_sg = 1;
320 vif->csum = 1;
321 atomic_set(&vif->refcnt, 1);
322 init_waitqueue_head(&vif->waiting_to_free);
323 vif->dev = dev;
324 INIT_LIST_HEAD(&vif->schedule_list);
325 INIT_LIST_HEAD(&vif->notify_list);
326
327 vif->credit_bytes = vif->remaining_credit = ~0UL;
328 vif->credit_usec = 0UL;
329 init_timer(&vif->credit_timeout);
330 /* Initialize 'expires' now: it's used to track the credit window. */
331 vif->credit_timeout.expires = jiffies;
332
333 dev->netdev_ops = &xenvif_netdev_ops;
334 xenvif_set_features(vif);
335 SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
336
337 dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
338
339 /*
340 * Initialise a dummy MAC address. We choose the numerically
341 * largest non-broadcast address to prevent the address getting
342 * stolen by an Ethernet bridge for STP purposes.
343 * (FE:FF:FF:FF:FF:FF)
344 */
345 memset(dev->dev_addr, 0xFF, ETH_ALEN);
346 dev->dev_addr[0] &= ~0x01;
347
348 netif_carrier_off(dev);
349
350 err = register_netdev(dev);
351 if (err) {
352 netdev_warn(dev, "Could not register device: err=%d\n", err);
353 free_netdev(dev);
354 return ERR_PTR(err);
355 }
356
357 netdev_dbg(dev, "Successfully created xenvif\n");
358 return vif;
359}
360
361int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
362 unsigned long rx_ring_ref, unsigned int evtchn)
363{
364 int err = -ENOMEM;
365
366 /* Already connected through? */
367 if (vif->irq)
368 return 0;
369
370 xenvif_set_features(vif);
371
372 err = xen_netbk_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
373 if (err < 0)
374 goto err;
375
376 err = bind_interdomain_evtchn_to_irqhandler(
377 vif->domid, evtchn, xenvif_interrupt, 0,
378 vif->dev->name, vif);
379 if (err < 0)
380 goto err_unmap;
381 vif->irq = err;
382 disable_irq(vif->irq);
383
384 xenvif_get(vif);
385
386 rtnl_lock();
387 netif_carrier_on(vif->dev);
388 if (netif_running(vif->dev))
389 xenvif_up(vif);
390 rtnl_unlock();
391
392 return 0;
393err_unmap:
394 xen_netbk_unmap_frontend_rings(vif);
395err:
396 return err;
397}
398
399void xenvif_disconnect(struct xenvif *vif)
400{
401 struct net_device *dev = vif->dev;
402 if (netif_carrier_ok(dev)) {
403 rtnl_lock();
404 netif_carrier_off(dev); /* discard queued packets */
405 if (netif_running(dev))
406 xenvif_down(vif);
407 rtnl_unlock();
408 xenvif_put(vif);
409 }
410
411 atomic_dec(&vif->refcnt);
412 wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
413
414 del_timer_sync(&vif->credit_timeout);
415
416 if (vif->irq)
417 unbind_from_irqhandler(vif->irq, vif);
418
419 unregister_netdev(vif->dev);
420
421 xen_netbk_unmap_frontend_rings(vif);
422
423 free_netdev(vif->dev);
424}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
new file mode 100644
index 000000000000..0e4851b8a773
--- /dev/null
+++ b/drivers/net/xen-netback/netback.c
@@ -0,0 +1,1745 @@
1/*
2 * Back-end of the driver for virtual network devices. This portion of the
3 * driver exports a 'unified' network-device interface that can be accessed
4 * by any operating system that implements a compatible front end. A
5 * reference front-end implementation can be found in:
6 * drivers/net/xen-netfront.c
7 *
8 * Copyright (c) 2002-2005, K A Fraser
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include "common.h"
36
37#include <linux/kthread.h>
38#include <linux/if_vlan.h>
39#include <linux/udp.h>
40
41#include <net/tcp.h>
42
43#include <xen/events.h>
44#include <xen/interface/memory.h>
45
46#include <asm/xen/hypercall.h>
47#include <asm/xen/page.h>
48
49struct pending_tx_info {
50 struct xen_netif_tx_request req;
51 struct xenvif *vif;
52};
53typedef unsigned int pending_ring_idx_t;
54
/*
 * Per-buffer bookkeeping for the rx path; get_next_rx_buffer() fills 'id'
 * from the rx request and zeroes the other two fields.
 */
struct netbk_rx_meta {
	int id;
	int size;
	int gso_size;
};
60
61#define MAX_PENDING_REQS 256
62
63#define MAX_BUFFER_OFFSET PAGE_SIZE
64
65/* extra field used in struct page */
66union page_ext {
67 struct {
68#if BITS_PER_LONG < 64
69#define IDX_WIDTH 8
70#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
71 unsigned int group:GROUP_WIDTH;
72 unsigned int idx:IDX_WIDTH;
73#else
74 unsigned int group, idx;
75#endif
76 } e;
77 void *mapping;
78};
79
80struct xen_netbk {
81 wait_queue_head_t wq;
82 struct task_struct *task;
83
84 struct sk_buff_head rx_queue;
85 struct sk_buff_head tx_queue;
86
87 struct timer_list net_timer;
88
89 struct page *mmap_pages[MAX_PENDING_REQS];
90
91 pending_ring_idx_t pending_prod;
92 pending_ring_idx_t pending_cons;
93 struct list_head net_schedule_list;
94
95 /* Protect the net_schedule_list in netif. */
96 spinlock_t net_schedule_list_lock;
97
98 atomic_t netfront_count;
99
100 struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
101 struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
102
103 u16 pending_ring[MAX_PENDING_REQS];
104
105 /*
106 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
107 * head/fragment page uses 2 copy operations because it
108 * straddles two buffers in the frontend.
109 */
110 struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
111 struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
112};
113
114static struct xen_netbk *xen_netbk;
115static int xen_netbk_group_nr;
116
117void xen_netbk_add_xenvif(struct xenvif *vif)
118{
119 int i;
120 int min_netfront_count;
121 int min_group = 0;
122 struct xen_netbk *netbk;
123
124 min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
125 for (i = 0; i < xen_netbk_group_nr; i++) {
126 int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
127 if (netfront_count < min_netfront_count) {
128 min_group = i;
129 min_netfront_count = netfront_count;
130 }
131 }
132
133 netbk = &xen_netbk[min_group];
134
135 vif->netbk = netbk;
136 atomic_inc(&netbk->netfront_count);
137}
138
139void xen_netbk_remove_xenvif(struct xenvif *vif)
140{
141 struct xen_netbk *netbk = vif->netbk;
142 vif->netbk = NULL;
143 atomic_dec(&netbk->netfront_count);
144}
145
146static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
147static void make_tx_response(struct xenvif *vif,
148 struct xen_netif_tx_request *txp,
149 s8 st);
150static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
151 u16 id,
152 s8 st,
153 u16 offset,
154 u16 size,
155 u16 flags);
156
157static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
158 unsigned int idx)
159{
160 return page_to_pfn(netbk->mmap_pages[idx]);
161}
162
/* Kernel virtual address of the mmap page at pending index @idx. */
static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
					 unsigned int idx)
{
	unsigned long pfn = idx_to_pfn(netbk, idx);

	return (unsigned long)pfn_to_kaddr(pfn);
}
168
169/* extra field used in struct page */
170static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
171 unsigned int idx)
172{
173 unsigned int group = netbk - xen_netbk;
174 union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
175
176 BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
177 pg->mapping = ext.mapping;
178}
179
180static int get_page_ext(struct page *pg,
181 unsigned int *pgroup, unsigned int *pidx)
182{
183 union page_ext ext = { .mapping = pg->mapping };
184 struct xen_netbk *netbk;
185 unsigned int group, idx;
186
187 group = ext.e.group - 1;
188
189 if (group < 0 || group >= xen_netbk_group_nr)
190 return 0;
191
192 netbk = &xen_netbk[group];
193
194 idx = ext.e.idx;
195
196 if ((idx < 0) || (idx >= MAX_PENDING_REQS))
197 return 0;
198
199 if (netbk->mmap_pages[idx] != pg)
200 return 0;
201
202 *pgroup = group;
203 *pidx = idx;
204
205 return 1;
206}
207
208/*
209 * This is the amount of packet we copy rather than map, so that the
210 * guest can't fiddle with the contents of the headers while we do
211 * packet processing on them (netfilter, routing, etc).
212 */
213#define PKT_PROT_LEN (ETH_HLEN + \
214 VLAN_HLEN + \
215 sizeof(struct iphdr) + MAX_IPOPTLEN + \
216 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
217
218static inline pending_ring_idx_t pending_index(unsigned i)
219{
220 return i & (MAX_PENDING_REQS-1);
221}
222
223static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
224{
225 return MAX_PENDING_REQS -
226 netbk->pending_prod + netbk->pending_cons;
227}
228
229static void xen_netbk_kick_thread(struct xen_netbk *netbk)
230{
231 wake_up(&netbk->wq);
232}
233
234static int max_required_rx_slots(struct xenvif *vif)
235{
236 int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
237
238 if (vif->can_sg || vif->gso || vif->gso_prefix)
239 max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
240
241 return max;
242}
243
244int xen_netbk_rx_ring_full(struct xenvif *vif)
245{
246 RING_IDX peek = vif->rx_req_cons_peek;
247 RING_IDX needed = max_required_rx_slots(vif);
248
249 return ((vif->rx.sring->req_prod - peek) < needed) ||
250 ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
251}
252
253int xen_netbk_must_stop_queue(struct xenvif *vif)
254{
255 if (!xen_netbk_rx_ring_full(vif))
256 return 0;
257
258 vif->rx.sring->req_event = vif->rx_req_cons_peek +
259 max_required_rx_slots(vif);
260 mb(); /* request notification /then/ check the queue */
261
262 return xen_netbk_rx_ring_full(vif);
263}
264
265/*
266 * Returns true if we should start a new receive buffer instead of
267 * adding 'size' bytes to a buffer which currently contains 'offset'
268 * bytes.
269 */
270static bool start_new_rx_buffer(int offset, unsigned long size, int head)
271{
272 /* simple case: we have completely filled the current buffer. */
273 if (offset == MAX_BUFFER_OFFSET)
274 return true;
275
276 /*
277 * complex case: start a fresh buffer if the current frag
278 * would overflow the current buffer but only if:
279 * (i) this frag would fit completely in the next buffer
280 * and (ii) there is already some data in the current buffer
281 * and (iii) this is not the head buffer.
282 *
283 * Where:
284 * - (i) stops us splitting a frag into two copies
285 * unless the frag is too large for a single buffer.
286 * - (ii) stops us from leaving a buffer pointlessly empty.
287 * - (iii) stops us leaving the first buffer
288 * empty. Strictly speaking this is already covered
289 * by (ii) but is explicitly checked because
290 * netfront relies on the first buffer being
291 * non-empty and can crash otherwise.
292 *
293 * This means we will effectively linearise small
294 * frags but do not needlessly split large buffers
295 * into multiple copies tend to give large frags their
296 * own buffers as before.
297 */
298 if ((offset + size > MAX_BUFFER_OFFSET) &&
299 (size <= MAX_BUFFER_OFFSET) && offset && !head)
300 return true;
301
302 return false;
303}
304
305/*
306 * Figure out how many ring slots we're going to need to send @skb to
307 * the guest. This function is essentially a dry run of
308 * netbk_gop_frag_copy.
309 */
310unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
311{
312 unsigned int count;
313 int i, copy_off;
314
315 count = DIV_ROUND_UP(
316 offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
317
318 copy_off = skb_headlen(skb) % PAGE_SIZE;
319
320 if (skb_shinfo(skb)->gso_size)
321 count++;
322
323 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
324 unsigned long size = skb_shinfo(skb)->frags[i].size;
325 unsigned long bytes;
326 while (size > 0) {
327 BUG_ON(copy_off > MAX_BUFFER_OFFSET);
328
329 if (start_new_rx_buffer(copy_off, size, 0)) {
330 count++;
331 copy_off = 0;
332 }
333
334 bytes = size;
335 if (copy_off + bytes > MAX_BUFFER_OFFSET)
336 bytes = MAX_BUFFER_OFFSET - copy_off;
337
338 copy_off += bytes;
339 size -= bytes;
340 }
341 }
342 return count;
343}
344
345struct netrx_pending_operations {
346 unsigned copy_prod, copy_cons;
347 unsigned meta_prod, meta_cons;
348 struct gnttab_copy *copy;
349 struct netbk_rx_meta *meta;
350 int copy_off;
351 grant_ref_t copy_gref;
352};
353
354static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
355 struct netrx_pending_operations *npo)
356{
357 struct netbk_rx_meta *meta;
358 struct xen_netif_rx_request *req;
359
360 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
361
362 meta = npo->meta + npo->meta_prod++;
363 meta->gso_size = 0;
364 meta->size = 0;
365 meta->id = req->id;
366
367 npo->copy_off = 0;
368 npo->copy_gref = req->gref;
369
370 return meta;
371}
372
373/*
374 * Set up the grant operations for this fragment. If it's a flipping
375 * interface, we also set up the unmap request from here.
376 */
377static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
378 struct netrx_pending_operations *npo,
379 struct page *page, unsigned long size,
380 unsigned long offset, int *head)
381{
382 struct gnttab_copy *copy_gop;
383 struct netbk_rx_meta *meta;
384 /*
385 * These variables a used iff get_page_ext returns true,
386 * in which case they are guaranteed to be initialized.
387 */
388 unsigned int uninitialized_var(group), uninitialized_var(idx);
389 int foreign = get_page_ext(page, &group, &idx);
390 unsigned long bytes;
391
392 /* Data must not cross a page boundary. */
393 BUG_ON(size + offset > PAGE_SIZE);
394
395 meta = npo->meta + npo->meta_prod - 1;
396
397 while (size > 0) {
398 BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
399
400 if (start_new_rx_buffer(npo->copy_off, size, *head)) {
401 /*
402 * Netfront requires there to be some data in the head
403 * buffer.
404 */
405 BUG_ON(*head);
406
407 meta = get_next_rx_buffer(vif, npo);
408 }
409
410 bytes = size;
411 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
412 bytes = MAX_BUFFER_OFFSET - npo->copy_off;
413
414 copy_gop = npo->copy + npo->copy_prod++;
415 copy_gop->flags = GNTCOPY_dest_gref;
416 if (foreign) {
417 struct xen_netbk *netbk = &xen_netbk[group];
418 struct pending_tx_info *src_pend;
419
420 src_pend = &netbk->pending_tx_info[idx];
421
422 copy_gop->source.domid = src_pend->vif->domid;
423 copy_gop->source.u.ref = src_pend->req.gref;
424 copy_gop->flags |= GNTCOPY_source_gref;
425 } else {
426 void *vaddr = page_address(page);
427 copy_gop->source.domid = DOMID_SELF;
428 copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
429 }
430 copy_gop->source.offset = offset;
431 copy_gop->dest.domid = vif->domid;
432
433 copy_gop->dest.offset = npo->copy_off;
434 copy_gop->dest.u.ref = npo->copy_gref;
435 copy_gop->len = bytes;
436
437 npo->copy_off += bytes;
438 meta->size += bytes;
439
440 offset += bytes;
441 size -= bytes;
442
443 /* Leave a gap for the GSO descriptor. */
444 if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
445 vif->rx.req_cons++;
446
447 *head = 0; /* There must be something in this buffer now. */
448
449 }
450}
451
452/*
453 * Prepare an SKB to be transmitted to the frontend.
454 *
455 * This function is responsible for allocating grant operations, meta
456 * structures, etc.
457 *
458 * It returns the number of meta structures consumed. The number of
459 * ring slots used is always equal to the number of meta slots used
460 * plus the number of GSO descriptors used. Currently, we use either
461 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
462 * frontend-side LRO).
463 */
464static int netbk_gop_skb(struct sk_buff *skb,
465 struct netrx_pending_operations *npo)
466{
467 struct xenvif *vif = netdev_priv(skb->dev);
468 int nr_frags = skb_shinfo(skb)->nr_frags;
469 int i;
470 struct xen_netif_rx_request *req;
471 struct netbk_rx_meta *meta;
472 unsigned char *data;
473 int head = 1;
474 int old_meta_prod;
475
476 old_meta_prod = npo->meta_prod;
477
478 /* Set up a GSO prefix descriptor, if necessary */
479 if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
480 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
481 meta = npo->meta + npo->meta_prod++;
482 meta->gso_size = skb_shinfo(skb)->gso_size;
483 meta->size = 0;
484 meta->id = req->id;
485 }
486
487 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
488 meta = npo->meta + npo->meta_prod++;
489
490 if (!vif->gso_prefix)
491 meta->gso_size = skb_shinfo(skb)->gso_size;
492 else
493 meta->gso_size = 0;
494
495 meta->size = 0;
496 meta->id = req->id;
497 npo->copy_off = 0;
498 npo->copy_gref = req->gref;
499
500 data = skb->data;
501 while (data < skb_tail_pointer(skb)) {
502 unsigned int offset = offset_in_page(data);
503 unsigned int len = PAGE_SIZE - offset;
504
505 if (data + len > skb_tail_pointer(skb))
506 len = skb_tail_pointer(skb) - data;
507
508 netbk_gop_frag_copy(vif, skb, npo,
509 virt_to_page(data), len, offset, &head);
510 data += len;
511 }
512
513 for (i = 0; i < nr_frags; i++) {
514 netbk_gop_frag_copy(vif, skb, npo,
515 skb_shinfo(skb)->frags[i].page,
516 skb_shinfo(skb)->frags[i].size,
517 skb_shinfo(skb)->frags[i].page_offset,
518 &head);
519 }
520
521 return npo->meta_prod - old_meta_prod;
522}
523
524/*
525 * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
526 * used to set up the operations on the top of
527 * netrx_pending_operations, which have since been done. Check that
528 * they didn't give any errors and advance over them.
529 */
530static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
531 struct netrx_pending_operations *npo)
532{
533 struct gnttab_copy *copy_op;
534 int status = XEN_NETIF_RSP_OKAY;
535 int i;
536
537 for (i = 0; i < nr_meta_slots; i++) {
538 copy_op = npo->copy + npo->copy_cons++;
539 if (copy_op->status != GNTST_okay) {
540 netdev_dbg(vif->dev,
541 "Bad status %d from copy to DOM%d.\n",
542 copy_op->status, vif->domid);
543 status = XEN_NETIF_RSP_ERROR;
544 }
545 }
546
547 return status;
548}
549
550static void netbk_add_frag_responses(struct xenvif *vif, int status,
551 struct netbk_rx_meta *meta,
552 int nr_meta_slots)
553{
554 int i;
555 unsigned long offset;
556
557 /* No fragments used */
558 if (nr_meta_slots <= 1)
559 return;
560
561 nr_meta_slots--;
562
563 for (i = 0; i < nr_meta_slots; i++) {
564 int flags;
565 if (i == nr_meta_slots - 1)
566 flags = 0;
567 else
568 flags = XEN_NETRXF_more_data;
569
570 offset = 0;
571 make_rx_response(vif, meta[i].id, status, offset,
572 meta[i].size, flags);
573 }
574}
575
/*
 * Per-skb scratch data kept in skb->cb while a packet travels through
 * xen_netbk_rx_action().
 */
struct skb_cb_overlay {
	/* Meta slots netbk_gop_skb() consumed for this skb. */
	int meta_slots_used;
};
579
580static void xen_netbk_rx_action(struct xen_netbk *netbk)
581{
582 struct xenvif *vif = NULL, *tmp;
583 s8 status;
584 u16 irq, flags;
585 struct xen_netif_rx_response *resp;
586 struct sk_buff_head rxq;
587 struct sk_buff *skb;
588 LIST_HEAD(notify);
589 int ret;
590 int nr_frags;
591 int count;
592 unsigned long offset;
593 struct skb_cb_overlay *sco;
594
595 struct netrx_pending_operations npo = {
596 .copy = netbk->grant_copy_op,
597 .meta = netbk->meta,
598 };
599
600 skb_queue_head_init(&rxq);
601
602 count = 0;
603
604 while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
605 vif = netdev_priv(skb->dev);
606 nr_frags = skb_shinfo(skb)->nr_frags;
607
608 sco = (struct skb_cb_overlay *)skb->cb;
609 sco->meta_slots_used = netbk_gop_skb(skb, &npo);
610
611 count += nr_frags + 1;
612
613 __skb_queue_tail(&rxq, skb);
614
615 /* Filled the batch queue? */
616 if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
617 break;
618 }
619
620 BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
621
622 if (!npo.copy_prod)
623 return;
624
625 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
626 ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
627 npo.copy_prod);
628 BUG_ON(ret != 0);
629
630 while ((skb = __skb_dequeue(&rxq)) != NULL) {
631 sco = (struct skb_cb_overlay *)skb->cb;
632
633 vif = netdev_priv(skb->dev);
634
635 if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
636 resp = RING_GET_RESPONSE(&vif->rx,
637 vif->rx.rsp_prod_pvt++);
638
639 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
640
641 resp->offset = netbk->meta[npo.meta_cons].gso_size;
642 resp->id = netbk->meta[npo.meta_cons].id;
643 resp->status = sco->meta_slots_used;
644
645 npo.meta_cons++;
646 sco->meta_slots_used--;
647 }
648
649
650 vif->dev->stats.tx_bytes += skb->len;
651 vif->dev->stats.tx_packets++;
652
653 status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
654
655 if (sco->meta_slots_used == 1)
656 flags = 0;
657 else
658 flags = XEN_NETRXF_more_data;
659
660 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
661 flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
662 else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
663 /* remote but checksummed. */
664 flags |= XEN_NETRXF_data_validated;
665
666 offset = 0;
667 resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
668 status, offset,
669 netbk->meta[npo.meta_cons].size,
670 flags);
671
672 if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
673 struct xen_netif_extra_info *gso =
674 (struct xen_netif_extra_info *)
675 RING_GET_RESPONSE(&vif->rx,
676 vif->rx.rsp_prod_pvt++);
677
678 resp->flags |= XEN_NETRXF_extra_info;
679
680 gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
681 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
682 gso->u.gso.pad = 0;
683 gso->u.gso.features = 0;
684
685 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
686 gso->flags = 0;
687 }
688
689 netbk_add_frag_responses(vif, status,
690 netbk->meta + npo.meta_cons + 1,
691 sco->meta_slots_used);
692
693 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
694 irq = vif->irq;
695 if (ret && list_empty(&vif->notify_list))
696 list_add_tail(&vif->notify_list, &notify);
697
698 xenvif_notify_tx_completion(vif);
699
700 xenvif_put(vif);
701 npo.meta_cons += sco->meta_slots_used;
702 dev_kfree_skb(skb);
703 }
704
705 list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
706 notify_remote_via_irq(vif->irq);
707 list_del_init(&vif->notify_list);
708 }
709
710 /* More work to do? */
711 if (!skb_queue_empty(&netbk->rx_queue) &&
712 !timer_pending(&netbk->net_timer))
713 xen_netbk_kick_thread(netbk);
714}
715
716void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
717{
718 struct xen_netbk *netbk = vif->netbk;
719
720 skb_queue_tail(&netbk->rx_queue, skb);
721
722 xen_netbk_kick_thread(netbk);
723}
724
/*
 * Timer callback: @data is the struct xen_netbk pointer cast to an
 * unsigned long; kick that instance's worker.
 */
static void xen_netbk_alarm(unsigned long data)
{
	xen_netbk_kick_thread((struct xen_netbk *)data);
}
730
731static int __on_net_schedule_list(struct xenvif *vif)
732{
733 return !list_empty(&vif->schedule_list);
734}
735
736/* Must be called with net_schedule_list_lock held */
737static void remove_from_net_schedule_list(struct xenvif *vif)
738{
739 if (likely(__on_net_schedule_list(vif))) {
740 list_del_init(&vif->schedule_list);
741 xenvif_put(vif);
742 }
743}
744
745static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
746{
747 struct xenvif *vif = NULL;
748
749 spin_lock_irq(&netbk->net_schedule_list_lock);
750 if (list_empty(&netbk->net_schedule_list))
751 goto out;
752
753 vif = list_first_entry(&netbk->net_schedule_list,
754 struct xenvif, schedule_list);
755 if (!vif)
756 goto out;
757
758 xenvif_get(vif);
759
760 remove_from_net_schedule_list(vif);
761out:
762 spin_unlock_irq(&netbk->net_schedule_list_lock);
763 return vif;
764}
765
766void xen_netbk_schedule_xenvif(struct xenvif *vif)
767{
768 unsigned long flags;
769 struct xen_netbk *netbk = vif->netbk;
770
771 if (__on_net_schedule_list(vif))
772 goto kick;
773
774 spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
775 if (!__on_net_schedule_list(vif) &&
776 likely(xenvif_schedulable(vif))) {
777 list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
778 xenvif_get(vif);
779 }
780 spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
781
782kick:
783 smp_mb();
784 if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
785 !list_empty(&netbk->net_schedule_list))
786 xen_netbk_kick_thread(netbk);
787}
788
789void xen_netbk_deschedule_xenvif(struct xenvif *vif)
790{
791 struct xen_netbk *netbk = vif->netbk;
792 spin_lock_irq(&netbk->net_schedule_list_lock);
793 remove_from_net_schedule_list(vif);
794 spin_unlock_irq(&netbk->net_schedule_list_lock);
795}
796
797void xen_netbk_check_rx_xenvif(struct xenvif *vif)
798{
799 int more_to_do;
800
801 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
802
803 if (more_to_do)
804 xen_netbk_schedule_xenvif(vif);
805}
806
807static void tx_add_credit(struct xenvif *vif)
808{
809 unsigned long max_burst, max_credit;
810
811 /*
812 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
813 * Otherwise the interface can seize up due to insufficient credit.
814 */
815 max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
816 max_burst = min(max_burst, 131072UL);
817 max_burst = max(max_burst, vif->credit_bytes);
818
819 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
820 max_credit = vif->remaining_credit + vif->credit_bytes;
821 if (max_credit < vif->remaining_credit)
822 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
823
824 vif->remaining_credit = min(max_credit, max_burst);
825}
826
/*
 * Credit timer callback: @data is the struct xenvif pointer cast to an
 * unsigned long.  Tops up the credit and re-checks the TX ring.
 */
static void tx_credit_callback(unsigned long data)
{
	struct xenvif *owner = (struct xenvif *)data;

	tx_add_credit(owner);
	xen_netbk_check_rx_xenvif(owner);
}
833
834static void netbk_tx_err(struct xenvif *vif,
835 struct xen_netif_tx_request *txp, RING_IDX end)
836{
837 RING_IDX cons = vif->tx.req_cons;
838
839 do {
840 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
841 if (cons >= end)
842 break;
843 txp = RING_GET_REQUEST(&vif->tx, cons++);
844 } while (1);
845 vif->tx.req_cons = cons;
846 xen_netbk_check_rx_xenvif(vif);
847 xenvif_put(vif);
848}
849
850static int netbk_count_requests(struct xenvif *vif,
851 struct xen_netif_tx_request *first,
852 struct xen_netif_tx_request *txp,
853 int work_to_do)
854{
855 RING_IDX cons = vif->tx.req_cons;
856 int frags = 0;
857
858 if (!(first->flags & XEN_NETTXF_more_data))
859 return 0;
860
861 do {
862 if (frags >= work_to_do) {
863 netdev_dbg(vif->dev, "Need more frags\n");
864 return -frags;
865 }
866
867 if (unlikely(frags >= MAX_SKB_FRAGS)) {
868 netdev_dbg(vif->dev, "Too many frags\n");
869 return -frags;
870 }
871
872 memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
873 sizeof(*txp));
874 if (txp->size > first->size) {
875 netdev_dbg(vif->dev, "Frags galore\n");
876 return -frags;
877 }
878
879 first->size -= txp->size;
880 frags++;
881
882 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
883 netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n",
884 txp->offset, txp->size);
885 return -frags;
886 }
887 } while ((txp++)->flags & XEN_NETTXF_more_data);
888 return frags;
889}
890
891static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
892 struct sk_buff *skb,
893 unsigned long pending_idx)
894{
895 struct page *page;
896 page = alloc_page(GFP_KERNEL|__GFP_COLD);
897 if (!page)
898 return NULL;
899 set_page_ext(page, netbk, pending_idx);
900 netbk->mmap_pages[pending_idx] = page;
901 return page;
902}
903
904static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
905 struct xenvif *vif,
906 struct sk_buff *skb,
907 struct xen_netif_tx_request *txp,
908 struct gnttab_copy *gop)
909{
910 struct skb_shared_info *shinfo = skb_shinfo(skb);
911 skb_frag_t *frags = shinfo->frags;
912 unsigned long pending_idx = *((u16 *)skb->data);
913 int i, start;
914
915 /* Skip first skb fragment if it is on same page as header fragment. */
916 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
917
918 for (i = start; i < shinfo->nr_frags; i++, txp++) {
919 struct page *page;
920 pending_ring_idx_t index;
921 struct pending_tx_info *pending_tx_info =
922 netbk->pending_tx_info;
923
924 index = pending_index(netbk->pending_cons++);
925 pending_idx = netbk->pending_ring[index];
926 page = xen_netbk_alloc_page(netbk, skb, pending_idx);
927 if (!page)
928 return NULL;
929
930 netbk->mmap_pages[pending_idx] = page;
931
932 gop->source.u.ref = txp->gref;
933 gop->source.domid = vif->domid;
934 gop->source.offset = txp->offset;
935
936 gop->dest.u.gmfn = virt_to_mfn(page_address(page));
937 gop->dest.domid = DOMID_SELF;
938 gop->dest.offset = txp->offset;
939
940 gop->len = txp->size;
941 gop->flags = GNTCOPY_source_gref;
942
943 gop++;
944
945 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
946 xenvif_get(vif);
947 pending_tx_info[pending_idx].vif = vif;
948 frags[i].page = (void *)pending_idx;
949 }
950
951 return gop;
952}
953
954static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
955 struct sk_buff *skb,
956 struct gnttab_copy **gopp)
957{
958 struct gnttab_copy *gop = *gopp;
959 int pending_idx = *((u16 *)skb->data);
960 struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
961 struct xenvif *vif = pending_tx_info[pending_idx].vif;
962 struct xen_netif_tx_request *txp;
963 struct skb_shared_info *shinfo = skb_shinfo(skb);
964 int nr_frags = shinfo->nr_frags;
965 int i, err, start;
966
967 /* Check status of header. */
968 err = gop->status;
969 if (unlikely(err)) {
970 pending_ring_idx_t index;
971 index = pending_index(netbk->pending_prod++);
972 txp = &pending_tx_info[pending_idx].req;
973 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
974 netbk->pending_ring[index] = pending_idx;
975 xenvif_put(vif);
976 }
977
978 /* Skip first skb fragment if it is on same page as header fragment. */
979 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
980
981 for (i = start; i < nr_frags; i++) {
982 int j, newerr;
983 pending_ring_idx_t index;
984
985 pending_idx = (unsigned long)shinfo->frags[i].page;
986
987 /* Check error status: if okay then remember grant handle. */
988 newerr = (++gop)->status;
989 if (likely(!newerr)) {
990 /* Had a previous error? Invalidate this fragment. */
991 if (unlikely(err))
992 xen_netbk_idx_release(netbk, pending_idx);
993 continue;
994 }
995
996 /* Error on this fragment: respond to client with an error. */
997 txp = &netbk->pending_tx_info[pending_idx].req;
998 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
999 index = pending_index(netbk->pending_prod++);
1000 netbk->pending_ring[index] = pending_idx;
1001 xenvif_put(vif);
1002
1003 /* Not the first error? Preceding frags already invalidated. */
1004 if (err)
1005 continue;
1006
1007 /* First error: invalidate header and preceding fragments. */
1008 pending_idx = *((u16 *)skb->data);
1009 xen_netbk_idx_release(netbk, pending_idx);
1010 for (j = start; j < i; j++) {
1011 pending_idx = (unsigned long)shinfo->frags[i].page;
1012 xen_netbk_idx_release(netbk, pending_idx);
1013 }
1014
1015 /* Remember the error: invalidate all subsequent fragments. */
1016 err = newerr;
1017 }
1018
1019 *gopp = gop + 1;
1020 return err;
1021}
1022
1023static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1024{
1025 struct skb_shared_info *shinfo = skb_shinfo(skb);
1026 int nr_frags = shinfo->nr_frags;
1027 int i;
1028
1029 for (i = 0; i < nr_frags; i++) {
1030 skb_frag_t *frag = shinfo->frags + i;
1031 struct xen_netif_tx_request *txp;
1032 unsigned long pending_idx;
1033
1034 pending_idx = (unsigned long)frag->page;
1035
1036 txp = &netbk->pending_tx_info[pending_idx].req;
1037 frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
1038 frag->size = txp->size;
1039 frag->page_offset = txp->offset;
1040
1041 skb->len += txp->size;
1042 skb->data_len += txp->size;
1043 skb->truesize += txp->size;
1044
1045 /* Take an extra reference to offset xen_netbk_idx_release */
1046 get_page(netbk->mmap_pages[pending_idx]);
1047 xen_netbk_idx_release(netbk, pending_idx);
1048 }
1049}
1050
1051static int xen_netbk_get_extras(struct xenvif *vif,
1052 struct xen_netif_extra_info *extras,
1053 int work_to_do)
1054{
1055 struct xen_netif_extra_info extra;
1056 RING_IDX cons = vif->tx.req_cons;
1057
1058 do {
1059 if (unlikely(work_to_do-- <= 0)) {
1060 netdev_dbg(vif->dev, "Missing extra info\n");
1061 return -EBADR;
1062 }
1063
1064 memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
1065 sizeof(extra));
1066 if (unlikely(!extra.type ||
1067 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1068 vif->tx.req_cons = ++cons;
1069 netdev_dbg(vif->dev,
1070 "Invalid extra type: %d\n", extra.type);
1071 return -EINVAL;
1072 }
1073
1074 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1075 vif->tx.req_cons = ++cons;
1076 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1077
1078 return work_to_do;
1079}
1080
1081static int netbk_set_skb_gso(struct xenvif *vif,
1082 struct sk_buff *skb,
1083 struct xen_netif_extra_info *gso)
1084{
1085 if (!gso->u.gso.size) {
1086 netdev_dbg(vif->dev, "GSO size must not be zero.\n");
1087 return -EINVAL;
1088 }
1089
1090 /* Currently only TCPv4 S.O. is supported. */
1091 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1092 netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
1093 return -EINVAL;
1094 }
1095
1096 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1097 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1098
1099 /* Header must be checked, and gso_segs computed. */
1100 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1101 skb_shinfo(skb)->gso_segs = 0;
1102
1103 return 0;
1104}
1105
1106static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
1107{
1108 struct iphdr *iph;
1109 unsigned char *th;
1110 int err = -EPROTO;
1111 int recalculate_partial_csum = 0;
1112
1113 /*
1114 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1115 * peers can fail to set NETRXF_csum_blank when sending a GSO
1116 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1117 * recalculate the partial checksum.
1118 */
1119 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1120 vif->rx_gso_checksum_fixup++;
1121 skb->ip_summed = CHECKSUM_PARTIAL;
1122 recalculate_partial_csum = 1;
1123 }
1124
1125 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1126 if (skb->ip_summed != CHECKSUM_PARTIAL)
1127 return 0;
1128
1129 if (skb->protocol != htons(ETH_P_IP))
1130 goto out;
1131
1132 iph = (void *)skb->data;
1133 th = skb->data + 4 * iph->ihl;
1134 if (th >= skb_tail_pointer(skb))
1135 goto out;
1136
1137 skb->csum_start = th - skb->head;
1138 switch (iph->protocol) {
1139 case IPPROTO_TCP:
1140 skb->csum_offset = offsetof(struct tcphdr, check);
1141
1142 if (recalculate_partial_csum) {
1143 struct tcphdr *tcph = (struct tcphdr *)th;
1144 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1145 skb->len - iph->ihl*4,
1146 IPPROTO_TCP, 0);
1147 }
1148 break;
1149 case IPPROTO_UDP:
1150 skb->csum_offset = offsetof(struct udphdr, check);
1151
1152 if (recalculate_partial_csum) {
1153 struct udphdr *udph = (struct udphdr *)th;
1154 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1155 skb->len - iph->ihl*4,
1156 IPPROTO_UDP, 0);
1157 }
1158 break;
1159 default:
1160 if (net_ratelimit())
1161 netdev_err(vif->dev,
1162 "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
1163 iph->protocol);
1164 goto out;
1165 }
1166
1167 if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
1168 goto out;
1169
1170 err = 0;
1171
1172out:
1173 return err;
1174}
1175
1176static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1177{
1178 unsigned long now = jiffies;
1179 unsigned long next_credit =
1180 vif->credit_timeout.expires +
1181 msecs_to_jiffies(vif->credit_usec / 1000);
1182
1183 /* Timer could already be pending in rare cases. */
1184 if (timer_pending(&vif->credit_timeout))
1185 return true;
1186
1187 /* Passed the point where we can replenish credit? */
1188 if (time_after_eq(now, next_credit)) {
1189 vif->credit_timeout.expires = now;
1190 tx_add_credit(vif);
1191 }
1192
1193 /* Still too big to send right now? Set a callback. */
1194 if (size > vif->remaining_credit) {
1195 vif->credit_timeout.data =
1196 (unsigned long)vif;
1197 vif->credit_timeout.function =
1198 tx_credit_callback;
1199 mod_timer(&vif->credit_timeout,
1200 next_credit);
1201
1202 return true;
1203 }
1204
1205 return false;
1206}
1207
1208static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1209{
1210 struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
1211 struct sk_buff *skb;
1212 int ret;
1213
1214 while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1215 !list_empty(&netbk->net_schedule_list)) {
1216 struct xenvif *vif;
1217 struct xen_netif_tx_request txreq;
1218 struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
1219 struct page *page;
1220 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
1221 u16 pending_idx;
1222 RING_IDX idx;
1223 int work_to_do;
1224 unsigned int data_len;
1225 pending_ring_idx_t index;
1226
1227 /* Get a netif from the list with work to do. */
1228 vif = poll_net_schedule_list(netbk);
1229 if (!vif)
1230 continue;
1231
1232 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
1233 if (!work_to_do) {
1234 xenvif_put(vif);
1235 continue;
1236 }
1237
1238 idx = vif->tx.req_cons;
1239 rmb(); /* Ensure that we see the request before we copy it. */
1240 memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
1241
1242 /* Credit-based scheduling. */
1243 if (txreq.size > vif->remaining_credit &&
1244 tx_credit_exceeded(vif, txreq.size)) {
1245 xenvif_put(vif);
1246 continue;
1247 }
1248
1249 vif->remaining_credit -= txreq.size;
1250
1251 work_to_do--;
1252 vif->tx.req_cons = ++idx;
1253
1254 memset(extras, 0, sizeof(extras));
1255 if (txreq.flags & XEN_NETTXF_extra_info) {
1256 work_to_do = xen_netbk_get_extras(vif, extras,
1257 work_to_do);
1258 idx = vif->tx.req_cons;
1259 if (unlikely(work_to_do < 0)) {
1260 netbk_tx_err(vif, &txreq, idx);
1261 continue;
1262 }
1263 }
1264
1265 ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
1266 if (unlikely(ret < 0)) {
1267 netbk_tx_err(vif, &txreq, idx - ret);
1268 continue;
1269 }
1270 idx += ret;
1271
1272 if (unlikely(txreq.size < ETH_HLEN)) {
1273 netdev_dbg(vif->dev,
1274 "Bad packet size: %d\n", txreq.size);
1275 netbk_tx_err(vif, &txreq, idx);
1276 continue;
1277 }
1278
1279 /* No crossing a page as the payload mustn't fragment. */
1280 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1281 netdev_dbg(vif->dev,
1282 "txreq.offset: %x, size: %u, end: %lu\n",
1283 txreq.offset, txreq.size,
1284 (txreq.offset&~PAGE_MASK) + txreq.size);
1285 netbk_tx_err(vif, &txreq, idx);
1286 continue;
1287 }
1288
1289 index = pending_index(netbk->pending_cons);
1290 pending_idx = netbk->pending_ring[index];
1291
1292 data_len = (txreq.size > PKT_PROT_LEN &&
1293 ret < MAX_SKB_FRAGS) ?
1294 PKT_PROT_LEN : txreq.size;
1295
1296 skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
1297 GFP_ATOMIC | __GFP_NOWARN);
1298 if (unlikely(skb == NULL)) {
1299 netdev_dbg(vif->dev,
1300 "Can't allocate a skb in start_xmit.\n");
1301 netbk_tx_err(vif, &txreq, idx);
1302 break;
1303 }
1304
1305 /* Packets passed to netif_rx() must have some headroom. */
1306 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1307
1308 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1309 struct xen_netif_extra_info *gso;
1310 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1311
1312 if (netbk_set_skb_gso(vif, skb, gso)) {
1313 kfree_skb(skb);
1314 netbk_tx_err(vif, &txreq, idx);
1315 continue;
1316 }
1317 }
1318
1319 /* XXX could copy straight to head */
1320 page = xen_netbk_alloc_page(netbk, skb, pending_idx);
1321 if (!page) {
1322 kfree_skb(skb);
1323 netbk_tx_err(vif, &txreq, idx);
1324 continue;
1325 }
1326
1327 netbk->mmap_pages[pending_idx] = page;
1328
1329 gop->source.u.ref = txreq.gref;
1330 gop->source.domid = vif->domid;
1331 gop->source.offset = txreq.offset;
1332
1333 gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1334 gop->dest.domid = DOMID_SELF;
1335 gop->dest.offset = txreq.offset;
1336
1337 gop->len = txreq.size;
1338 gop->flags = GNTCOPY_source_gref;
1339
1340 gop++;
1341
1342 memcpy(&netbk->pending_tx_info[pending_idx].req,
1343 &txreq, sizeof(txreq));
1344 netbk->pending_tx_info[pending_idx].vif = vif;
1345 *((u16 *)skb->data) = pending_idx;
1346
1347 __skb_put(skb, data_len);
1348
1349 skb_shinfo(skb)->nr_frags = ret;
1350 if (data_len < txreq.size) {
1351 skb_shinfo(skb)->nr_frags++;
1352 skb_shinfo(skb)->frags[0].page =
1353 (void *)(unsigned long)pending_idx;
1354 } else {
1355 /* Discriminate from any valid pending_idx value. */
1356 skb_shinfo(skb)->frags[0].page = (void *)~0UL;
1357 }
1358
1359 __skb_queue_tail(&netbk->tx_queue, skb);
1360
1361 netbk->pending_cons++;
1362
1363 request_gop = xen_netbk_get_requests(netbk, vif,
1364 skb, txfrags, gop);
1365 if (request_gop == NULL) {
1366 kfree_skb(skb);
1367 netbk_tx_err(vif, &txreq, idx);
1368 continue;
1369 }
1370 gop = request_gop;
1371
1372 vif->tx.req_cons = idx;
1373 xen_netbk_check_rx_xenvif(vif);
1374
1375 if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
1376 break;
1377 }
1378
1379 return gop - netbk->tx_copy_ops;
1380}
1381
1382static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1383{
1384 struct gnttab_copy *gop = netbk->tx_copy_ops;
1385 struct sk_buff *skb;
1386
1387 while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1388 struct xen_netif_tx_request *txp;
1389 struct xenvif *vif;
1390 u16 pending_idx;
1391 unsigned data_len;
1392
1393 pending_idx = *((u16 *)skb->data);
1394 vif = netbk->pending_tx_info[pending_idx].vif;
1395 txp = &netbk->pending_tx_info[pending_idx].req;
1396
1397 /* Check the remap error code. */
1398 if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
1399 netdev_dbg(vif->dev, "netback grant failed.\n");
1400 skb_shinfo(skb)->nr_frags = 0;
1401 kfree_skb(skb);
1402 continue;
1403 }
1404
1405 data_len = skb->len;
1406 memcpy(skb->data,
1407 (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
1408 data_len);
1409 if (data_len < txp->size) {
1410 /* Append the packet payload as a fragment. */
1411 txp->offset += data_len;
1412 txp->size -= data_len;
1413 } else {
1414 /* Schedule a response immediately. */
1415 xen_netbk_idx_release(netbk, pending_idx);
1416 }
1417
1418 if (txp->flags & XEN_NETTXF_csum_blank)
1419 skb->ip_summed = CHECKSUM_PARTIAL;
1420 else if (txp->flags & XEN_NETTXF_data_validated)
1421 skb->ip_summed = CHECKSUM_UNNECESSARY;
1422
1423 xen_netbk_fill_frags(netbk, skb);
1424
1425 /*
1426 * If the initial fragment was < PKT_PROT_LEN then
1427 * pull through some bytes from the other fragments to
1428 * increase the linear region to PKT_PROT_LEN bytes.
1429 */
1430 if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
1431 int target = min_t(int, skb->len, PKT_PROT_LEN);
1432 __pskb_pull_tail(skb, target - skb_headlen(skb));
1433 }
1434
1435 skb->dev = vif->dev;
1436 skb->protocol = eth_type_trans(skb, skb->dev);
1437
1438 if (checksum_setup(vif, skb)) {
1439 netdev_dbg(vif->dev,
1440 "Can't setup checksum in net_tx_action\n");
1441 kfree_skb(skb);
1442 continue;
1443 }
1444
1445 vif->dev->stats.rx_bytes += skb->len;
1446 vif->dev->stats.rx_packets++;
1447
1448 xenvif_receive_skb(vif, skb);
1449 }
1450}
1451
1452/* Called after netfront has transmitted */
1453static void xen_netbk_tx_action(struct xen_netbk *netbk)
1454{
1455 unsigned nr_gops;
1456 int ret;
1457
1458 nr_gops = xen_netbk_tx_build_gops(netbk);
1459
1460 if (nr_gops == 0)
1461 return;
1462 ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
1463 netbk->tx_copy_ops, nr_gops);
1464 BUG_ON(ret);
1465
1466 xen_netbk_tx_submit(netbk);
1467
1468}
1469
1470static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
1471{
1472 struct xenvif *vif;
1473 struct pending_tx_info *pending_tx_info;
1474 pending_ring_idx_t index;
1475
1476 /* Already complete? */
1477 if (netbk->mmap_pages[pending_idx] == NULL)
1478 return;
1479
1480 pending_tx_info = &netbk->pending_tx_info[pending_idx];
1481
1482 vif = pending_tx_info->vif;
1483
1484 make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
1485
1486 index = pending_index(netbk->pending_prod++);
1487 netbk->pending_ring[index] = pending_idx;
1488
1489 xenvif_put(vif);
1490
1491 netbk->mmap_pages[pending_idx]->mapping = 0;
1492 put_page(netbk->mmap_pages[pending_idx]);
1493 netbk->mmap_pages[pending_idx] = NULL;
1494}
1495
1496static void make_tx_response(struct xenvif *vif,
1497 struct xen_netif_tx_request *txp,
1498 s8 st)
1499{
1500 RING_IDX i = vif->tx.rsp_prod_pvt;
1501 struct xen_netif_tx_response *resp;
1502 int notify;
1503
1504 resp = RING_GET_RESPONSE(&vif->tx, i);
1505 resp->id = txp->id;
1506 resp->status = st;
1507
1508 if (txp->flags & XEN_NETTXF_extra_info)
1509 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1510
1511 vif->tx.rsp_prod_pvt = ++i;
1512 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
1513 if (notify)
1514 notify_remote_via_irq(vif->irq);
1515}
1516
1517static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1518 u16 id,
1519 s8 st,
1520 u16 offset,
1521 u16 size,
1522 u16 flags)
1523{
1524 RING_IDX i = vif->rx.rsp_prod_pvt;
1525 struct xen_netif_rx_response *resp;
1526
1527 resp = RING_GET_RESPONSE(&vif->rx, i);
1528 resp->offset = offset;
1529 resp->flags = flags;
1530 resp->id = id;
1531 resp->status = (s16)size;
1532 if (st < 0)
1533 resp->status = (s16)st;
1534
1535 vif->rx.rsp_prod_pvt = ++i;
1536
1537 return resp;
1538}
1539
1540static inline int rx_work_todo(struct xen_netbk *netbk)
1541{
1542 return !skb_queue_empty(&netbk->rx_queue);
1543}
1544
1545static inline int tx_work_todo(struct xen_netbk *netbk)
1546{
1547
1548 if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1549 !list_empty(&netbk->net_schedule_list))
1550 return 1;
1551
1552 return 0;
1553}
1554
1555static int xen_netbk_kthread(void *data)
1556{
1557 struct xen_netbk *netbk = data;
1558 while (!kthread_should_stop()) {
1559 wait_event_interruptible(netbk->wq,
1560 rx_work_todo(netbk) ||
1561 tx_work_todo(netbk) ||
1562 kthread_should_stop());
1563 cond_resched();
1564
1565 if (kthread_should_stop())
1566 break;
1567
1568 if (rx_work_todo(netbk))
1569 xen_netbk_rx_action(netbk);
1570
1571 if (tx_work_todo(netbk))
1572 xen_netbk_tx_action(netbk);
1573 }
1574
1575 return 0;
1576}
1577
1578void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
1579{
1580 struct gnttab_unmap_grant_ref op;
1581
1582 if (vif->tx.sring) {
1583 gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
1584 GNTMAP_host_map, vif->tx_shmem_handle);
1585
1586 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1587 BUG();
1588 }
1589
1590 if (vif->rx.sring) {
1591 gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
1592 GNTMAP_host_map, vif->rx_shmem_handle);
1593
1594 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1595 BUG();
1596 }
1597 if (vif->rx_comms_area)
1598 free_vm_area(vif->rx_comms_area);
1599 if (vif->tx_comms_area)
1600 free_vm_area(vif->tx_comms_area);
1601}
1602
1603int xen_netbk_map_frontend_rings(struct xenvif *vif,
1604 grant_ref_t tx_ring_ref,
1605 grant_ref_t rx_ring_ref)
1606{
1607 struct gnttab_map_grant_ref op;
1608 struct xen_netif_tx_sring *txs;
1609 struct xen_netif_rx_sring *rxs;
1610
1611 int err = -ENOMEM;
1612
1613 vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
1614 if (vif->tx_comms_area == NULL)
1615 goto err;
1616
1617 vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
1618 if (vif->rx_comms_area == NULL)
1619 goto err;
1620
1621 gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
1622 GNTMAP_host_map, tx_ring_ref, vif->domid);
1623
1624 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1625 BUG();
1626
1627 if (op.status) {
1628 netdev_warn(vif->dev,
1629 "failed to map tx ring. err=%d status=%d\n",
1630 err, op.status);
1631 err = op.status;
1632 goto err;
1633 }
1634
1635 vif->tx_shmem_ref = tx_ring_ref;
1636 vif->tx_shmem_handle = op.handle;
1637
1638 txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
1639 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
1640
1641 gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
1642 GNTMAP_host_map, rx_ring_ref, vif->domid);
1643
1644 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1645 BUG();
1646
1647 if (op.status) {
1648 netdev_warn(vif->dev,
1649 "failed to map rx ring. err=%d status=%d\n",
1650 err, op.status);
1651 err = op.status;
1652 goto err;
1653 }
1654
1655 vif->rx_shmem_ref = rx_ring_ref;
1656 vif->rx_shmem_handle = op.handle;
1657 vif->rx_req_cons_peek = 0;
1658
1659 rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
1660 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
1661
1662 return 0;
1663
1664err:
1665 xen_netbk_unmap_frontend_rings(vif);
1666 return err;
1667}
1668
1669static int __init netback_init(void)
1670{
1671 int i;
1672 int rc = 0;
1673 int group;
1674
1675 if (!xen_pv_domain())
1676 return -ENODEV;
1677
1678 xen_netbk_group_nr = num_online_cpus();
1679 xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
1680 if (!xen_netbk) {
1681 printk(KERN_ALERT "%s: out of memory\n", __func__);
1682 return -ENOMEM;
1683 }
1684
1685 for (group = 0; group < xen_netbk_group_nr; group++) {
1686 struct xen_netbk *netbk = &xen_netbk[group];
1687 skb_queue_head_init(&netbk->rx_queue);
1688 skb_queue_head_init(&netbk->tx_queue);
1689
1690 init_timer(&netbk->net_timer);
1691 netbk->net_timer.data = (unsigned long)netbk;
1692 netbk->net_timer.function = xen_netbk_alarm;
1693
1694 netbk->pending_cons = 0;
1695 netbk->pending_prod = MAX_PENDING_REQS;
1696 for (i = 0; i < MAX_PENDING_REQS; i++)
1697 netbk->pending_ring[i] = i;
1698
1699 init_waitqueue_head(&netbk->wq);
1700 netbk->task = kthread_create(xen_netbk_kthread,
1701 (void *)netbk,
1702 "netback/%u", group);
1703
1704 if (IS_ERR(netbk->task)) {
1705 printk(KERN_ALERT "kthread_run() fails at netback\n");
1706 del_timer(&netbk->net_timer);
1707 rc = PTR_ERR(netbk->task);
1708 goto failed_init;
1709 }
1710
1711 kthread_bind(netbk->task, group);
1712
1713 INIT_LIST_HEAD(&netbk->net_schedule_list);
1714
1715 spin_lock_init(&netbk->net_schedule_list_lock);
1716
1717 atomic_set(&netbk->netfront_count, 0);
1718
1719 wake_up_process(netbk->task);
1720 }
1721
1722 rc = xenvif_xenbus_init();
1723 if (rc)
1724 goto failed_init;
1725
1726 return 0;
1727
1728failed_init:
1729 while (--group >= 0) {
1730 struct xen_netbk *netbk = &xen_netbk[group];
1731 for (i = 0; i < MAX_PENDING_REQS; i++) {
1732 if (netbk->mmap_pages[i])
1733 __free_page(netbk->mmap_pages[i]);
1734 }
1735 del_timer(&netbk->net_timer);
1736 kthread_stop(netbk->task);
1737 }
1738 vfree(xen_netbk);
1739 return rc;
1740
1741}
1742
1743module_init(netback_init);
1744
1745MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
new file mode 100644
index 000000000000..22b8c3505991
--- /dev/null
+++ b/drivers/net/xen-netback/xenbus.c
@@ -0,0 +1,490 @@
1/*
2 * Xenbus code for netif backend
3 *
4 * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
5 * Copyright (C) 2005 XenSource Ltd
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20*/
21
22#include "common.h"
23
24struct backend_info {
25 struct xenbus_device *dev;
26 struct xenvif *vif;
27 enum xenbus_state frontend_state;
28 struct xenbus_watch hotplug_status_watch;
29 int have_hotplug_status_watch:1;
30};
31
32static int connect_rings(struct backend_info *);
33static void connect(struct backend_info *);
34static void backend_create_xenvif(struct backend_info *be);
35static void unregister_hotplug_status_watch(struct backend_info *be);
36
37static int netback_remove(struct xenbus_device *dev)
38{
39 struct backend_info *be = dev_get_drvdata(&dev->dev);
40
41 unregister_hotplug_status_watch(be);
42 if (be->vif) {
43 kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
44 xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
45 xenvif_disconnect(be->vif);
46 be->vif = NULL;
47 }
48 kfree(be);
49 dev_set_drvdata(&dev->dev, NULL);
50 return 0;
51}
52
53
54/**
55 * Entry point to this code when a new device is created. Allocate the basic
56 * structures and switch to InitWait.
57 */
58static int netback_probe(struct xenbus_device *dev,
59 const struct xenbus_device_id *id)
60{
61 const char *message;
62 struct xenbus_transaction xbt;
63 int err;
64 int sg;
65 struct backend_info *be = kzalloc(sizeof(struct backend_info),
66 GFP_KERNEL);
67 if (!be) {
68 xenbus_dev_fatal(dev, -ENOMEM,
69 "allocating backend structure");
70 return -ENOMEM;
71 }
72
73 be->dev = dev;
74 dev_set_drvdata(&dev->dev, be);
75
76 sg = 1;
77
78 do {
79 err = xenbus_transaction_start(&xbt);
80 if (err) {
81 xenbus_dev_fatal(dev, err, "starting transaction");
82 goto fail;
83 }
84
85 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
86 if (err) {
87 message = "writing feature-sg";
88 goto abort_transaction;
89 }
90
91 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
92 "%d", sg);
93 if (err) {
94 message = "writing feature-gso-tcpv4";
95 goto abort_transaction;
96 }
97
98 /* We support rx-copy path. */
99 err = xenbus_printf(xbt, dev->nodename,
100 "feature-rx-copy", "%d", 1);
101 if (err) {
102 message = "writing feature-rx-copy";
103 goto abort_transaction;
104 }
105
106 /*
107 * We don't support rx-flip path (except old guests who don't
108 * grok this feature flag).
109 */
110 err = xenbus_printf(xbt, dev->nodename,
111 "feature-rx-flip", "%d", 0);
112 if (err) {
113 message = "writing feature-rx-flip";
114 goto abort_transaction;
115 }
116
117 err = xenbus_transaction_end(xbt, 0);
118 } while (err == -EAGAIN);
119
120 if (err) {
121 xenbus_dev_fatal(dev, err, "completing transaction");
122 goto fail;
123 }
124
125 err = xenbus_switch_state(dev, XenbusStateInitWait);
126 if (err)
127 goto fail;
128
129 /* This kicks hotplug scripts, so do it immediately. */
130 backend_create_xenvif(be);
131
132 return 0;
133
134abort_transaction:
135 xenbus_transaction_end(xbt, 1);
136 xenbus_dev_fatal(dev, err, "%s", message);
137fail:
138 pr_debug("failed");
139 netback_remove(dev);
140 return err;
141}
142
143
144/*
145 * Handle the creation of the hotplug script environment. We add the script
146 * and vif variables to the environment, for the benefit of the vif-* hotplug
147 * scripts.
148 */
149static int netback_uevent(struct xenbus_device *xdev,
150 struct kobj_uevent_env *env)
151{
152 struct backend_info *be = dev_get_drvdata(&xdev->dev);
153 char *val;
154
155 val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
156 if (IS_ERR(val)) {
157 int err = PTR_ERR(val);
158 xenbus_dev_fatal(xdev, err, "reading script");
159 return err;
160 } else {
161 if (add_uevent_var(env, "script=%s", val)) {
162 kfree(val);
163 return -ENOMEM;
164 }
165 kfree(val);
166 }
167
168 if (!be || !be->vif)
169 return 0;
170
171 return add_uevent_var(env, "vif=%s", be->vif->dev->name);
172}
173
174
175static void backend_create_xenvif(struct backend_info *be)
176{
177 int err;
178 long handle;
179 struct xenbus_device *dev = be->dev;
180
181 if (be->vif != NULL)
182 return;
183
184 err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
185 if (err != 1) {
186 xenbus_dev_fatal(dev, err, "reading handle");
187 return;
188 }
189
190 be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle);
191 if (IS_ERR(be->vif)) {
192 err = PTR_ERR(be->vif);
193 be->vif = NULL;
194 xenbus_dev_fatal(dev, err, "creating interface");
195 return;
196 }
197
198 kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
199}
200
201
202static void disconnect_backend(struct xenbus_device *dev)
203{
204 struct backend_info *be = dev_get_drvdata(&dev->dev);
205
206 if (be->vif) {
207 xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
208 xenvif_disconnect(be->vif);
209 be->vif = NULL;
210 }
211}
212
213/**
214 * Callback received when the frontend's state changes.
215 */
216static void frontend_changed(struct xenbus_device *dev,
217 enum xenbus_state frontend_state)
218{
219 struct backend_info *be = dev_get_drvdata(&dev->dev);
220
221 pr_debug("frontend state %s", xenbus_strstate(frontend_state));
222
223 be->frontend_state = frontend_state;
224
225 switch (frontend_state) {
226 case XenbusStateInitialising:
227 if (dev->state == XenbusStateClosed) {
228 printk(KERN_INFO "%s: %s: prepare for reconnect\n",
229 __func__, dev->nodename);
230 xenbus_switch_state(dev, XenbusStateInitWait);
231 }
232 break;
233
234 case XenbusStateInitialised:
235 break;
236
237 case XenbusStateConnected:
238 if (dev->state == XenbusStateConnected)
239 break;
240 backend_create_xenvif(be);
241 if (be->vif)
242 connect(be);
243 break;
244
245 case XenbusStateClosing:
246 if (be->vif)
247 kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
248 disconnect_backend(dev);
249 xenbus_switch_state(dev, XenbusStateClosing);
250 break;
251
252 case XenbusStateClosed:
253 xenbus_switch_state(dev, XenbusStateClosed);
254 if (xenbus_dev_is_online(dev))
255 break;
256 /* fall through if not online */
257 case XenbusStateUnknown:
258 device_unregister(&dev->dev);
259 break;
260
261 default:
262 xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
263 frontend_state);
264 break;
265 }
266}
267
268
269static void xen_net_read_rate(struct xenbus_device *dev,
270 unsigned long *bytes, unsigned long *usec)
271{
272 char *s, *e;
273 unsigned long b, u;
274 char *ratestr;
275
276 /* Default to unlimited bandwidth. */
277 *bytes = ~0UL;
278 *usec = 0;
279
280 ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
281 if (IS_ERR(ratestr))
282 return;
283
284 s = ratestr;
285 b = simple_strtoul(s, &e, 10);
286 if ((s == e) || (*e != ','))
287 goto fail;
288
289 s = e + 1;
290 u = simple_strtoul(s, &e, 10);
291 if ((s == e) || (*e != '\0'))
292 goto fail;
293
294 *bytes = b;
295 *usec = u;
296
297 kfree(ratestr);
298 return;
299
300 fail:
301 pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
302 kfree(ratestr);
303}
304
305static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
306{
307 char *s, *e, *macstr;
308 int i;
309
310 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
311 if (IS_ERR(macstr))
312 return PTR_ERR(macstr);
313
314 for (i = 0; i < ETH_ALEN; i++) {
315 mac[i] = simple_strtoul(s, &e, 16);
316 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
317 kfree(macstr);
318 return -ENOENT;
319 }
320 s = e+1;
321 }
322
323 kfree(macstr);
324 return 0;
325}
326
327static void unregister_hotplug_status_watch(struct backend_info *be)
328{
329 if (be->have_hotplug_status_watch) {
330 unregister_xenbus_watch(&be->hotplug_status_watch);
331 kfree(be->hotplug_status_watch.node);
332 }
333 be->have_hotplug_status_watch = 0;
334}
335
336static void hotplug_status_changed(struct xenbus_watch *watch,
337 const char **vec,
338 unsigned int vec_size)
339{
340 struct backend_info *be = container_of(watch,
341 struct backend_info,
342 hotplug_status_watch);
343 char *str;
344 unsigned int len;
345
346 str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
347 if (IS_ERR(str))
348 return;
349 if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
350 xenbus_switch_state(be->dev, XenbusStateConnected);
351 /* Not interested in this watch anymore. */
352 unregister_hotplug_status_watch(be);
353 }
354 kfree(str);
355}
356
357static void connect(struct backend_info *be)
358{
359 int err;
360 struct xenbus_device *dev = be->dev;
361
362 err = connect_rings(be);
363 if (err)
364 return;
365
366 err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
367 if (err) {
368 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
369 return;
370 }
371
372 xen_net_read_rate(dev, &be->vif->credit_bytes,
373 &be->vif->credit_usec);
374 be->vif->remaining_credit = be->vif->credit_bytes;
375
376 unregister_hotplug_status_watch(be);
377 err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
378 hotplug_status_changed,
379 "%s/%s", dev->nodename, "hotplug-status");
380 if (err) {
381 /* Switch now, since we can't do a watch. */
382 xenbus_switch_state(dev, XenbusStateConnected);
383 } else {
384 be->have_hotplug_status_watch = 1;
385 }
386
387 netif_wake_queue(be->vif->dev);
388}
389
390
391static int connect_rings(struct backend_info *be)
392{
393 struct xenvif *vif = be->vif;
394 struct xenbus_device *dev = be->dev;
395 unsigned long tx_ring_ref, rx_ring_ref;
396 unsigned int evtchn, rx_copy;
397 int err;
398 int val;
399
400 err = xenbus_gather(XBT_NIL, dev->otherend,
401 "tx-ring-ref", "%lu", &tx_ring_ref,
402 "rx-ring-ref", "%lu", &rx_ring_ref,
403 "event-channel", "%u", &evtchn, NULL);
404 if (err) {
405 xenbus_dev_fatal(dev, err,
406 "reading %s/ring-ref and event-channel",
407 dev->otherend);
408 return err;
409 }
410
411 err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
412 &rx_copy);
413 if (err == -ENOENT) {
414 err = 0;
415 rx_copy = 0;
416 }
417 if (err < 0) {
418 xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
419 dev->otherend);
420 return err;
421 }
422 if (!rx_copy)
423 return -EOPNOTSUPP;
424
425 if (vif->dev->tx_queue_len != 0) {
426 if (xenbus_scanf(XBT_NIL, dev->otherend,
427 "feature-rx-notify", "%d", &val) < 0)
428 val = 0;
429 if (val)
430 vif->can_queue = 1;
431 else
432 /* Must be non-zero for pfifo_fast to work. */
433 vif->dev->tx_queue_len = 1;
434 }
435
436 if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
437 "%d", &val) < 0)
438 val = 0;
439 vif->can_sg = !!val;
440
441 if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
442 "%d", &val) < 0)
443 val = 0;
444 vif->gso = !!val;
445
446 if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
447 "%d", &val) < 0)
448 val = 0;
449 vif->gso_prefix = !!val;
450
451 if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
452 "%d", &val) < 0)
453 val = 0;
454 vif->csum = !val;
455
456 /* Map the shared frame, irq etc. */
457 err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
458 if (err) {
459 xenbus_dev_fatal(dev, err,
460 "mapping shared-frames %lu/%lu port %u",
461 tx_ring_ref, rx_ring_ref, evtchn);
462 return err;
463 }
464 return 0;
465}
466
467
468/* ** Driver Registration ** */
469
470
471static const struct xenbus_device_id netback_ids[] = {
472 { "vif" },
473 { "" }
474};
475
476
477static struct xenbus_driver netback = {
478 .name = "vif",
479 .owner = THIS_MODULE,
480 .ids = netback_ids,
481 .probe = netback_probe,
482 .remove = netback_remove,
483 .uevent = netback_uevent,
484 .otherend_changed = frontend_changed,
485};
486
487int xenvif_xenbus_init(void)
488{
489 return xenbus_register_backend(&netback);
490}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 5b399b54fef7..5c8d9c385be0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -359,7 +359,7 @@ static void xennet_tx_buf_gc(struct net_device *dev)
359 struct xen_netif_tx_response *txrsp; 359 struct xen_netif_tx_response *txrsp;
360 360
361 txrsp = RING_GET_RESPONSE(&np->tx, cons); 361 txrsp = RING_GET_RESPONSE(&np->tx, cons);
362 if (txrsp->status == NETIF_RSP_NULL) 362 if (txrsp->status == XEN_NETIF_RSP_NULL)
363 continue; 363 continue;
364 364
365 id = txrsp->id; 365 id = txrsp->id;
@@ -416,7 +416,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
416 larger than a page), split it it into page-sized chunks. */ 416 larger than a page), split it it into page-sized chunks. */
417 while (len > PAGE_SIZE - offset) { 417 while (len > PAGE_SIZE - offset) {
418 tx->size = PAGE_SIZE - offset; 418 tx->size = PAGE_SIZE - offset;
419 tx->flags |= NETTXF_more_data; 419 tx->flags |= XEN_NETTXF_more_data;
420 len -= tx->size; 420 len -= tx->size;
421 data += tx->size; 421 data += tx->size;
422 offset = 0; 422 offset = 0;
@@ -442,7 +442,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
442 for (i = 0; i < frags; i++) { 442 for (i = 0; i < frags; i++) {
443 skb_frag_t *frag = skb_shinfo(skb)->frags + i; 443 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
444 444
445 tx->flags |= NETTXF_more_data; 445 tx->flags |= XEN_NETTXF_more_data;
446 446
447 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); 447 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
448 np->tx_skbs[id].skb = skb_get(skb); 448 np->tx_skbs[id].skb = skb_get(skb);
@@ -517,10 +517,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
517 tx->flags = 0; 517 tx->flags = 0;
518 if (skb->ip_summed == CHECKSUM_PARTIAL) 518 if (skb->ip_summed == CHECKSUM_PARTIAL)
519 /* local packet? */ 519 /* local packet? */
520 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; 520 tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
521 else if (skb->ip_summed == CHECKSUM_UNNECESSARY) 521 else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
522 /* remote but checksummed. */ 522 /* remote but checksummed. */
523 tx->flags |= NETTXF_data_validated; 523 tx->flags |= XEN_NETTXF_data_validated;
524 524
525 if (skb_shinfo(skb)->gso_size) { 525 if (skb_shinfo(skb)->gso_size) {
526 struct xen_netif_extra_info *gso; 526 struct xen_netif_extra_info *gso;
@@ -531,7 +531,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
531 if (extra) 531 if (extra)
532 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; 532 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
533 else 533 else
534 tx->flags |= NETTXF_extra_info; 534 tx->flags |= XEN_NETTXF_extra_info;
535 535
536 gso->u.gso.size = skb_shinfo(skb)->gso_size; 536 gso->u.gso.size = skb_shinfo(skb)->gso_size;
537 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; 537 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -651,7 +651,7 @@ static int xennet_get_responses(struct netfront_info *np,
651 int err = 0; 651 int err = 0;
652 unsigned long ret; 652 unsigned long ret;
653 653
654 if (rx->flags & NETRXF_extra_info) { 654 if (rx->flags & XEN_NETRXF_extra_info) {
655 err = xennet_get_extras(np, extras, rp); 655 err = xennet_get_extras(np, extras, rp);
656 cons = np->rx.rsp_cons; 656 cons = np->rx.rsp_cons;
657 } 657 }
@@ -688,7 +688,7 @@ static int xennet_get_responses(struct netfront_info *np,
688 __skb_queue_tail(list, skb); 688 __skb_queue_tail(list, skb);
689 689
690next: 690next:
691 if (!(rx->flags & NETRXF_more_data)) 691 if (!(rx->flags & XEN_NETRXF_more_data))
692 break; 692 break;
693 693
694 if (cons + frags == rp) { 694 if (cons + frags == rp) {
@@ -983,9 +983,9 @@ err:
983 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); 983 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
984 skb->len += skb->data_len; 984 skb->len += skb->data_len;
985 985
986 if (rx->flags & NETRXF_csum_blank) 986 if (rx->flags & XEN_NETRXF_csum_blank)
987 skb->ip_summed = CHECKSUM_PARTIAL; 987 skb->ip_summed = CHECKSUM_PARTIAL;
988 else if (rx->flags & NETRXF_data_validated) 988 else if (rx->flags & XEN_NETRXF_data_validated)
989 skb->ip_summed = CHECKSUM_UNNECESSARY; 989 skb->ip_summed = CHECKSUM_UNNECESSARY;
990 990
991 __skb_queue_tail(&rxq, skb); 991 __skb_queue_tail(&rxq, skb);
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 518481c95f18..cb94668f6e9f 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -22,50 +22,50 @@
22 22
23/* 23/*
24 * This is the 'wire' format for packets: 24 * This is the 'wire' format for packets:
25 * Request 1: netif_tx_request -- NETTXF_* (any flags) 25 * Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags)
26 * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) 26 * [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info)
27 * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) 27 * [Request 3: xen_netif_extra_info] (only if request 2 has XEN_NETIF_EXTRA_MORE)
28 * Request 4: netif_tx_request -- NETTXF_more_data 28 * Request 4: xen_netif_tx_request -- XEN_NETTXF_more_data
29 * Request 5: netif_tx_request -- NETTXF_more_data 29 * Request 5: xen_netif_tx_request -- XEN_NETTXF_more_data
30 * ... 30 * ...
31 * Request N: netif_tx_request -- 0 31 * Request N: xen_netif_tx_request -- 0
32 */ 32 */
33 33
34/* Protocol checksum field is blank in the packet (hardware offload)? */ 34/* Protocol checksum field is blank in the packet (hardware offload)? */
35#define _NETTXF_csum_blank (0) 35#define _XEN_NETTXF_csum_blank (0)
36#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) 36#define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank)
37 37
38/* Packet data has been validated against protocol checksum. */ 38/* Packet data has been validated against protocol checksum. */
39#define _NETTXF_data_validated (1) 39#define _XEN_NETTXF_data_validated (1)
40#define NETTXF_data_validated (1U<<_NETTXF_data_validated) 40#define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated)
41 41
42/* Packet continues in the next request descriptor. */ 42/* Packet continues in the next request descriptor. */
43#define _NETTXF_more_data (2) 43#define _XEN_NETTXF_more_data (2)
44#define NETTXF_more_data (1U<<_NETTXF_more_data) 44#define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data)
45 45
46/* Packet to be followed by extra descriptor(s). */ 46/* Packet to be followed by extra descriptor(s). */
47#define _NETTXF_extra_info (3) 47#define _XEN_NETTXF_extra_info (3)
48#define NETTXF_extra_info (1U<<_NETTXF_extra_info) 48#define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info)
49 49
50struct xen_netif_tx_request { 50struct xen_netif_tx_request {
51 grant_ref_t gref; /* Reference to buffer page */ 51 grant_ref_t gref; /* Reference to buffer page */
52 uint16_t offset; /* Offset within buffer page */ 52 uint16_t offset; /* Offset within buffer page */
53 uint16_t flags; /* NETTXF_* */ 53 uint16_t flags; /* XEN_NETTXF_* */
54 uint16_t id; /* Echoed in response message. */ 54 uint16_t id; /* Echoed in response message. */
55 uint16_t size; /* Packet size in bytes. */ 55 uint16_t size; /* Packet size in bytes. */
56}; 56};
57 57
58/* Types of netif_extra_info descriptors. */ 58/* Types of xen_netif_extra_info descriptors. */
59#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ 59#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
60#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ 60#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
61#define XEN_NETIF_EXTRA_TYPE_MAX (2) 61#define XEN_NETIF_EXTRA_TYPE_MAX (2)
62 62
63/* netif_extra_info flags. */ 63/* xen_netif_extra_info flags. */
64#define _XEN_NETIF_EXTRA_FLAG_MORE (0) 64#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
65#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) 65#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
66 66
67/* GSO types - only TCPv4 currently supported. */ 67/* GSO types - only TCPv4 currently supported. */
68#define XEN_NETIF_GSO_TYPE_TCPV4 (1) 68#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
69 69
70/* 70/*
71 * This structure needs to fit within both netif_tx_request and 71 * This structure needs to fit within both netif_tx_request and
@@ -107,7 +107,7 @@ struct xen_netif_extra_info {
107 107
108struct xen_netif_tx_response { 108struct xen_netif_tx_response {
109 uint16_t id; 109 uint16_t id;
110 int16_t status; /* NETIF_RSP_* */ 110 int16_t status; /* XEN_NETIF_RSP_* */
111}; 111};
112 112
113struct xen_netif_rx_request { 113struct xen_netif_rx_request {
@@ -116,25 +116,29 @@ struct xen_netif_rx_request {
116}; 116};
117 117
118/* Packet data has been validated against protocol checksum. */ 118/* Packet data has been validated against protocol checksum. */
119#define _NETRXF_data_validated (0) 119#define _XEN_NETRXF_data_validated (0)
120#define NETRXF_data_validated (1U<<_NETRXF_data_validated) 120#define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated)
121 121
122/* Protocol checksum field is blank in the packet (hardware offload)? */ 122/* Protocol checksum field is blank in the packet (hardware offload)? */
123#define _NETRXF_csum_blank (1) 123#define _XEN_NETRXF_csum_blank (1)
124#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) 124#define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank)
125 125
126/* Packet continues in the next request descriptor. */ 126/* Packet continues in the next request descriptor. */
127#define _NETRXF_more_data (2) 127#define _XEN_NETRXF_more_data (2)
128#define NETRXF_more_data (1U<<_NETRXF_more_data) 128#define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data)
129 129
130/* Packet to be followed by extra descriptor(s). */ 130/* Packet to be followed by extra descriptor(s). */
131#define _NETRXF_extra_info (3) 131#define _XEN_NETRXF_extra_info (3)
132#define NETRXF_extra_info (1U<<_NETRXF_extra_info) 132#define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info)
133
134/* GSO Prefix descriptor. */
135#define _XEN_NETRXF_gso_prefix (4)
136#define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix)
133 137
134struct xen_netif_rx_response { 138struct xen_netif_rx_response {
135 uint16_t id; 139 uint16_t id;
136 uint16_t offset; /* Offset in page of start of received packet */ 140 uint16_t offset; /* Offset in page of start of received packet */
137 uint16_t flags; /* NETRXF_* */ 141 uint16_t flags; /* XEN_NETRXF_* */
138 int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ 142 int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
139}; 143};
140 144
@@ -149,10 +153,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
149 struct xen_netif_rx_request, 153 struct xen_netif_rx_request,
150 struct xen_netif_rx_response); 154 struct xen_netif_rx_response);
151 155
152#define NETIF_RSP_DROPPED -2 156#define XEN_NETIF_RSP_DROPPED -2
153#define NETIF_RSP_ERROR -1 157#define XEN_NETIF_RSP_ERROR -1
154#define NETIF_RSP_OKAY 0 158#define XEN_NETIF_RSP_OKAY 0
155/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ 159/* No response: used for auxiliary requests (e.g., xen_netif_extra_info). */
156#define NETIF_RSP_NULL 1 160#define XEN_NETIF_RSP_NULL 1
157 161
158#endif 162#endif