Diffstat (limited to 'drivers/net/xen-netback/netback.c')
-rw-r--r--	drivers/net/xen-netback/netback.c	506
1 file changed, 96 insertions(+), 410 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 78425554a537..6b62c3eb8e18 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -39,7 +39,6 @@
 #include <linux/udp.h>
 
 #include <net/tcp.h>
-#include <net/ip6_checksum.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -138,36 +137,26 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
 		vif->pending_prod + vif->pending_cons;
 }
 
-static int max_required_rx_slots(struct xenvif *vif)
+bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
 {
-	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
+	RING_IDX prod, cons;
 
-	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-	if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
-		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
-
-	return max;
-}
+	do {
+		prod = vif->rx.sring->req_prod;
+		cons = vif->rx.req_cons;
 
-int xenvif_rx_ring_full(struct xenvif *vif)
-{
-	RING_IDX peek = vif->rx_req_cons_peek;
-	RING_IDX needed = max_required_rx_slots(vif);
+		if (prod - cons >= needed)
+			return true;
 
-	return ((vif->rx.sring->req_prod - peek) < needed) ||
-	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
-}
+		vif->rx.sring->req_event = prod + 1;
 
-int xenvif_must_stop_queue(struct xenvif *vif)
-{
-	if (!xenvif_rx_ring_full(vif))
-		return 0;
-
-	vif->rx.sring->req_event = vif->rx_req_cons_peek +
-				   max_required_rx_slots(vif);
-	mb(); /* request notification /then/ check the queue */
+		/* Make sure event is visible before we check prod
+		 * again.
+		 */
+		mb();
+	} while (vif->rx.sring->req_prod != prod);
 
-	return xenvif_rx_ring_full(vif);
+	return false;
 }
 
 /*
@@ -210,93 +199,6 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
 	return false;
 }
 
-struct xenvif_count_slot_state {
-	unsigned long copy_off;
-	bool head;
-};
-
-unsigned int xenvif_count_frag_slots(struct xenvif *vif,
-				     unsigned long offset, unsigned long size,
-				     struct xenvif_count_slot_state *state)
-{
-	unsigned count = 0;
-
-	offset &= ~PAGE_MASK;
-
-	while (size > 0) {
-		unsigned long bytes;
-
-		bytes = PAGE_SIZE - offset;
-
-		if (bytes > size)
-			bytes = size;
-
-		if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
-			count++;
-			state->copy_off = 0;
-		}
-
-		if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
-			bytes = MAX_BUFFER_OFFSET - state->copy_off;
-
-		state->copy_off += bytes;
-
-		offset += bytes;
-		size -= bytes;
-
-		if (offset == PAGE_SIZE)
-			offset = 0;
-
-		state->head = false;
-	}
-
-	return count;
-}
-
-/*
- * Figure out how many ring slots we're going to need to send @skb to
- * the guest. This function is essentially a dry run of
- * xenvif_gop_frag_copy.
- */
-unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
-{
-	struct xenvif_count_slot_state state;
-	unsigned int count;
-	unsigned char *data;
-	unsigned i;
-
-	state.head = true;
-	state.copy_off = 0;
-
-	/* Slot for the first (partial) page of data. */
-	count = 1;
-
-	/* Need a slot for the GSO prefix for GSO extra data? */
-	if (skb_shinfo(skb)->gso_size)
-		count++;
-
-	data = skb->data;
-	while (data < skb_tail_pointer(skb)) {
-		unsigned long offset = offset_in_page(data);
-		unsigned long size = PAGE_SIZE - offset;
-
-		if (data + size > skb_tail_pointer(skb))
-			size = skb_tail_pointer(skb) - data;
-
-		count += xenvif_count_frag_slots(vif, offset, size, &state);
-
-		data += size;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
-
-		count += xenvif_count_frag_slots(vif, offset, size, &state);
-	}
-	return count;
-}
-
 struct netrx_pending_operations {
 	unsigned copy_prod, copy_cons;
 	unsigned meta_prod, meta_cons;
@@ -557,12 +459,12 @@ struct skb_cb_overlay {
 	int meta_slots_used;
 };
 
-static void xenvif_kick_thread(struct xenvif *vif)
+void xenvif_kick_thread(struct xenvif *vif)
 {
 	wake_up(&vif->wq);
 }
 
-void xenvif_rx_action(struct xenvif *vif)
+static void xenvif_rx_action(struct xenvif *vif)
 {
 	s8 status;
 	u16 flags;
@@ -571,11 +473,10 @@ void xenvif_rx_action(struct xenvif *vif)
 	struct sk_buff *skb;
 	LIST_HEAD(notify);
 	int ret;
-	int nr_frags;
-	int count;
 	unsigned long offset;
 	struct skb_cb_overlay *sco;
-	int need_to_notify = 0;
+	bool need_to_notify = false;
+	bool ring_full = false;
 
 	struct netrx_pending_operations npo = {
 		.copy = vif->grant_copy_op,
@@ -584,29 +485,47 @@
 
 	skb_queue_head_init(&rxq);
 
-	count = 0;
-
 	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
-		vif = netdev_priv(skb->dev);
-		nr_frags = skb_shinfo(skb)->nr_frags;
+		int max_slots_needed;
+		int i;
+
+		/* We need a cheap worse case estimate for the number of
+		 * slots we'll use.
+		 */
+
+		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
+						skb_headlen(skb),
+						PAGE_SIZE);
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			unsigned int size;
+			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+			max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
+		}
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
+		    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			max_slots_needed++;
+
+		/* If the skb may not fit then bail out now */
+		if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
+			skb_queue_head(&vif->rx_queue, skb);
+			need_to_notify = true;
+			ring_full = true;
+			break;
+		}
 
 		sco = (struct skb_cb_overlay *)skb->cb;
 		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
-
-		count += nr_frags + 1;
+		BUG_ON(sco->meta_slots_used > max_slots_needed);
 
 		__skb_queue_tail(&rxq, skb);
-
-		/* Filled the batch queue? */
-		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
-			break;
 	}
 
 	BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));
 
+	vif->rx_queue_stopped = !npo.copy_prod && ring_full;
+
 	if (!npo.copy_prod)
-		return;
+		goto done;
 
 	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
 	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
@@ -614,8 +533,6 @@ void xenvif_rx_action(struct xenvif *vif)
 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
 		sco = (struct skb_cb_overlay *)skb->cb;
 
-		vif = netdev_priv(skb->dev);
-
 		if ((1 << vif->meta[npo.meta_cons].gso_type) &
 		    vif->gso_prefix_mask) {
 			resp = RING_GET_RESPONSE(&vif->rx,
@@ -678,28 +595,15 @@
 
 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
 
-		if (ret)
-			need_to_notify = 1;
-
-		xenvif_notify_tx_completion(vif);
+		need_to_notify |= !!ret;
 
 		npo.meta_cons += sco->meta_slots_used;
 		dev_kfree_skb(skb);
 	}
 
+done:
 	if (need_to_notify)
 		notify_remote_via_irq(vif->rx_irq);
-
-	/* More work to do? */
-	if (!skb_queue_empty(&vif->rx_queue))
-		xenvif_kick_thread(vif);
-}
-
-void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
-{
-	skb_queue_tail(&vif->rx_queue, skb);
-
-	xenvif_kick_thread(vif);
 }
 
 void xenvif_check_rx_xenvif(struct xenvif *vif)
@@ -1141,265 +1045,14 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-
-	/* Header must be checked, and gso_segs computed. */
-	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-	skb_shinfo(skb)->gso_segs = 0;
-
-	return 0;
-}
-
-static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len,
-				  unsigned int max)
-{
-	if (skb_headlen(skb) >= len)
-		return 0;
-
-	/* If we need to pullup then pullup to the max, so we
-	 * won't need to do it again.
-	 */
-	if (max > skb->len)
-		max = skb->len;
-
-	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
-		return -ENOMEM;
-
-	if (skb_headlen(skb) < len)
-		return -EPROTO;
+	/* gso_segs will be calculated later */
 
 	return 0;
 }
 
-/* This value should be large enough to cover a tagged ethernet header plus
- * maximally sized IP and TCP or UDP headers.
- */
-#define MAX_IP_HDR_LEN 128
-
-static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
-			     int recalculate_partial_csum)
-{
-	unsigned int off;
-	bool fragment;
-	int err;
-
-	fragment = false;
-
-	err = maybe_pull_tail(skb,
-			      sizeof(struct iphdr),
-			      MAX_IP_HDR_LEN);
-	if (err < 0)
-		goto out;
-
-	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
-		fragment = true;
-
-	off = ip_hdrlen(skb);
-
-	err = -EPROTO;
-
-	if (fragment)
-		goto out;
-
-	switch (ip_hdr(skb)->protocol) {
-	case IPPROTO_TCP:
-		err = maybe_pull_tail(skb,
-				      off + sizeof(struct tcphdr),
-				      MAX_IP_HDR_LEN);
-		if (err < 0)
-			goto out;
-
-		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct tcphdr, check))) {
-			err = -EPROTO;
-			goto out;
-		}
-
-		if (recalculate_partial_csum)
-			tcp_hdr(skb)->check =
-				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
-						   ip_hdr(skb)->daddr,
-						   skb->len - off,
-						   IPPROTO_TCP, 0);
-		break;
-	case IPPROTO_UDP:
-		err = maybe_pull_tail(skb,
-				      off + sizeof(struct udphdr),
-				      MAX_IP_HDR_LEN);
-		if (err < 0)
-			goto out;
-
-		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct udphdr, check))) {
-			err = -EPROTO;
-			goto out;
-		}
-
-		if (recalculate_partial_csum)
-			udp_hdr(skb)->check =
-				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
-						   ip_hdr(skb)->daddr,
-						   skb->len - off,
-						   IPPROTO_UDP, 0);
-		break;
-	default:
-		goto out;
-	}
-
-	err = 0;
-
-out:
-	return err;
-}
-
-/* This value should be large enough to cover a tagged ethernet header plus
- * an IPv6 header, all options, and a maximal TCP or UDP header.
- */
-#define MAX_IPV6_HDR_LEN 256
-
-#define OPT_HDR(type, skb, off) \
-	(type *)(skb_network_header(skb) + (off))
-
-static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
-			       int recalculate_partial_csum)
-{
-	int err;
-	u8 nexthdr;
-	unsigned int off;
-	unsigned int len;
-	bool fragment;
-	bool done;
-
-	fragment = false;
-	done = false;
-
-	off = sizeof(struct ipv6hdr);
-
-	err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
-	if (err < 0)
-		goto out;
-
-	nexthdr = ipv6_hdr(skb)->nexthdr;
-
-	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
-	while (off <= len && !done) {
-		switch (nexthdr) {
-		case IPPROTO_DSTOPTS:
-		case IPPROTO_HOPOPTS:
-		case IPPROTO_ROUTING: {
-			struct ipv6_opt_hdr *hp;
-
-			err = maybe_pull_tail(skb,
-					      off +
-					      sizeof(struct ipv6_opt_hdr),
-					      MAX_IPV6_HDR_LEN);
-			if (err < 0)
-				goto out;
-
-			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
-			nexthdr = hp->nexthdr;
-			off += ipv6_optlen(hp);
-			break;
-		}
-		case IPPROTO_AH: {
-			struct ip_auth_hdr *hp;
-
-			err = maybe_pull_tail(skb,
-					      off +
-					      sizeof(struct ip_auth_hdr),
-					      MAX_IPV6_HDR_LEN);
-			if (err < 0)
-				goto out;
-
-			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
-			nexthdr = hp->nexthdr;
-			off += ipv6_authlen(hp);
-			break;
-		}
-		case IPPROTO_FRAGMENT: {
-			struct frag_hdr *hp;
-
-			err = maybe_pull_tail(skb,
-					      off +
-					      sizeof(struct frag_hdr),
-					      MAX_IPV6_HDR_LEN);
-			if (err < 0)
-				goto out;
-
-			hp = OPT_HDR(struct frag_hdr, skb, off);
-
-			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
-				fragment = true;
-
-			nexthdr = hp->nexthdr;
-			off += sizeof(struct frag_hdr);
-			break;
-		}
-		default:
-			done = true;
-			break;
-		}
-	}
-
-	err = -EPROTO;
-
-	if (!done || fragment)
-		goto out;
-
-	switch (nexthdr) {
-	case IPPROTO_TCP:
-		err = maybe_pull_tail(skb,
-				      off + sizeof(struct tcphdr),
-				      MAX_IPV6_HDR_LEN);
-		if (err < 0)
-			goto out;
-
-		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct tcphdr, check))) {
-			err = -EPROTO;
-			goto out;
-		}
-
-		if (recalculate_partial_csum)
-			tcp_hdr(skb)->check =
-				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-						 &ipv6_hdr(skb)->daddr,
-						 skb->len - off,
-						 IPPROTO_TCP, 0);
-		break;
-	case IPPROTO_UDP:
-		err = maybe_pull_tail(skb,
-				      off + sizeof(struct udphdr),
-				      MAX_IPV6_HDR_LEN);
-		if (err < 0)
-			goto out;
-
-		if (!skb_partial_csum_set(skb, off,
-					  offsetof(struct udphdr, check))) {
-			err = -EPROTO;
-			goto out;
-		}
-
-		if (recalculate_partial_csum)
-			udp_hdr(skb)->check =
-				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-						 &ipv6_hdr(skb)->daddr,
-						 skb->len - off,
-						 IPPROTO_UDP, 0);
-		break;
-	default:
-		goto out;
-	}
-
-	err = 0;
-
-out:
-	return err;
-}
-
 static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
 {
-	int err = -EPROTO;
-	int recalculate_partial_csum = 0;
+	bool recalculate_partial_csum = false;
 
 	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 	 * peers can fail to set NETRXF_csum_blank when sending a GSO
@@ -1409,19 +1062,14 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
 	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 		vif->rx_gso_checksum_fixup++;
 		skb->ip_summed = CHECKSUM_PARTIAL;
-		recalculate_partial_csum = 1;
+		recalculate_partial_csum = true;
 	}
 
 	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		return 0;
 
-	if (skb->protocol == htons(ETH_P_IP))
-		err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
-	else if (skb->protocol == htons(ETH_P_IPV6))
-		err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
-
-	return err;
+	return skb_checksum_setup(skb, recalculate_partial_csum);
 }
 
 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
@@ -1687,6 +1335,20 @@ static int xenvif_tx_submit(struct xenvif *vif)
 
 		skb_probe_transport_header(skb, 0);
 
+		/* If the packet is GSO then we will have just set up the
+		 * transport header offset in checksum_setup so it's now
+		 * straightforward to calculate gso_segs.
+		 */
+		if (skb_is_gso(skb)) {
+			int mss = skb_shinfo(skb)->gso_size;
+			int hdrlen = skb_transport_header(skb) -
+				skb_mac_header(skb) +
+				tcp_hdrlen(skb);
+
+			skb_shinfo(skb)->gso_segs =
+				DIV_ROUND_UP(skb->len - hdrlen, mss);
+		}
+
 		vif->dev->stats.rx_bytes += skb->len;
 		vif->dev->stats.rx_packets++;
 
@@ -1811,7 +1473,8 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
 
 static inline int rx_work_todo(struct xenvif *vif)
 {
-	return !skb_queue_empty(&vif->rx_queue);
+	return (!skb_queue_empty(&vif->rx_queue) && !vif->rx_queue_stopped) ||
+		vif->rx_event;
 }
 
 static inline int tx_work_todo(struct xenvif *vif)
@@ -1861,8 +1524,6 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
 	rxs = (struct xen_netif_rx_sring *)addr;
 	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
 
-	vif->rx_req_cons_peek = 0;
-
 	return 0;
 
 err:
@@ -1870,9 +1531,24 @@ err:
 	return err;
 }
 
+void xenvif_stop_queue(struct xenvif *vif)
+{
+	if (!vif->can_queue)
+		return;
+
+	netif_stop_queue(vif->dev);
+}
+
+static void xenvif_start_queue(struct xenvif *vif)
+{
+	if (xenvif_schedulable(vif))
+		netif_wake_queue(vif->dev);
+}
+
 int xenvif_kthread(void *data)
 {
 	struct xenvif *vif = data;
+	struct sk_buff *skb;
 
 	while (!kthread_should_stop()) {
 		wait_event_interruptible(vif->wq,
@@ -1881,12 +1557,22 @@ int xenvif_kthread(void *data)
 		if (kthread_should_stop())
 			break;
 
-		if (rx_work_todo(vif))
+		if (!skb_queue_empty(&vif->rx_queue))
 			xenvif_rx_action(vif);
 
+		vif->rx_event = false;
+
+		if (skb_queue_empty(&vif->rx_queue) &&
+		    netif_queue_stopped(vif->dev))
+			xenvif_start_queue(vif);
+
 		cond_resched();
 	}
 
+	/* Bin any remaining skbs */
+	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
+		dev_kfree_skb(skb);
+
 	return 0;
 }
 
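
For readers following the new flow-control path, the sketch below reproduces the "check free slots, re-arm the event, re-check" pattern of xenvif_rx_ring_slots_available() outside the kernel. It is a minimal, self-contained illustration: the mock_ring type, its field names, and rx_ring_slots_available() are hypothetical stand-ins for the Xen shared-ring structures, and atomic_thread_fence() stands in for the kernel's mb(); it is not the driver's actual API.

```c
/* Hypothetical single-threaded illustration of the check/re-arm/re-check
 * pattern used by xenvif_rx_ring_slots_available() in the diff above.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct mock_ring {
	unsigned int req_prod;  /* posted by the producer (frontend) */
	unsigned int req_cons;  /* consumed so far by the backend */
	unsigned int req_event; /* producer notifies when req_prod reaches this */
};

static bool rx_ring_slots_available(struct mock_ring *ring, unsigned int needed)
{
	unsigned int prod, cons;

	do {
		prod = ring->req_prod;
		cons = ring->req_cons;

		/* Enough unconsumed requests already posted? */
		if (prod - cons >= needed)
			return true;

		/* Ask to be notified as soon as one more request is posted. */
		ring->req_event = prod + 1;

		/* Make the event write visible before re-reading req_prod
		 * (the kernel code uses mb() here), then re-check in case
		 * the producer raced with us.
		 */
		atomic_thread_fence(memory_order_seq_cst);
	} while (ring->req_prod != prod);

	return false;
}

int main(void)
{
	struct mock_ring ring = { .req_prod = 10, .req_cons = 4 };

	printf("6 slots available: %d\n", rx_ring_slots_available(&ring, 6));
	printf("8 slots available: %d\n", rx_ring_slots_available(&ring, 8));
	return 0;
}
```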
