author:    Zoltan Kiss <zoltan.kiss@citrix.com>  2014-03-06 16:48:26 -0500
committer: David S. Miller <davem@davemloft.net>  2014-03-07 15:56:35 -0500
commit:    f53c3fe8dad725b014e9c7682720d8e3e2a8a5b3 (patch)
tree:      11cb77466fbb32cd1ca6f84a5ea0daca233a49c0 /drivers/net/xen-netback
parent:    3e2234b3149f66bc4be2343a3a0f637d922e4a36 (diff)
xen-netback: Introduce TX grant mapping
This patch introduces grant mapping on the netback TX path. It replaces grant copy operations, ditching grant copy coalescing along the way. Another solution for copy coalescing is introduced in "xen-netback: Handle guests with too many frags"; older guests and Windows could break before that patch is applied.

There is a callback (xenvif_zerocopy_callback) from the core stack to release the slots back to the guest when kfree_skb() or skb_orphan_frags() is called. It feeds a separate dealloc thread, because scheduling the NAPI instance from the callback is inefficient, so deallocation cannot be done from the NAPI instance itself.

Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--  drivers/net/xen-netback/common.h     |  39
-rw-r--r--  drivers/net/xen-netback/interface.c  |  65
-rw-r--r--  drivers/net/xen-netback/netback.c    | 432
3 files changed, 371 insertions, 165 deletions
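
As context for the diff below, here is a minimal userspace C sketch of the producer/consumer scheme the commit message describes: the zerocopy callback pushes freed pending indices onto a dealloc ring, and the dealloc thread later drains that ring and (in the real driver) issues the grant unmap operations. All names here (demo_vif, demo_queue_dealloc, ...) are simplified stand-ins rather than the netback code itself; locking, memory barriers and the actual gnttab calls are omitted.

/* Single-threaded model of the dealloc ring introduced by this patch.
 * demo_queue_dealloc() plays the role of xenvif_zerocopy_callback (producer),
 * demo_dealloc_action() plays the role of xenvif_tx_dealloc_action (consumer).
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_PENDING_REQS 256	/* power of two, so masking works */

struct demo_vif {
	uint16_t dealloc_ring[MAX_PENDING_REQS];
	uint32_t dealloc_prod;	/* advanced by the callback side */
	uint32_t dealloc_cons;	/* advanced by the dealloc thread side */
};

static uint32_t pending_index(uint32_t idx)
{
	return idx & (MAX_PENDING_REQS - 1);
}

/* Producer: queue one slot of a released skb for deallocation. */
static void demo_queue_dealloc(struct demo_vif *vif, uint16_t pending_idx)
{
	vif->dealloc_ring[pending_index(vif->dealloc_prod)] = pending_idx;
	/* In the kernel, a write barrier orders the store above against the
	 * producer increment below (see smp_wmb() in the callback). */
	vif->dealloc_prod++;
}

/* Consumer: drain the ring; the real code batches grant unmap ops here. */
static void demo_dealloc_action(struct demo_vif *vif)
{
	while (vif->dealloc_cons != vif->dealloc_prod) {
		uint16_t pending_idx =
			vif->dealloc_ring[pending_index(vif->dealloc_cons++)];
		printf("would gnttab_unmap and release slot %u\n",
		       (unsigned)pending_idx);
	}
}

int main(void)
{
	struct demo_vif vif = { .dealloc_prod = 0, .dealloc_cons = 0 };

	/* Pretend the core stack just freed a 3-slot skb. */
	demo_queue_dealloc(&vif, 5);
	demo_queue_dealloc(&vif, 6);
	demo_queue_dealloc(&vif, 7);
	demo_dealloc_action(&vif);
	return 0;
}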
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8f264df8818a..5a991266a394 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -79,6 +79,17 @@ struct pending_tx_info {
79 * if it is head of one or more tx 79 * if it is head of one or more tx
80 * reqs 80 * reqs
81 */ 81 */
82 /* Callback data for released SKBs. The callback is always
83 * xenvif_zerocopy_callback, desc contains the pending_idx, which is
84 * also an index in pending_tx_info array. It is initialized in
85 * xenvif_alloc and it never changes.
86 * skb_shinfo(skb)->destructor_arg points to the first mapped slot's
87 * callback_struct in this array of struct pending_tx_info's, then ctx
88 * to the next, or NULL if there is no more slot for this skb.
89 * ubuf_to_vif is a helper which finds the struct xenvif from a pointer
90 * to this field.
91 */
92 struct ubuf_info callback_struct;
82}; 93};
83 94
84#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) 95#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
@@ -135,13 +146,31 @@ struct xenvif {
135 pending_ring_idx_t pending_cons; 146 pending_ring_idx_t pending_cons;
136 u16 pending_ring[MAX_PENDING_REQS]; 147 u16 pending_ring[MAX_PENDING_REQS];
137 struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; 148 struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
149 grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
138 150
139 /* Coalescing tx requests before copying makes number of grant 151 /* Coalescing tx requests before copying makes number of grant
140 * copy ops greater or equal to number of slots required. In 152 * copy ops greater or equal to number of slots required. In
141 * worst case a tx request consumes 2 gnttab_copy. 153 * worst case a tx request consumes 2 gnttab_copy.
142 */ 154 */
143 struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; 155 struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
144 156 struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
157 struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
158 /* passed to gnttab_[un]map_refs with pages under (un)mapping */
159 struct page *pages_to_map[MAX_PENDING_REQS];
160 struct page *pages_to_unmap[MAX_PENDING_REQS];
161
162 /* This prevents zerocopy callbacks to race over dealloc_ring */
163 spinlock_t callback_lock;
164 /* This prevents dealloc thread and NAPI instance to race over response
165 * creation and pending_ring in xenvif_idx_release. In xenvif_tx_err
166 * it only protect response creation
167 */
168 spinlock_t response_lock;
169 pending_ring_idx_t dealloc_prod;
170 pending_ring_idx_t dealloc_cons;
171 u16 dealloc_ring[MAX_PENDING_REQS];
172 struct task_struct *dealloc_task;
173 wait_queue_head_t dealloc_wq;
145 174
146 /* Use kthread for guest RX */ 175 /* Use kthread for guest RX */
147 struct task_struct *task; 176 struct task_struct *task;
@@ -228,6 +257,8 @@ int xenvif_tx_action(struct xenvif *vif, int budget);
228int xenvif_kthread_guest_rx(void *data); 257int xenvif_kthread_guest_rx(void *data);
229void xenvif_kick_thread(struct xenvif *vif); 258void xenvif_kick_thread(struct xenvif *vif);
230 259
260int xenvif_dealloc_kthread(void *data);
261
231/* Determine whether the needed number of slots (req) are available, 262/* Determine whether the needed number of slots (req) are available,
232 * and set req_event if not. 263 * and set req_event if not.
233 */ 264 */
@@ -235,6 +266,12 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
235 266
236void xenvif_stop_queue(struct xenvif *vif); 267void xenvif_stop_queue(struct xenvif *vif);
237 268
269/* Callback from stack when TX packet can be released */
270void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
271
272/* Unmap a pending page and release it back to the guest */
273void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx);
274
238static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) 275static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
239{ 276{
240 return MAX_PENDING_REQS - 277 return MAX_PENDING_REQS -
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index bc32627a22cb..1fe9fe523cc8 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
38 38
39#include <xen/events.h> 39#include <xen/events.h>
40#include <asm/xen/hypercall.h> 40#include <asm/xen/hypercall.h>
41#include <xen/balloon.h>
41 42
42#define XENVIF_QUEUE_LENGTH 32 43#define XENVIF_QUEUE_LENGTH 32
43#define XENVIF_NAPI_WEIGHT 64 44#define XENVIF_NAPI_WEIGHT 64
@@ -87,7 +88,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
87 local_irq_save(flags); 88 local_irq_save(flags);
88 89
89 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); 90 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
90 if (!more_to_do) 91 if (!(more_to_do &&
92 xenvif_tx_pending_slots_available(vif)))
91 __napi_complete(napi); 93 __napi_complete(napi);
92 94
93 local_irq_restore(flags); 95 local_irq_restore(flags);
@@ -121,7 +123,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
121 BUG_ON(skb->dev != dev); 123 BUG_ON(skb->dev != dev);
122 124
123 /* Drop the packet if vif is not ready */ 125 /* Drop the packet if vif is not ready */
124 if (vif->task == NULL || !xenvif_schedulable(vif)) 126 if (vif->task == NULL ||
127 vif->dealloc_task == NULL ||
128 !xenvif_schedulable(vif))
125 goto drop; 129 goto drop;
126 130
127 /* At best we'll need one slot for the header and one for each 131 /* At best we'll need one slot for the header and one for each
@@ -343,8 +347,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
343 vif->pending_prod = MAX_PENDING_REQS; 347 vif->pending_prod = MAX_PENDING_REQS;
344 for (i = 0; i < MAX_PENDING_REQS; i++) 348 for (i = 0; i < MAX_PENDING_REQS; i++)
345 vif->pending_ring[i] = i; 349 vif->pending_ring[i] = i;
346 for (i = 0; i < MAX_PENDING_REQS; i++) 350 spin_lock_init(&vif->callback_lock);
347 vif->mmap_pages[i] = NULL; 351 spin_lock_init(&vif->response_lock);
352 /* If ballooning is disabled, this will consume real memory, so you
353 * better enable it. The long term solution would be to use just a
354 * bunch of valid page descriptors, without dependency on ballooning
355 */
356 err = alloc_xenballooned_pages(MAX_PENDING_REQS,
357 vif->mmap_pages,
358 false);
359 if (err) {
360 netdev_err(dev, "Could not reserve mmap_pages\n");
361 return ERR_PTR(-ENOMEM);
362 }
363 for (i = 0; i < MAX_PENDING_REQS; i++) {
364 vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
365 { .callback = xenvif_zerocopy_callback,
366 .ctx = NULL,
367 .desc = i };
368 vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
369 }
348 370
349 /* 371 /*
350 * Initialise a dummy MAC address. We choose the numerically 372 * Initialise a dummy MAC address. We choose the numerically
@@ -382,12 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
382 404
383 BUG_ON(vif->tx_irq); 405 BUG_ON(vif->tx_irq);
384 BUG_ON(vif->task); 406 BUG_ON(vif->task);
407 BUG_ON(vif->dealloc_task);
385 408
386 err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref); 409 err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
387 if (err < 0) 410 if (err < 0)
388 goto err; 411 goto err;
389 412
390 init_waitqueue_head(&vif->wq); 413 init_waitqueue_head(&vif->wq);
414 init_waitqueue_head(&vif->dealloc_wq);
391 415
392 if (tx_evtchn == rx_evtchn) { 416 if (tx_evtchn == rx_evtchn) {
393 /* feature-split-event-channels == 0 */ 417 /* feature-split-event-channels == 0 */
@@ -431,6 +455,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
431 455
432 vif->task = task; 456 vif->task = task;
433 457
458 task = kthread_create(xenvif_dealloc_kthread,
459 (void *)vif, "%s-dealloc", vif->dev->name);
460 if (IS_ERR(task)) {
461 pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
462 err = PTR_ERR(task);
463 goto err_rx_unbind;
464 }
465
466 vif->dealloc_task = task;
467
434 rtnl_lock(); 468 rtnl_lock();
435 if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) 469 if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
436 dev_set_mtu(vif->dev, ETH_DATA_LEN); 470 dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -441,6 +475,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
441 rtnl_unlock(); 475 rtnl_unlock();
442 476
443 wake_up_process(vif->task); 477 wake_up_process(vif->task);
478 wake_up_process(vif->dealloc_task);
444 479
445 return 0; 480 return 0;
446 481
@@ -478,6 +513,11 @@ void xenvif_disconnect(struct xenvif *vif)
478 vif->task = NULL; 513 vif->task = NULL;
479 } 514 }
480 515
516 if (vif->dealloc_task) {
517 kthread_stop(vif->dealloc_task);
518 vif->dealloc_task = NULL;
519 }
520
481 if (vif->tx_irq) { 521 if (vif->tx_irq) {
482 if (vif->tx_irq == vif->rx_irq) 522 if (vif->tx_irq == vif->rx_irq)
483 unbind_from_irqhandler(vif->tx_irq, vif); 523 unbind_from_irqhandler(vif->tx_irq, vif);
@@ -493,6 +533,23 @@ void xenvif_disconnect(struct xenvif *vif)
493 533
494void xenvif_free(struct xenvif *vif) 534void xenvif_free(struct xenvif *vif)
495{ 535{
536 int i, unmap_timeout = 0;
537
538 for (i = 0; i < MAX_PENDING_REQS; ++i) {
539 if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
540 unmap_timeout++;
541 schedule_timeout(msecs_to_jiffies(1000));
542 if (unmap_timeout > 9 &&
543 net_ratelimit())
544 netdev_err(vif->dev,
545 "Page still granted! Index: %x\n",
546 i);
547 i = -1;
548 }
549 }
550
551 free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
552
496 netif_napi_del(&vif->napi); 553 netif_napi_del(&vif->napi);
497 554
498 unregister_netdev(vif->dev); 555 unregister_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e9391badfa4a..cb29134147d1 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -101,10 +101,18 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
101 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); 101 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
102} 102}
103 103
104/* Find the containing VIF's structure from a pointer in pending_tx_info array
105 */
104static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf) 106static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf)
105{ 107{
106 return NULL; 108 u16 pending_idx = ubuf->desc;
109 struct pending_tx_info *temp =
110 container_of(ubuf, struct pending_tx_info, callback_struct);
111 return container_of(temp - pending_idx,
112 struct xenvif,
113 pending_tx_info[0]);
107} 114}
115
108/* This is a miniumum size for the linear area to avoid lots of 116/* This is a miniumum size for the linear area to avoid lots of
109 * calls to __pskb_pull_tail() as we set up checksum offsets. The 117 * calls to __pskb_pull_tail() as we set up checksum offsets. The
110 * value 128 was chosen as it covers all IPv4 and most likely 118 * value 128 was chosen as it covers all IPv4 and most likely
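
The new ubuf_to_vif() helper above relies on pointer arithmetic over the pending_tx_info array plus container_of() to get back to the owning vif. Below is a small standalone C sketch of the same trick with cut-down demo_* structures and a local container_of macro; it is only an illustration of the arithmetic, not the kernel code.

/* Given a pointer to the callback_struct embedded in pending_tx_info[i],
 * step back i elements to pending_tx_info[0], then recover the enclosing
 * structure with container_of-style arithmetic.
 */
#include <stddef.h>
#include <stdio.h>

#define MAX_PENDING_REQS 4

struct demo_ubuf_info { unsigned short desc; };	/* desc == pending_idx */

struct demo_pending_tx_info {
	struct demo_ubuf_info callback_struct;
};

struct demo_vif {
	int domid;
	struct demo_pending_tx_info pending_tx_info[MAX_PENDING_REQS];
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct demo_vif *demo_ubuf_to_vif(struct demo_ubuf_info *ubuf)
{
	unsigned short pending_idx = ubuf->desc;
	struct demo_pending_tx_info *info =
		container_of(ubuf, struct demo_pending_tx_info, callback_struct);

	/* info - pending_idx is &pending_tx_info[0]; from there, the vif. */
	return container_of(info - pending_idx, struct demo_vif,
			    pending_tx_info[0]);
}

int main(void)
{
	struct demo_vif vif = { .domid = 7 };
	unsigned short i;

	for (i = 0; i < MAX_PENDING_REQS; i++)
		vif.pending_tx_info[i].callback_struct.desc = i;

	printf("recovered domid: %d\n",
	       demo_ubuf_to_vif(&vif.pending_tx_info[2].callback_struct)->domid);
	return 0;
}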
@@ -665,9 +673,12 @@ static void xenvif_tx_err(struct xenvif *vif,
665 struct xen_netif_tx_request *txp, RING_IDX end) 673 struct xen_netif_tx_request *txp, RING_IDX end)
666{ 674{
667 RING_IDX cons = vif->tx.req_cons; 675 RING_IDX cons = vif->tx.req_cons;
676 unsigned long flags;
668 677
669 do { 678 do {
679 spin_lock_irqsave(&vif->response_lock, flags);
670 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); 680 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
681 spin_unlock_irqrestore(&vif->response_lock, flags);
671 if (cons == end) 682 if (cons == end)
672 break; 683 break;
673 txp = RING_GET_REQUEST(&vif->tx, cons++); 684 txp = RING_GET_REQUEST(&vif->tx, cons++);
@@ -799,10 +810,24 @@ struct xenvif_tx_cb {
799 810
800#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) 811#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
801 812
802static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif, 813static inline void xenvif_tx_create_gop(struct xenvif *vif,
803 struct sk_buff *skb, 814 u16 pending_idx,
804 struct xen_netif_tx_request *txp, 815 struct xen_netif_tx_request *txp,
805 struct gnttab_copy *gop) 816 struct gnttab_map_grant_ref *gop)
817{
818 vif->pages_to_map[gop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
819 gnttab_set_map_op(gop, idx_to_kaddr(vif, pending_idx),
820 GNTMAP_host_map | GNTMAP_readonly,
821 txp->gref, vif->domid);
822
823 memcpy(&vif->pending_tx_info[pending_idx].req, txp,
824 sizeof(*txp));
825}
826
827static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
828 struct sk_buff *skb,
829 struct xen_netif_tx_request *txp,
830 struct gnttab_map_grant_ref *gop)
806{ 831{
807 struct skb_shared_info *shinfo = skb_shinfo(skb); 832 struct skb_shared_info *shinfo = skb_shinfo(skb);
808 skb_frag_t *frags = shinfo->frags; 833 skb_frag_t *frags = shinfo->frags;
@@ -823,83 +848,12 @@ static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
823 /* Skip first skb fragment if it is on same page as header fragment. */ 848 /* Skip first skb fragment if it is on same page as header fragment. */
824 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 849 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
825 850
826 /* Coalesce tx requests, at this point the packet passed in 851 for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
827 * should be <= 64K. Any packets larger than 64K have been 852 shinfo->nr_frags++, txp++, gop++) {
828 * handled in xenvif_count_requests().
829 */
830 for (shinfo->nr_frags = slot = start; slot < nr_slots;
831 shinfo->nr_frags++) {
832 struct pending_tx_info *pending_tx_info =
833 vif->pending_tx_info;
834
835 page = alloc_page(GFP_ATOMIC|__GFP_COLD);
836 if (!page)
837 goto err;
838
839 dst_offset = 0;
840 first = NULL;
841 while (dst_offset < PAGE_SIZE && slot < nr_slots) {
842 gop->flags = GNTCOPY_source_gref;
843
844 gop->source.u.ref = txp->gref;
845 gop->source.domid = vif->domid;
846 gop->source.offset = txp->offset;
847
848 gop->dest.domid = DOMID_SELF;
849
850 gop->dest.offset = dst_offset;
851 gop->dest.u.gmfn = virt_to_mfn(page_address(page));
852
853 if (dst_offset + txp->size > PAGE_SIZE) {
854 /* This page can only merge a portion
855 * of tx request. Do not increment any
856 * pointer / counter here. The txp
857 * will be dealt with in future
858 * rounds, eventually hitting the
859 * `else` branch.
860 */
861 gop->len = PAGE_SIZE - dst_offset;
862 txp->offset += gop->len;
863 txp->size -= gop->len;
864 dst_offset += gop->len; /* quit loop */
865 } else {
866 /* This tx request can be merged in the page */
867 gop->len = txp->size;
868 dst_offset += gop->len;
869
870 index = pending_index(vif->pending_cons++); 853 index = pending_index(vif->pending_cons++);
871
872 pending_idx = vif->pending_ring[index]; 854 pending_idx = vif->pending_ring[index];
873 855 xenvif_tx_create_gop(vif, pending_idx, txp, gop);
874 memcpy(&pending_tx_info[pending_idx].req, txp, 856 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
875 sizeof(*txp));
876
877 /* Poison these fields, corresponding
878 * fields for head tx req will be set
879 * to correct values after the loop.
880 */
881 vif->mmap_pages[pending_idx] = (void *)(~0UL);
882 pending_tx_info[pending_idx].head =
883 INVALID_PENDING_RING_IDX;
884
885 if (!first) {
886 first = &pending_tx_info[pending_idx];
887 start_idx = index;
888 head_idx = pending_idx;
889 }
890
891 txp++;
892 slot++;
893 }
894
895 gop++;
896 }
897
898 first->req.offset = 0;
899 first->req.size = dst_offset;
900 first->head = start_idx;
901 vif->mmap_pages[head_idx] = page;
902 frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
903 } 857 }
904 858
905 BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); 859 BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
@@ -919,11 +873,38 @@ err:
919 return NULL; 873 return NULL;
920} 874}
921 875
876static inline void xenvif_grant_handle_set(struct xenvif *vif,
877 u16 pending_idx,
878 grant_handle_t handle)
879{
880 if (unlikely(vif->grant_tx_handle[pending_idx] !=
881 NETBACK_INVALID_HANDLE)) {
882 netdev_err(vif->dev,
883 "Trying to overwrite active handle! pending_idx: %x\n",
884 pending_idx);
885 BUG();
886 }
887 vif->grant_tx_handle[pending_idx] = handle;
888}
889
890static inline void xenvif_grant_handle_reset(struct xenvif *vif,
891 u16 pending_idx)
892{
893 if (unlikely(vif->grant_tx_handle[pending_idx] ==
894 NETBACK_INVALID_HANDLE)) {
895 netdev_err(vif->dev,
896 "Trying to unmap invalid handle! pending_idx: %x\n",
897 pending_idx);
898 BUG();
899 }
900 vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
901}
902
922static int xenvif_tx_check_gop(struct xenvif *vif, 903static int xenvif_tx_check_gop(struct xenvif *vif,
923 struct sk_buff *skb, 904 struct sk_buff *skb,
924 struct gnttab_copy **gopp) 905 struct gnttab_map_grant_ref **gopp)
925{ 906{
926 struct gnttab_copy *gop = *gopp; 907 struct gnttab_map_grant_ref *gop = *gopp;
927 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 908 u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
928 struct skb_shared_info *shinfo = skb_shinfo(skb); 909 struct skb_shared_info *shinfo = skb_shinfo(skb);
929 struct pending_tx_info *tx_info; 910 struct pending_tx_info *tx_info;
@@ -935,6 +916,8 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
935 err = gop->status; 916 err = gop->status;
936 if (unlikely(err)) 917 if (unlikely(err))
937 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); 918 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
919 else
920 xenvif_grant_handle_set(vif, pending_idx , gop->handle);
938 921
939 /* Skip first skb fragment if it is on same page as header fragment. */ 922 /* Skip first skb fragment if it is on same page as header fragment. */
940 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); 923 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
@@ -948,18 +931,13 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
948 head = tx_info->head; 931 head = tx_info->head;
949 932
950 /* Check error status: if okay then remember grant handle. */ 933 /* Check error status: if okay then remember grant handle. */
951 do {
952 newerr = (++gop)->status; 934 newerr = (++gop)->status;
953 if (newerr)
954 break;
955 peek = vif->pending_ring[pending_index(++head)];
956 } while (!pending_tx_is_head(vif, peek));
957 935
958 if (likely(!newerr)) { 936 if (likely(!newerr)) {
937 xenvif_grant_handle_set(vif, pending_idx , gop->handle);
959 /* Had a previous error? Invalidate this fragment. */ 938 /* Had a previous error? Invalidate this fragment. */
960 if (unlikely(err)) 939 if (unlikely(err))
961 xenvif_idx_release(vif, pending_idx, 940 xenvif_idx_unmap(vif, pending_idx);
962 XEN_NETIF_RSP_OKAY);
963 continue; 941 continue;
964 } 942 }
965 943
@@ -972,11 +950,10 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
972 950
973 /* First error: invalidate header and preceding fragments. */ 951 /* First error: invalidate header and preceding fragments. */
974 pending_idx = XENVIF_TX_CB(skb)->pending_idx; 952 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
975 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); 953 xenvif_idx_unmap(vif, pending_idx);
976 for (j = start; j < i; j++) { 954 for (j = start; j < i; j++) {
977 pending_idx = frag_get_pending_idx(&shinfo->frags[j]); 955 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
978 xenvif_idx_release(vif, pending_idx, 956 xenvif_idx_unmap(vif, pending_idx);
979 XEN_NETIF_RSP_OKAY);
980 } 957 }
981 958
982 /* Remember the error: invalidate all subsequent fragments. */ 959 /* Remember the error: invalidate all subsequent fragments. */
@@ -992,6 +969,10 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
992 struct skb_shared_info *shinfo = skb_shinfo(skb); 969 struct skb_shared_info *shinfo = skb_shinfo(skb);
993 int nr_frags = shinfo->nr_frags; 970 int nr_frags = shinfo->nr_frags;
994 int i; 971 int i;
972 u16 prev_pending_idx = INVALID_PENDING_IDX;
973
974 if (skb_shinfo(skb)->destructor_arg)
975 prev_pending_idx = XENVIF_TX_CB(skb)->pending_idx;
995 976
996 for (i = 0; i < nr_frags; i++) { 977 for (i = 0; i < nr_frags; i++) {
997 skb_frag_t *frag = shinfo->frags + i; 978 skb_frag_t *frag = shinfo->frags + i;
@@ -1001,6 +982,17 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
1001 982
1002 pending_idx = frag_get_pending_idx(frag); 983 pending_idx = frag_get_pending_idx(frag);
1003 984
985 /* If this is not the first frag, chain it to the previous*/
986 if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
987 skb_shinfo(skb)->destructor_arg =
988 &vif->pending_tx_info[pending_idx].callback_struct;
989 else if (likely(pending_idx != prev_pending_idx))
990 vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
991 &(vif->pending_tx_info[pending_idx].callback_struct);
992
993 vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
994 prev_pending_idx = pending_idx;
995
1004 txp = &vif->pending_tx_info[pending_idx].req; 996 txp = &vif->pending_tx_info[pending_idx].req;
1005 page = virt_to_page(idx_to_kaddr(vif, pending_idx)); 997 page = virt_to_page(idx_to_kaddr(vif, pending_idx));
1006 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); 998 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
@@ -1008,10 +1000,15 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
1008 skb->data_len += txp->size; 1000 skb->data_len += txp->size;
1009 skb->truesize += txp->size; 1001 skb->truesize += txp->size;
1010 1002
1011 /* Take an extra reference to offset xenvif_idx_release */ 1003 /* Take an extra reference to offset network stack's put_page */
1012 get_page(vif->mmap_pages[pending_idx]); 1004 get_page(vif->mmap_pages[pending_idx]);
1013 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
1014 } 1005 }
1006 /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
1007 * overlaps with "index", and "mapping" is not set. I think mapping
1008 * should be set. If delivered to local stack, it would drop this
1009 * skb in sk_filter unless the socket has the right to use it.
1010 */
1011 skb->pfmemalloc = false;
1015} 1012}
1016 1013
1017static int xenvif_get_extras(struct xenvif *vif, 1014static int xenvif_get_extras(struct xenvif *vif,
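
xenvif_fill_frags() above links the per-slot callback_structs into a chain: skb_shinfo(skb)->destructor_arg points at the first mapped slot, each ctx points at the next, and the last ctx is NULL, which is what xenvif_zerocopy_callback later walks. A minimal standalone sketch of that chain, using hypothetical demo_* types rather than the real ubuf_info and skb structures:

/* Build a destructor_arg-style chain of slots and walk it the way the
 * zerocopy callback does.  Purely illustrative userspace code.
 */
#include <stdio.h>

struct demo_ubuf_info {
	unsigned short desc;		/* pending_idx of this slot */
	struct demo_ubuf_info *ctx;	/* next slot in the same skb, or NULL */
};

int main(void)
{
	struct demo_ubuf_info slots[3] = {
		{ .desc = 10 }, { .desc = 11 }, { .desc = 12 }
	};
	struct demo_ubuf_info *destructor_arg = &slots[0];
	int i;

	/* What fill_frags does: chain each slot to the next, NULL-terminate. */
	for (i = 0; i < 2; i++)
		slots[i].ctx = &slots[i + 1];
	slots[2].ctx = NULL;

	/* What the zerocopy callback does: walk the chain, queue each desc. */
	while (destructor_arg) {
		printf("queue pending_idx %u for dealloc\n",
		       (unsigned)destructor_arg->desc);
		destructor_arg = destructor_arg->ctx;
	}
	return 0;
}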
@@ -1131,7 +1128,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1131 1128
1132static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) 1129static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
1133{ 1130{
1134 struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop; 1131 struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
1135 struct sk_buff *skb; 1132 struct sk_buff *skb;
1136 int ret; 1133 int ret;
1137 1134
@@ -1238,30 +1235,10 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
1238 } 1235 }
1239 } 1236 }
1240 1237
1241 /* XXX could copy straight to head */ 1238 xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);
1242 page = xenvif_alloc_page(vif, pending_idx);
1243 if (!page) {
1244 kfree_skb(skb);
1245 xenvif_tx_err(vif, &txreq, idx);
1246 break;
1247 }
1248
1249 gop->source.u.ref = txreq.gref;
1250 gop->source.domid = vif->domid;
1251 gop->source.offset = txreq.offset;
1252
1253 gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1254 gop->dest.domid = DOMID_SELF;
1255 gop->dest.offset = txreq.offset;
1256
1257 gop->len = txreq.size;
1258 gop->flags = GNTCOPY_source_gref;
1259 1239
1260 gop++; 1240 gop++;
1261 1241
1262 memcpy(&vif->pending_tx_info[pending_idx].req,
1263 &txreq, sizeof(txreq));
1264 vif->pending_tx_info[pending_idx].head = index;
1265 XENVIF_TX_CB(skb)->pending_idx = pending_idx; 1242 XENVIF_TX_CB(skb)->pending_idx = pending_idx;
1266 1243
1267 __skb_put(skb, data_len); 1244 __skb_put(skb, data_len);
@@ -1290,17 +1267,17 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
1290 1267
1291 vif->tx.req_cons = idx; 1268 vif->tx.req_cons = idx;
1292 1269
1293 if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) 1270 if ((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))
1294 break; 1271 break;
1295 } 1272 }
1296 1273
1297 return gop - vif->tx_copy_ops; 1274 return gop - vif->tx_map_ops;
1298} 1275}
1299 1276
1300 1277
1301static int xenvif_tx_submit(struct xenvif *vif) 1278static int xenvif_tx_submit(struct xenvif *vif)
1302{ 1279{
1303 struct gnttab_copy *gop = vif->tx_copy_ops; 1280 struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
1304 struct sk_buff *skb; 1281 struct sk_buff *skb;
1305 int work_done = 0; 1282 int work_done = 0;
1306 1283
@@ -1324,14 +1301,17 @@ static int xenvif_tx_submit(struct xenvif *vif)
1324 memcpy(skb->data, 1301 memcpy(skb->data,
1325 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), 1302 (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
1326 data_len); 1303 data_len);
1304 vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
1327 if (data_len < txp->size) { 1305 if (data_len < txp->size) {
1328 /* Append the packet payload as a fragment. */ 1306 /* Append the packet payload as a fragment. */
1329 txp->offset += data_len; 1307 txp->offset += data_len;
1330 txp->size -= data_len; 1308 txp->size -= data_len;
1309 skb_shinfo(skb)->destructor_arg =
1310 &vif->pending_tx_info[pending_idx].callback_struct;
1331 } else { 1311 } else {
1332 /* Schedule a response immediately. */ 1312 /* Schedule a response immediately. */
1333 xenvif_idx_release(vif, pending_idx, 1313 skb_shinfo(skb)->destructor_arg = NULL;
1334 XEN_NETIF_RSP_OKAY); 1314 xenvif_idx_unmap(vif, pending_idx);
1335 } 1315 }
1336 1316
1337 if (txp->flags & XEN_NETTXF_csum_blank) 1317 if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1353,6 +1333,9 @@ static int xenvif_tx_submit(struct xenvif *vif)
1353 if (checksum_setup(vif, skb)) { 1333 if (checksum_setup(vif, skb)) {
1354 netdev_dbg(vif->dev, 1334 netdev_dbg(vif->dev,
1355 "Can't setup checksum in net_tx_action\n"); 1335 "Can't setup checksum in net_tx_action\n");
1336 /* We have to set this flag to trigger the callback */
1337 if (skb_shinfo(skb)->destructor_arg)
1338 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1356 kfree_skb(skb); 1339 kfree_skb(skb);
1357 continue; 1340 continue;
1358 } 1341 }
@@ -1378,6 +1361,14 @@ static int xenvif_tx_submit(struct xenvif *vif)
1378 1361
1379 work_done++; 1362 work_done++;
1380 1363
1364 /* Set this flag right before netif_receive_skb, otherwise
1365 * someone might think this packet already left netback, and
1366 * do a skb_copy_ubufs while we are still in control of the
1367 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
1368 */
1369 if (skb_shinfo(skb)->destructor_arg)
1370 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1371
1381 netif_receive_skb(skb); 1372 netif_receive_skb(skb);
1382 } 1373 }
1383 1374
@@ -1386,14 +1377,111 @@ static int xenvif_tx_submit(struct xenvif *vif)
1386 1377
1387void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) 1378void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
1388{ 1379{
1389 return; 1380 unsigned long flags;
1381 pending_ring_idx_t index;
1382 struct xenvif *vif = ubuf_to_vif(ubuf);
1383
1384 /* This is the only place where we grab this lock, to protect callbacks
1385 * from each other.
1386 */
1387 spin_lock_irqsave(&vif->callback_lock, flags);
1388 do {
1389 u16 pending_idx = ubuf->desc;
1390 ubuf = (struct ubuf_info *) ubuf->ctx;
1391 BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
1392 MAX_PENDING_REQS);
1393 index = pending_index(vif->dealloc_prod);
1394 vif->dealloc_ring[index] = pending_idx;
1395 /* Sync with xenvif_tx_dealloc_action:
1396 * insert idx then incr producer.
1397 */
1398 smp_wmb();
1399 vif->dealloc_prod++;
1400 } while (ubuf);
1401 wake_up(&vif->dealloc_wq);
1402 spin_unlock_irqrestore(&vif->callback_lock, flags);
1403
1404 if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx) &&
1405 xenvif_tx_pending_slots_available(vif)) {
1406 local_bh_disable();
1407 napi_schedule(&vif->napi);
1408 local_bh_enable();
1409 }
1410}
1411
1412static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
1413{
1414 struct gnttab_unmap_grant_ref *gop;
1415 pending_ring_idx_t dc, dp;
1416 u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1417 unsigned int i = 0;
1418
1419 dc = vif->dealloc_cons;
1420 gop = vif->tx_unmap_ops;
1421
1422 /* Free up any grants we have finished using */
1423 do {
1424 dp = vif->dealloc_prod;
1425
1426 /* Ensure we see all indices enqueued by all
1427 * xenvif_zerocopy_callback().
1428 */
1429 smp_rmb();
1430
1431 while (dc != dp) {
1432 BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
1433 pending_idx =
1434 vif->dealloc_ring[pending_index(dc++)];
1435
1436 pending_idx_release[gop-vif->tx_unmap_ops] =
1437 pending_idx;
1438 vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
1439 vif->mmap_pages[pending_idx];
1440 gnttab_set_unmap_op(gop,
1441 idx_to_kaddr(vif, pending_idx),
1442 GNTMAP_host_map,
1443 vif->grant_tx_handle[pending_idx]);
1444 /* Btw. already unmapped? */
1445 xenvif_grant_handle_reset(vif, pending_idx);
1446 ++gop;
1447 }
1448
1449 } while (dp != vif->dealloc_prod);
1450
1451 vif->dealloc_cons = dc;
1452
1453 if (gop - vif->tx_unmap_ops > 0) {
1454 int ret;
1455 ret = gnttab_unmap_refs(vif->tx_unmap_ops,
1456 NULL,
1457 vif->pages_to_unmap,
1458 gop - vif->tx_unmap_ops);
1459 if (ret) {
1460 netdev_err(vif->dev, "Unmap fail: nr_ops %x ret %d\n",
1461 gop - vif->tx_unmap_ops, ret);
1462 for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
1463 if (gop[i].status != GNTST_okay)
1464 netdev_err(vif->dev,
1465 " host_addr: %llx handle: %x status: %d\n",
1466 gop[i].host_addr,
1467 gop[i].handle,
1468 gop[i].status);
1469 }
1470 BUG();
1471 }
1472 }
1473
1474 for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
1475 xenvif_idx_release(vif, pending_idx_release[i],
1476 XEN_NETIF_RSP_OKAY);
1390} 1477}
1391 1478
1479
1392/* Called after netfront has transmitted */ 1480/* Called after netfront has transmitted */
1393int xenvif_tx_action(struct xenvif *vif, int budget) 1481int xenvif_tx_action(struct xenvif *vif, int budget)
1394{ 1482{
1395 unsigned nr_gops; 1483 unsigned nr_gops;
1396 int work_done; 1484 int work_done, ret;
1397 1485
1398 if (unlikely(!tx_work_todo(vif))) 1486 if (unlikely(!tx_work_todo(vif)))
1399 return 0; 1487 return 0;
@@ -1403,7 +1491,11 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
1403 if (nr_gops == 0) 1491 if (nr_gops == 0)
1404 return 0; 1492 return 0;
1405 1493
1406 gnttab_batch_copy(vif->tx_copy_ops, nr_gops); 1494 ret = gnttab_map_refs(vif->tx_map_ops,
1495 NULL,
1496 vif->pages_to_map,
1497 nr_gops);
1498 BUG_ON(ret);
1407 1499
1408 work_done = xenvif_tx_submit(vif); 1500 work_done = xenvif_tx_submit(vif);
1409 1501
@@ -1414,45 +1506,19 @@ static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
1414 u8 status) 1506 u8 status)
1415{ 1507{
1416 struct pending_tx_info *pending_tx_info; 1508 struct pending_tx_info *pending_tx_info;
1417 pending_ring_idx_t head; 1509 pending_ring_idx_t index;
1418 u16 peek; /* peek into next tx request */ 1510 u16 peek; /* peek into next tx request */
1511 unsigned long flags;
1419 1512
1420 BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); 1513 pending_tx_info = &vif->pending_tx_info[pending_idx];
1421 1514 spin_lock_irqsave(&vif->response_lock, flags);
1422 /* Already complete? */
1423 if (vif->mmap_pages[pending_idx] == NULL)
1424 return;
1425
1426 pending_tx_info = &vif->pending_tx_info[pending_idx];
1427
1428 head = pending_tx_info->head;
1429
1430 BUG_ON(!pending_tx_is_head(vif, head));
1431 BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
1432
1433 do {
1434 pending_ring_idx_t index;
1435 pending_ring_idx_t idx = pending_index(head);
1436 u16 info_idx = vif->pending_ring[idx];
1437
1438 pending_tx_info = &vif->pending_tx_info[info_idx];
1439 make_tx_response(vif, &pending_tx_info->req, status); 1515 make_tx_response(vif, &pending_tx_info->req, status);
1440 1516 index = pending_index(vif->pending_prod);
1441 /* Setting any number other than 1517 vif->pending_ring[index] = pending_idx;
1442 * INVALID_PENDING_RING_IDX indicates this slot is 1518 /* TX shouldn't use the index before we give it back here */
1443 * starting a new packet / ending a previous packet. 1519 mb();
1444 */ 1520 vif->pending_prod++;
1445 pending_tx_info->head = 0; 1521 spin_unlock_irqrestore(&vif->response_lock, flags);
1446
1447 index = pending_index(vif->pending_prod++);
1448 vif->pending_ring[index] = vif->pending_ring[info_idx];
1449
1450 peek = vif->pending_ring[pending_index(++head)];
1451
1452 } while (!pending_tx_is_head(vif, peek));
1453
1454 put_page(vif->mmap_pages[pending_idx]);
1455 vif->mmap_pages[pending_idx] = NULL;
1456} 1522}
1457 1523
1458 1524
@@ -1500,6 +1566,25 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1500 return resp; 1566 return resp;
1501} 1567}
1502 1568
1569void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
1570{
1571 int ret;
1572 struct gnttab_unmap_grant_ref tx_unmap_op;
1573
1574 gnttab_set_unmap_op(&tx_unmap_op,
1575 idx_to_kaddr(vif, pending_idx),
1576 GNTMAP_host_map,
1577 vif->grant_tx_handle[pending_idx]);
1578 /* Btw. already unmapped? */
1579 xenvif_grant_handle_reset(vif, pending_idx);
1580
1581 ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
1582 &vif->mmap_pages[pending_idx], 1);
1583 BUG_ON(ret);
1584
1585 xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
1586}
1587
1503static inline int rx_work_todo(struct xenvif *vif) 1588static inline int rx_work_todo(struct xenvif *vif)
1504{ 1589{
1505 return !skb_queue_empty(&vif->rx_queue) && 1590 return !skb_queue_empty(&vif->rx_queue) &&
@@ -1516,6 +1601,11 @@ static inline int tx_work_todo(struct xenvif *vif)
1516 return 0; 1601 return 0;
1517} 1602}
1518 1603
1604static inline bool tx_dealloc_work_todo(struct xenvif *vif)
1605{
1606 return vif->dealloc_cons != vif->dealloc_prod;
1607}
1608
1519void xenvif_unmap_frontend_rings(struct xenvif *vif) 1609void xenvif_unmap_frontend_rings(struct xenvif *vif)
1520{ 1610{
1521 if (vif->tx.sring) 1611 if (vif->tx.sring)
@@ -1602,6 +1692,28 @@ int xenvif_kthread_guest_rx(void *data)
1602 return 0; 1692 return 0;
1603} 1693}
1604 1694
1695int xenvif_dealloc_kthread(void *data)
1696{
1697 struct xenvif *vif = data;
1698
1699 while (!kthread_should_stop()) {
1700 wait_event_interruptible(vif->dealloc_wq,
1701 tx_dealloc_work_todo(vif) ||
1702 kthread_should_stop());
1703 if (kthread_should_stop())
1704 break;
1705
1706 xenvif_tx_dealloc_action(vif);
1707 cond_resched();
1708 }
1709
1710 /* Unmap anything remaining*/
1711 if (tx_dealloc_work_todo(vif))
1712 xenvif_tx_dealloc_action(vif);
1713
1714 return 0;
1715}
1716
1605static int __init netback_init(void) 1717static int __init netback_init(void)
1606{ 1718{
1607 int rc = 0; 1719 int rc = 0;