| field | value | date |
|---|---|---|
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2014-06-08 02:24:07 -0400 |
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2014-06-08 02:24:07 -0400 |
| commit | a292241cccb7e20e8b997a9a44177e7c98141859 (patch) | |
| tree | a0b0bb95e7dce3233a2d8b203f9e326cdec7a00e /drivers/net/xen-netback | |
| parent | d49cb7aeebb974713f9f7ab2991352d3050b095b (diff) | |
| parent | 68807a0c2015cb40df4869e16651f0ce5cc14d52 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 3.16.
Diffstat (limited to 'drivers/net/xen-netback')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | drivers/net/xen-netback/common.h | 113 |
| -rw-r--r-- | drivers/net/xen-netback/interface.c | 144 |
| -rw-r--r-- | drivers/net/xen-netback/netback.c | 910 |

3 files changed, 825 insertions, 342 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ae413a2cbee7..630a3fcf65bc 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
| @@ -48,37 +48,19 @@ | |||
| 48 | typedef unsigned int pending_ring_idx_t; | 48 | typedef unsigned int pending_ring_idx_t; |
| 49 | #define INVALID_PENDING_RING_IDX (~0U) | 49 | #define INVALID_PENDING_RING_IDX (~0U) |
| 50 | 50 | ||
| 51 | /* For the head field in pending_tx_info: it is used to indicate | ||
| 52 | * whether this tx info is the head of one or more coalesced requests. | ||
| 53 | * | ||
| 54 | * When head != INVALID_PENDING_RING_IDX, it means the start of a new | ||
| 55 | * tx requests queue and the end of previous queue. | ||
| 56 | * | ||
| 57 | * An example sequence of head fields (I = INVALID_PENDING_RING_IDX): | ||
| 58 | * | ||
| 59 | * ...|0 I I I|5 I|9 I I I|... | ||
| 60 | * -->|<-INUSE---------------- | ||
| 61 | * | ||
| 62 | * After consuming the first slot(s) we have: | ||
| 63 | * | ||
| 64 | * ...|V V V V|5 I|9 I I I|... | ||
| 65 | * -----FREE->|<-INUSE-------- | ||
| 66 | * | ||
| 67 | * where V stands for "valid pending ring index". Any number other | ||
| 68 | * than INVALID_PENDING_RING_IDX is OK. These entries are considered | ||
| 69 | * free and can contain any number other than | ||
| 70 | * INVALID_PENDING_RING_IDX. In practice we use 0. | ||
| 71 | * | ||
| 72 | * The in use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the | ||
| 73 | * above example) number is the index into pending_tx_info and | ||
| 74 | * mmap_pages arrays. | ||
| 75 | */ | ||
| 76 | struct pending_tx_info { | 51 | struct pending_tx_info { |
| 77 | struct xen_netif_tx_request req; /* coalesced tx request */ | 52 | struct xen_netif_tx_request req; /* tx request */ |
| 78 | pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX | 53 | /* Callback data for released SKBs. The callback is always |
| 79 | * if it is head of one or more tx | 54 | * xenvif_zerocopy_callback, desc contains the pending_idx, which is |
| 80 | * reqs | 55 | * also an index in pending_tx_info array. It is initialized in |
| 81 | */ | 56 | * xenvif_alloc and it never changes. |
| 57 | * skb_shinfo(skb)->destructor_arg points to the first mapped slot's | ||
| 58 | * callback_struct in this array of struct pending_tx_info's, then ctx | ||
| 59 | * to the next, or NULL if there is no more slot for this skb. | ||
| 60 | * ubuf_to_vif is a helper which finds the struct xenvif from a pointer | ||
| 61 | * to this field. | ||
| 62 | */ | ||
| 63 | struct ubuf_info callback_struct; | ||
| 82 | }; | 64 | }; |
| 83 | 65 | ||
| 84 | #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) | 66 | #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) |
| @@ -99,7 +81,7 @@ struct xenvif_rx_meta { | |||
| 99 | 81 | ||
| 100 | #define MAX_BUFFER_OFFSET PAGE_SIZE | 82 | #define MAX_BUFFER_OFFSET PAGE_SIZE |
| 101 | 83 | ||
| 102 | #define MAX_PENDING_REQS 256 | 84 | #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE |
| 103 | 85 | ||
| 104 | /* It's possible for an skb to have a maximal number of frags | 86 | /* It's possible for an skb to have a maximal number of frags |
| 105 | * but still be less than MAX_BUFFER_OFFSET in size. Thus the | 87 | * but still be less than MAX_BUFFER_OFFSET in size. Thus the |
| @@ -108,11 +90,25 @@ struct xenvif_rx_meta { | |||
| 108 | */ | 90 | */ |
| 109 | #define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) | 91 | #define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) |
| 110 | 92 | ||
| 93 | #define NETBACK_INVALID_HANDLE -1 | ||
| 94 | |||
| 95 | /* To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating | ||
| 96 | * the maximum slots a valid packet can use. Now this value is defined | ||
| 97 | * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by | ||
| 98 | * all backend. | ||
| 99 | */ | ||
| 100 | #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN | ||
| 101 | |||
| 111 | struct xenvif { | 102 | struct xenvif { |
| 112 | /* Unique identifier for this interface. */ | 103 | /* Unique identifier for this interface. */ |
| 113 | domid_t domid; | 104 | domid_t domid; |
| 114 | unsigned int handle; | 105 | unsigned int handle; |
| 115 | 106 | ||
| 107 | /* Is this interface disabled? True when backend discovers | ||
| 108 | * frontend is rogue. | ||
| 109 | */ | ||
| 110 | bool disabled; | ||
| 111 | |||
| 116 | /* Use NAPI for guest TX */ | 112 | /* Use NAPI for guest TX */ |
| 117 | struct napi_struct napi; | 113 | struct napi_struct napi; |
| 118 | /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ | 114 | /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ |
| @@ -126,13 +122,27 @@ struct xenvif { | |||
| 126 | pending_ring_idx_t pending_cons; | 122 | pending_ring_idx_t pending_cons; |
| 127 | u16 pending_ring[MAX_PENDING_REQS]; | 123 | u16 pending_ring[MAX_PENDING_REQS]; |
| 128 | struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; | 124 | struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; |
| 129 | 125 | grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; | |
| 130 | /* Coalescing tx requests before copying makes number of grant | 126 | |
| 131 | * copy ops greater or equal to number of slots required. In | 127 | struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; |
| 132 | * worst case a tx request consumes 2 gnttab_copy. | 128 | struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; |
| 129 | struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; | ||
| 130 | /* passed to gnttab_[un]map_refs with pages under (un)mapping */ | ||
| 131 | struct page *pages_to_map[MAX_PENDING_REQS]; | ||
| 132 | struct page *pages_to_unmap[MAX_PENDING_REQS]; | ||
| 133 | |||
| 134 | /* This prevents zerocopy callbacks to race over dealloc_ring */ | ||
| 135 | spinlock_t callback_lock; | ||
| 136 | /* This prevents dealloc thread and NAPI instance to race over response | ||
| 137 | * creation and pending_ring in xenvif_idx_release. In xenvif_tx_err | ||
| 138 | * it only protect response creation | ||
| 133 | */ | 139 | */ |
| 134 | struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; | 140 | spinlock_t response_lock; |
| 135 | 141 | pending_ring_idx_t dealloc_prod; | |
| 142 | pending_ring_idx_t dealloc_cons; | ||
| 143 | u16 dealloc_ring[MAX_PENDING_REQS]; | ||
| 144 | struct task_struct *dealloc_task; | ||
| 145 | wait_queue_head_t dealloc_wq; | ||
| 136 | 146 | ||
| 137 | /* Use kthread for guest RX */ | 147 | /* Use kthread for guest RX */ |
| 138 | struct task_struct *task; | 148 | struct task_struct *task; |
| @@ -144,6 +154,9 @@ struct xenvif { | |||
| 144 | struct xen_netif_rx_back_ring rx; | 154 | struct xen_netif_rx_back_ring rx; |
| 145 | struct sk_buff_head rx_queue; | 155 | struct sk_buff_head rx_queue; |
| 146 | RING_IDX rx_last_skb_slots; | 156 | RING_IDX rx_last_skb_slots; |
| 157 | bool rx_queue_purge; | ||
| 158 | |||
| 159 | struct timer_list wake_queue; | ||
| 147 | 160 | ||
| 148 | /* This array is allocated seperately as it is large */ | 161 | /* This array is allocated seperately as it is large */ |
| 149 | struct gnttab_copy *grant_copy_op; | 162 | struct gnttab_copy *grant_copy_op; |
| @@ -175,6 +188,10 @@ struct xenvif { | |||
| 175 | 188 | ||
| 176 | /* Statistics */ | 189 | /* Statistics */ |
| 177 | unsigned long rx_gso_checksum_fixup; | 190 | unsigned long rx_gso_checksum_fixup; |
| 191 | unsigned long tx_zerocopy_sent; | ||
| 192 | unsigned long tx_zerocopy_success; | ||
| 193 | unsigned long tx_zerocopy_fail; | ||
| 194 | unsigned long tx_frag_overflow; | ||
| 178 | 195 | ||
| 179 | /* Miscellaneous private stuff. */ | 196 | /* Miscellaneous private stuff. */ |
| 180 | struct net_device *dev; | 197 | struct net_device *dev; |
| @@ -216,9 +233,11 @@ void xenvif_carrier_off(struct xenvif *vif); | |||
| 216 | 233 | ||
| 217 | int xenvif_tx_action(struct xenvif *vif, int budget); | 234 | int xenvif_tx_action(struct xenvif *vif, int budget); |
| 218 | 235 | ||
| 219 | int xenvif_kthread(void *data); | 236 | int xenvif_kthread_guest_rx(void *data); |
| 220 | void xenvif_kick_thread(struct xenvif *vif); | 237 | void xenvif_kick_thread(struct xenvif *vif); |
| 221 | 238 | ||
| 239 | int xenvif_dealloc_kthread(void *data); | ||
| 240 | |||
| 222 | /* Determine whether the needed number of slots (req) are available, | 241 | /* Determine whether the needed number of slots (req) are available, |
| 223 | * and set req_event if not. | 242 | * and set req_event if not. |
| 224 | */ | 243 | */ |
| @@ -226,6 +245,24 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed); | |||
| 226 | 245 | ||
| 227 | void xenvif_stop_queue(struct xenvif *vif); | 246 | void xenvif_stop_queue(struct xenvif *vif); |
| 228 | 247 | ||
| 248 | /* Callback from stack when TX packet can be released */ | ||
| 249 | void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); | ||
| 250 | |||
| 251 | /* Unmap a pending page and release it back to the guest */ | ||
| 252 | void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx); | ||
| 253 | |||
| 254 | static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) | ||
| 255 | { | ||
| 256 | return MAX_PENDING_REQS - | ||
| 257 | vif->pending_prod + vif->pending_cons; | ||
| 258 | } | ||
| 259 | |||
| 260 | /* Callback from stack when TX packet can be released */ | ||
| 261 | void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); | ||
| 262 | |||
| 229 | extern bool separate_tx_rx_irq; | 263 | extern bool separate_tx_rx_irq; |
| 230 | 264 | ||
| 265 | extern unsigned int rx_drain_timeout_msecs; | ||
| 266 | extern unsigned int rx_drain_timeout_jiffies; | ||
| 267 | |||
| 231 | #endif /* __XEN_NETBACK__COMMON_H__ */ | 268 | #endif /* __XEN_NETBACK__COMMON_H__ */ |
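The comment added to `struct pending_tx_info` above describes the new zerocopy bookkeeping: `skb_shinfo(skb)->destructor_arg` points at the first mapped slot's `callback_struct`, each `ctx` links to the next slot, and `desc` holds the slot's `pending_idx`. The sketch below is an illustrative, self-contained userspace rendering of that scheme; the struct layouts are simplified stand-ins, the three-slot chain is hypothetical, and only the `container_of` arithmetic mirrors the `ubuf_to_vif()` helper added to netback.c further down.

```c
/* Illustrative, self-contained sketch (not kernel code). The structs are
 * simplified stand-ins for the kernel's ubuf_info / pending_tx_info / xenvif;
 * only the container_of arithmetic mirrors ubuf_to_vif() in netback.c.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_PENDING_REQS 256	/* assumed XEN_NETIF_TX_RING_SIZE for a 4 KiB ring page */

struct ubuf_info {			/* simplified stand-in */
	void (*callback)(struct ubuf_info *ubuf, bool zerocopy_success);
	void *ctx;			/* next slot's callback_struct, or NULL */
	unsigned long desc;		/* pending_idx == index into pending_tx_info[] */
};

struct pending_tx_info {		/* simplified stand-in */
	struct ubuf_info callback_struct;
};

struct xenvif {				/* simplified stand-in */
	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Same idea as ubuf_to_vif() in netback.c: desc is the slot's index into
 * pending_tx_info[], so stepping back desc elements lands on element 0, whose
 * address is then converted into the containing struct xenvif.
 */
static struct xenvif *ubuf_to_vif(struct ubuf_info *ubuf)
{
	unsigned long pending_idx = ubuf->desc;
	struct pending_tx_info *slot =
		container_of(ubuf, struct pending_tx_info, callback_struct);

	return container_of(slot - pending_idx, struct xenvif,
			    pending_tx_info[0]);
}

int main(void)
{
	static struct xenvif vif;
	struct ubuf_info *ubuf;
	unsigned long i;

	for (i = 0; i < MAX_PENDING_REQS; i++)
		vif.pending_tx_info[i].callback_struct.desc = i;

	/* Hypothetical skb using slots 5 -> 9 -> 12: destructor_arg would point
	 * at slot 5's callback_struct, ctx links the rest, NULL terminates.
	 */
	vif.pending_tx_info[5].callback_struct.ctx =
		&vif.pending_tx_info[9].callback_struct;
	vif.pending_tx_info[9].callback_struct.ctx =
		&vif.pending_tx_info[12].callback_struct;
	vif.pending_tx_info[12].callback_struct.ctx = NULL;

	/* Walk the chain the way the zerocopy code walks destructor_arg/ctx */
	for (ubuf = &vif.pending_tx_info[5].callback_struct; ubuf;
	     ubuf = (struct ubuf_info *)ubuf->ctx)
		printf("slot %lu belongs to vif at %p\n",
		       ubuf->desc, (void *)ubuf_to_vif(ubuf));

	return 0;
}
```

Because `desc` is always the slot's own index, a single subtraction plus `container_of` is enough to recover the owning vif from any `ubuf_info` in the array, which is why the patch never needs a back-pointer in each slot.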
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 7669d49a67e2..ef05c5c49d41 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
| @@ -38,6 +38,7 @@ | |||
| 38 | 38 | ||
| 39 | #include <xen/events.h> | 39 | #include <xen/events.h> |
| 40 | #include <asm/xen/hypercall.h> | 40 | #include <asm/xen/hypercall.h> |
| 41 | #include <xen/balloon.h> | ||
| 41 | 42 | ||
| 42 | #define XENVIF_QUEUE_LENGTH 32 | 43 | #define XENVIF_QUEUE_LENGTH 32 |
| 43 | #define XENVIF_NAPI_WEIGHT 64 | 44 | #define XENVIF_NAPI_WEIGHT 64 |
| @@ -62,6 +63,15 @@ static int xenvif_poll(struct napi_struct *napi, int budget) | |||
| 62 | struct xenvif *vif = container_of(napi, struct xenvif, napi); | 63 | struct xenvif *vif = container_of(napi, struct xenvif, napi); |
| 63 | int work_done; | 64 | int work_done; |
| 64 | 65 | ||
| 66 | /* This vif is rogue, we pretend we've there is nothing to do | ||
| 67 | * for this vif to deschedule it from NAPI. But this interface | ||
| 68 | * will be turned off in thread context later. | ||
| 69 | */ | ||
| 70 | if (unlikely(vif->disabled)) { | ||
| 71 | napi_complete(napi); | ||
| 72 | return 0; | ||
| 73 | } | ||
| 74 | |||
| 65 | work_done = xenvif_tx_action(vif, budget); | 75 | work_done = xenvif_tx_action(vif, budget); |
| 66 | 76 | ||
| 67 | if (work_done < budget) { | 77 | if (work_done < budget) { |
| @@ -113,6 +123,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id) | |||
| 113 | return IRQ_HANDLED; | 123 | return IRQ_HANDLED; |
| 114 | } | 124 | } |
| 115 | 125 | ||
| 126 | static void xenvif_wake_queue(unsigned long data) | ||
| 127 | { | ||
| 128 | struct xenvif *vif = (struct xenvif *)data; | ||
| 129 | |||
| 130 | if (netif_queue_stopped(vif->dev)) { | ||
| 131 | netdev_err(vif->dev, "draining TX queue\n"); | ||
| 132 | vif->rx_queue_purge = true; | ||
| 133 | xenvif_kick_thread(vif); | ||
| 134 | netif_wake_queue(vif->dev); | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 116 | static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) | 138 | static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) |
| 117 | { | 139 | { |
| 118 | struct xenvif *vif = netdev_priv(dev); | 140 | struct xenvif *vif = netdev_priv(dev); |
| @@ -121,7 +143,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 121 | BUG_ON(skb->dev != dev); | 143 | BUG_ON(skb->dev != dev); |
| 122 | 144 | ||
| 123 | /* Drop the packet if vif is not ready */ | 145 | /* Drop the packet if vif is not ready */ |
| 124 | if (vif->task == NULL || !xenvif_schedulable(vif)) | 146 | if (vif->task == NULL || |
| 147 | vif->dealloc_task == NULL || | ||
| 148 | !xenvif_schedulable(vif)) | ||
| 125 | goto drop; | 149 | goto drop; |
| 126 | 150 | ||
| 127 | /* At best we'll need one slot for the header and one for each | 151 | /* At best we'll need one slot for the header and one for each |
| @@ -132,16 +156,20 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 132 | /* If the skb is GSO then we'll also need an extra slot for the | 156 | /* If the skb is GSO then we'll also need an extra slot for the |
| 133 | * metadata. | 157 | * metadata. |
| 134 | */ | 158 | */ |
| 135 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || | 159 | if (skb_is_gso(skb)) |
| 136 | skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) | ||
| 137 | min_slots_needed++; | 160 | min_slots_needed++; |
| 138 | 161 | ||
| 139 | /* If the skb can't possibly fit in the remaining slots | 162 | /* If the skb can't possibly fit in the remaining slots |
| 140 | * then turn off the queue to give the ring a chance to | 163 | * then turn off the queue to give the ring a chance to |
| 141 | * drain. | 164 | * drain. |
| 142 | */ | 165 | */ |
| 143 | if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) | 166 | if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) { |
| 167 | vif->wake_queue.function = xenvif_wake_queue; | ||
| 168 | vif->wake_queue.data = (unsigned long)vif; | ||
| 144 | xenvif_stop_queue(vif); | 169 | xenvif_stop_queue(vif); |
| 170 | mod_timer(&vif->wake_queue, | ||
| 171 | jiffies + rx_drain_timeout_jiffies); | ||
| 172 | } | ||
| 145 | 173 | ||
| 146 | skb_queue_tail(&vif->rx_queue, skb); | 174 | skb_queue_tail(&vif->rx_queue, skb); |
| 147 | xenvif_kick_thread(vif); | 175 | xenvif_kick_thread(vif); |
| @@ -234,6 +262,28 @@ static const struct xenvif_stat { | |||
| 234 | "rx_gso_checksum_fixup", | 262 | "rx_gso_checksum_fixup", |
| 235 | offsetof(struct xenvif, rx_gso_checksum_fixup) | 263 | offsetof(struct xenvif, rx_gso_checksum_fixup) |
| 236 | }, | 264 | }, |
| 265 | /* If (sent != success + fail), there are probably packets never | ||
| 266 | * freed up properly! | ||
| 267 | */ | ||
| 268 | { | ||
| 269 | "tx_zerocopy_sent", | ||
| 270 | offsetof(struct xenvif, tx_zerocopy_sent), | ||
| 271 | }, | ||
| 272 | { | ||
| 273 | "tx_zerocopy_success", | ||
| 274 | offsetof(struct xenvif, tx_zerocopy_success), | ||
| 275 | }, | ||
| 276 | { | ||
| 277 | "tx_zerocopy_fail", | ||
| 278 | offsetof(struct xenvif, tx_zerocopy_fail) | ||
| 279 | }, | ||
| 280 | /* Number of packets exceeding MAX_SKB_FRAG slots. You should use | ||
| 281 | * a guest with the same MAX_SKB_FRAG | ||
| 282 | */ | ||
| 283 | { | ||
| 284 | "tx_frag_overflow", | ||
| 285 | offsetof(struct xenvif, tx_frag_overflow) | ||
| 286 | }, | ||
| 237 | }; | 287 | }; |
| 238 | 288 | ||
| 239 | static int xenvif_get_sset_count(struct net_device *dev, int string_set) | 289 | static int xenvif_get_sset_count(struct net_device *dev, int string_set) |
| @@ -322,11 +372,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, | |||
| 322 | vif->ip_csum = 1; | 372 | vif->ip_csum = 1; |
| 323 | vif->dev = dev; | 373 | vif->dev = dev; |
| 324 | 374 | ||
| 375 | vif->disabled = false; | ||
| 376 | |||
| 325 | vif->credit_bytes = vif->remaining_credit = ~0UL; | 377 | vif->credit_bytes = vif->remaining_credit = ~0UL; |
| 326 | vif->credit_usec = 0UL; | 378 | vif->credit_usec = 0UL; |
| 327 | init_timer(&vif->credit_timeout); | 379 | init_timer(&vif->credit_timeout); |
| 328 | vif->credit_window_start = get_jiffies_64(); | 380 | vif->credit_window_start = get_jiffies_64(); |
| 329 | 381 | ||
| 382 | init_timer(&vif->wake_queue); | ||
| 383 | |||
| 330 | dev->netdev_ops = &xenvif_netdev_ops; | 384 | dev->netdev_ops = &xenvif_netdev_ops; |
| 331 | dev->hw_features = NETIF_F_SG | | 385 | dev->hw_features = NETIF_F_SG | |
| 332 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | | 386 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | |
| @@ -343,8 +397,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, | |||
| 343 | vif->pending_prod = MAX_PENDING_REQS; | 397 | vif->pending_prod = MAX_PENDING_REQS; |
| 344 | for (i = 0; i < MAX_PENDING_REQS; i++) | 398 | for (i = 0; i < MAX_PENDING_REQS; i++) |
| 345 | vif->pending_ring[i] = i; | 399 | vif->pending_ring[i] = i; |
| 346 | for (i = 0; i < MAX_PENDING_REQS; i++) | 400 | spin_lock_init(&vif->callback_lock); |
| 347 | vif->mmap_pages[i] = NULL; | 401 | spin_lock_init(&vif->response_lock); |
| 402 | /* If ballooning is disabled, this will consume real memory, so you | ||
| 403 | * better enable it. The long term solution would be to use just a | ||
| 404 | * bunch of valid page descriptors, without dependency on ballooning | ||
| 405 | */ | ||
| 406 | err = alloc_xenballooned_pages(MAX_PENDING_REQS, | ||
| 407 | vif->mmap_pages, | ||
| 408 | false); | ||
| 409 | if (err) { | ||
| 410 | netdev_err(dev, "Could not reserve mmap_pages\n"); | ||
| 411 | return ERR_PTR(-ENOMEM); | ||
| 412 | } | ||
| 413 | for (i = 0; i < MAX_PENDING_REQS; i++) { | ||
| 414 | vif->pending_tx_info[i].callback_struct = (struct ubuf_info) | ||
| 415 | { .callback = xenvif_zerocopy_callback, | ||
| 416 | .ctx = NULL, | ||
| 417 | .desc = i }; | ||
| 418 | vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; | ||
| 419 | } | ||
| 348 | 420 | ||
| 349 | /* | 421 | /* |
| 350 | * Initialise a dummy MAC address. We choose the numerically | 422 | * Initialise a dummy MAC address. We choose the numerically |
| @@ -382,12 +454,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, | |||
| 382 | 454 | ||
| 383 | BUG_ON(vif->tx_irq); | 455 | BUG_ON(vif->tx_irq); |
| 384 | BUG_ON(vif->task); | 456 | BUG_ON(vif->task); |
| 457 | BUG_ON(vif->dealloc_task); | ||
| 385 | 458 | ||
| 386 | err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref); | 459 | err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref); |
| 387 | if (err < 0) | 460 | if (err < 0) |
| 388 | goto err; | 461 | goto err; |
| 389 | 462 | ||
| 390 | init_waitqueue_head(&vif->wq); | 463 | init_waitqueue_head(&vif->wq); |
| 464 | init_waitqueue_head(&vif->dealloc_wq); | ||
| 391 | 465 | ||
| 392 | if (tx_evtchn == rx_evtchn) { | 466 | if (tx_evtchn == rx_evtchn) { |
| 393 | /* feature-split-event-channels == 0 */ | 467 | /* feature-split-event-channels == 0 */ |
| @@ -421,8 +495,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, | |||
| 421 | disable_irq(vif->rx_irq); | 495 | disable_irq(vif->rx_irq); |
| 422 | } | 496 | } |
| 423 | 497 | ||
| 424 | task = kthread_create(xenvif_kthread, | 498 | task = kthread_create(xenvif_kthread_guest_rx, |
| 425 | (void *)vif, "%s", vif->dev->name); | 499 | (void *)vif, "%s-guest-rx", vif->dev->name); |
| 426 | if (IS_ERR(task)) { | 500 | if (IS_ERR(task)) { |
| 427 | pr_warn("Could not allocate kthread for %s\n", vif->dev->name); | 501 | pr_warn("Could not allocate kthread for %s\n", vif->dev->name); |
| 428 | err = PTR_ERR(task); | 502 | err = PTR_ERR(task); |
| @@ -431,6 +505,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, | |||
| 431 | 505 | ||
| 432 | vif->task = task; | 506 | vif->task = task; |
| 433 | 507 | ||
| 508 | task = kthread_create(xenvif_dealloc_kthread, | ||
| 509 | (void *)vif, "%s-dealloc", vif->dev->name); | ||
| 510 | if (IS_ERR(task)) { | ||
| 511 | pr_warn("Could not allocate kthread for %s\n", vif->dev->name); | ||
| 512 | err = PTR_ERR(task); | ||
| 513 | goto err_rx_unbind; | ||
| 514 | } | ||
| 515 | |||
| 516 | vif->dealloc_task = task; | ||
| 517 | |||
| 434 | rtnl_lock(); | 518 | rtnl_lock(); |
| 435 | if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) | 519 | if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) |
| 436 | dev_set_mtu(vif->dev, ETH_DATA_LEN); | 520 | dev_set_mtu(vif->dev, ETH_DATA_LEN); |
| @@ -441,6 +525,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, | |||
| 441 | rtnl_unlock(); | 525 | rtnl_unlock(); |
| 442 | 526 | ||
| 443 | wake_up_process(vif->task); | 527 | wake_up_process(vif->task); |
| 528 | wake_up_process(vif->dealloc_task); | ||
| 444 | 529 | ||
| 445 | return 0; | 530 | return 0; |
| 446 | 531 | ||
| @@ -474,10 +559,16 @@ void xenvif_disconnect(struct xenvif *vif) | |||
| 474 | xenvif_carrier_off(vif); | 559 | xenvif_carrier_off(vif); |
| 475 | 560 | ||
| 476 | if (vif->task) { | 561 | if (vif->task) { |
| 562 | del_timer_sync(&vif->wake_queue); | ||
| 477 | kthread_stop(vif->task); | 563 | kthread_stop(vif->task); |
| 478 | vif->task = NULL; | 564 | vif->task = NULL; |
| 479 | } | 565 | } |
| 480 | 566 | ||
| 567 | if (vif->dealloc_task) { | ||
| 568 | kthread_stop(vif->dealloc_task); | ||
| 569 | vif->dealloc_task = NULL; | ||
| 570 | } | ||
| 571 | |||
| 481 | if (vif->tx_irq) { | 572 | if (vif->tx_irq) { |
| 482 | if (vif->tx_irq == vif->rx_irq) | 573 | if (vif->tx_irq == vif->rx_irq) |
| 483 | unbind_from_irqhandler(vif->tx_irq, vif); | 574 | unbind_from_irqhandler(vif->tx_irq, vif); |
| @@ -493,6 +584,43 @@ void xenvif_disconnect(struct xenvif *vif) | |||
| 493 | 584 | ||
| 494 | void xenvif_free(struct xenvif *vif) | 585 | void xenvif_free(struct xenvif *vif) |
| 495 | { | 586 | { |
| 587 | int i, unmap_timeout = 0; | ||
| 588 | /* Here we want to avoid timeout messages if an skb can be legitimately | ||
| 589 | * stuck somewhere else. Realistically this could be an another vif's | ||
| 590 | * internal or QDisc queue. That another vif also has this | ||
| 591 | * rx_drain_timeout_msecs timeout, but the timer only ditches the | ||
| 592 | * internal queue. After that, the QDisc queue can put in worst case | ||
| 593 | * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's | ||
| 594 | * internal queue, so we need several rounds of such timeouts until we | ||
| 595 | * can be sure that no another vif should have skb's from us. We are | ||
| 596 | * not sending more skb's, so newly stuck packets are not interesting | ||
| 597 | * for us here. | ||
| 598 | */ | ||
| 599 | unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) * | ||
| 600 | DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS)); | ||
| 601 | |||
| 602 | for (i = 0; i < MAX_PENDING_REQS; ++i) { | ||
| 603 | if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) { | ||
| 604 | unmap_timeout++; | ||
| 605 | schedule_timeout(msecs_to_jiffies(1000)); | ||
| 606 | if (unmap_timeout > worst_case_skb_lifetime && | ||
| 607 | net_ratelimit()) | ||
| 608 | netdev_err(vif->dev, | ||
| 609 | "Page still granted! Index: %x\n", | ||
| 610 | i); | ||
| 611 | /* If there are still unmapped pages, reset the loop to | ||
| 612 | * start checking again. We shouldn't exit here until | ||
| 613 | * dealloc thread and NAPI instance release all the | ||
| 614 | * pages. If a kernel bug causes the skbs to stall | ||
| 615 | * somewhere, the interface cannot be brought down | ||
| 616 | * properly. | ||
| 617 | */ | ||
| 618 | i = -1; | ||
| 619 | } | ||
| 620 | } | ||
| 621 | |||
| 622 | free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages); | ||
| 623 | |||
| 496 | netif_napi_del(&vif->napi); | 624 | netif_napi_del(&vif->napi); |
| 497 | 625 | ||
| 498 | unregister_netdev(vif->dev); | 626 | unregister_netdev(vif->dev); |
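The unmap-timeout loop in `xenvif_free()` above only warns once `unmap_timeout` exceeds `worst_case_skb_lifetime`, which scales the `rx_drain_timeout_msecs` module parameter (given a 10000 ms default in netback.c, shown in the next file) by how many drain rounds another vif's queues could need. Below is a worked instance of that bound, assuming 4 KiB ring pages (so `XEN_NETIF_RX_RING_SIZE` = 256) and `MAX_SKB_FRAGS` = 17; both constants are assumptions about the build configuration, not values stated in this diff.

```c
/* Worked example of the worst_case_skb_lifetime bound in xenvif_free(),
 * using assumed build-time constants (4 KiB ring page, MAX_SKB_FRAGS == 17).
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

#define XENVIF_QUEUE_LENGTH	32	/* from interface.c above */
#define XEN_NETIF_RX_RING_SIZE	256	/* assumed: 4 KiB ring page */
#define MAX_SKB_FRAGS		17	/* assumed kernel configuration */
#define RX_DRAIN_TIMEOUT_MSECS	10000	/* module parameter default in netback.c */

int main(void)
{
	/* 256 / 17 = 15 skbs drained per round, so DIV_ROUND_UP(32, 15) = 3 rounds */
	unsigned int rounds = DIV_ROUND_UP(XENVIF_QUEUE_LENGTH,
					   XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS);
	unsigned int worst_case_skb_lifetime =
		(RX_DRAIN_TIMEOUT_MSECS / 1000) * rounds;

	printf("worst-case skb lifetime: %u s\n", worst_case_skb_lifetime);
	return 0;
}
```

Under these assumptions the loop sleeps one second per iteration and only starts logging "Page still granted" after roughly 30 seconds, i.e. three full 10 s drain rounds.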
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e5284bca2d90..76665405c5aa 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
| 38 | #include <linux/if_vlan.h> | 38 | #include <linux/if_vlan.h> |
| 39 | #include <linux/udp.h> | 39 | #include <linux/udp.h> |
| 40 | #include <linux/highmem.h> | ||
| 40 | 41 | ||
| 41 | #include <net/tcp.h> | 42 | #include <net/tcp.h> |
| 42 | 43 | ||
| @@ -54,6 +55,13 @@ | |||
| 54 | bool separate_tx_rx_irq = 1; | 55 | bool separate_tx_rx_irq = 1; |
| 55 | module_param(separate_tx_rx_irq, bool, 0644); | 56 | module_param(separate_tx_rx_irq, bool, 0644); |
| 56 | 57 | ||
| 58 | /* When guest ring is filled up, qdisc queues the packets for us, but we have | ||
| 59 | * to timeout them, otherwise other guests' packets can get stuck there | ||
| 60 | */ | ||
| 61 | unsigned int rx_drain_timeout_msecs = 10000; | ||
| 62 | module_param(rx_drain_timeout_msecs, uint, 0444); | ||
| 63 | unsigned int rx_drain_timeout_jiffies; | ||
| 64 | |||
| 57 | /* | 65 | /* |
| 58 | * This is the maximum slots a skb can have. If a guest sends a skb | 66 | * This is the maximum slots a skb can have. If a guest sends a skb |
| 59 | * which exceeds this limit it is considered malicious. | 67 | * which exceeds this limit it is considered malicious. |
| @@ -62,24 +70,6 @@ module_param(separate_tx_rx_irq, bool, 0644); | |||
| 62 | static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; | 70 | static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; |
| 63 | module_param(fatal_skb_slots, uint, 0444); | 71 | module_param(fatal_skb_slots, uint, 0444); |
| 64 | 72 | ||
| 65 | /* | ||
| 66 | * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating | ||
| 67 | * the maximum slots a valid packet can use. Now this value is defined | ||
| 68 | * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by | ||
| 69 | * all backend. | ||
| 70 | */ | ||
| 71 | #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN | ||
| 72 | |||
| 73 | /* | ||
| 74 | * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of | ||
| 75 | * one or more merged tx requests, otherwise it is the continuation of | ||
| 76 | * previous tx request. | ||
| 77 | */ | ||
| 78 | static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx) | ||
| 79 | { | ||
| 80 | return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; | ||
| 81 | } | ||
| 82 | |||
| 83 | static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, | 73 | static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, |
| 84 | u8 status); | 74 | u8 status); |
| 85 | 75 | ||
| @@ -109,6 +99,21 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, | |||
| 109 | return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); | 99 | return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); |
| 110 | } | 100 | } |
| 111 | 101 | ||
| 102 | #define callback_param(vif, pending_idx) \ | ||
| 103 | (vif->pending_tx_info[pending_idx].callback_struct) | ||
| 104 | |||
| 105 | /* Find the containing VIF's structure from a pointer in pending_tx_info array | ||
| 106 | */ | ||
| 107 | static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf) | ||
| 108 | { | ||
| 109 | u16 pending_idx = ubuf->desc; | ||
| 110 | struct pending_tx_info *temp = | ||
| 111 | container_of(ubuf, struct pending_tx_info, callback_struct); | ||
| 112 | return container_of(temp - pending_idx, | ||
| 113 | struct xenvif, | ||
| 114 | pending_tx_info[0]); | ||
| 115 | } | ||
| 116 | |||
| 112 | /* This is a miniumum size for the linear area to avoid lots of | 117 | /* This is a miniumum size for the linear area to avoid lots of |
| 113 | * calls to __pskb_pull_tail() as we set up checksum offsets. The | 118 | * calls to __pskb_pull_tail() as we set up checksum offsets. The |
| 114 | * value 128 was chosen as it covers all IPv4 and most likely | 119 | * value 128 was chosen as it covers all IPv4 and most likely |
| @@ -131,12 +136,6 @@ static inline pending_ring_idx_t pending_index(unsigned i) | |||
| 131 | return i & (MAX_PENDING_REQS-1); | 136 | return i & (MAX_PENDING_REQS-1); |
| 132 | } | 137 | } |
| 133 | 138 | ||
| 134 | static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) | ||
| 135 | { | ||
| 136 | return MAX_PENDING_REQS - | ||
| 137 | vif->pending_prod + vif->pending_cons; | ||
| 138 | } | ||
| 139 | |||
| 140 | bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed) | 139 | bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed) |
| 141 | { | 140 | { |
| 142 | RING_IDX prod, cons; | 141 | RING_IDX prod, cons; |
| @@ -192,8 +191,8 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head) | |||
| 192 | * into multiple copies tend to give large frags their | 191 | * into multiple copies tend to give large frags their |
| 193 | * own buffers as before. | 192 | * own buffers as before. |
| 194 | */ | 193 | */ |
| 195 | if ((offset + size > MAX_BUFFER_OFFSET) && | 194 | BUG_ON(size > MAX_BUFFER_OFFSET); |
| 196 | (size <= MAX_BUFFER_OFFSET) && offset && !head) | 195 | if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head) |
| 197 | return true; | 196 | return true; |
| 198 | 197 | ||
| 199 | return false; | 198 | return false; |
| @@ -235,12 +234,14 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, | |||
| 235 | static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, | 234 | static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, |
| 236 | struct netrx_pending_operations *npo, | 235 | struct netrx_pending_operations *npo, |
| 237 | struct page *page, unsigned long size, | 236 | struct page *page, unsigned long size, |
| 238 | unsigned long offset, int *head) | 237 | unsigned long offset, int *head, |
| 238 | struct xenvif *foreign_vif, | ||
| 239 | grant_ref_t foreign_gref) | ||
| 239 | { | 240 | { |
| 240 | struct gnttab_copy *copy_gop; | 241 | struct gnttab_copy *copy_gop; |
| 241 | struct xenvif_rx_meta *meta; | 242 | struct xenvif_rx_meta *meta; |
| 242 | unsigned long bytes; | 243 | unsigned long bytes; |
| 243 | int gso_type; | 244 | int gso_type = XEN_NETIF_GSO_TYPE_NONE; |
| 244 | 245 | ||
| 245 | /* Data must not cross a page boundary. */ | 246 | /* Data must not cross a page boundary. */ |
| 246 | BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); | 247 | BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); |
| @@ -277,8 +278,15 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, | |||
| 277 | copy_gop->flags = GNTCOPY_dest_gref; | 278 | copy_gop->flags = GNTCOPY_dest_gref; |
| 278 | copy_gop->len = bytes; | 279 | copy_gop->len = bytes; |
| 279 | 280 | ||
| 280 | copy_gop->source.domid = DOMID_SELF; | 281 | if (foreign_vif) { |
| 281 | copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); | 282 | copy_gop->source.domid = foreign_vif->domid; |
| 283 | copy_gop->source.u.ref = foreign_gref; | ||
| 284 | copy_gop->flags |= GNTCOPY_source_gref; | ||
| 285 | } else { | ||
| 286 | copy_gop->source.domid = DOMID_SELF; | ||
| 287 | copy_gop->source.u.gmfn = | ||
| 288 | virt_to_mfn(page_address(page)); | ||
| 289 | } | ||
| 282 | copy_gop->source.offset = offset; | 290 | copy_gop->source.offset = offset; |
| 283 | 291 | ||
| 284 | copy_gop->dest.domid = vif->domid; | 292 | copy_gop->dest.domid = vif->domid; |
| @@ -299,12 +307,12 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, | |||
| 299 | } | 307 | } |
| 300 | 308 | ||
| 301 | /* Leave a gap for the GSO descriptor. */ | 309 | /* Leave a gap for the GSO descriptor. */ |
| 302 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) | 310 | if (skb_is_gso(skb)) { |
| 303 | gso_type = XEN_NETIF_GSO_TYPE_TCPV4; | 311 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) |
| 304 | else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) | 312 | gso_type = XEN_NETIF_GSO_TYPE_TCPV4; |
| 305 | gso_type = XEN_NETIF_GSO_TYPE_TCPV6; | 313 | else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) |
| 306 | else | 314 | gso_type = XEN_NETIF_GSO_TYPE_TCPV6; |
| 307 | gso_type = XEN_NETIF_GSO_TYPE_NONE; | 315 | } |
| 308 | 316 | ||
| 309 | if (*head && ((1 << gso_type) & vif->gso_mask)) | 317 | if (*head && ((1 << gso_type) & vif->gso_mask)) |
| 310 | vif->rx.req_cons++; | 318 | vif->rx.req_cons++; |
| @@ -338,19 +346,18 @@ static int xenvif_gop_skb(struct sk_buff *skb, | |||
| 338 | int head = 1; | 346 | int head = 1; |
| 339 | int old_meta_prod; | 347 | int old_meta_prod; |
| 340 | int gso_type; | 348 | int gso_type; |
| 341 | int gso_size; | 349 | struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; |
| 350 | grant_ref_t foreign_grefs[MAX_SKB_FRAGS]; | ||
| 351 | struct xenvif *foreign_vif = NULL; | ||
| 342 | 352 | ||
| 343 | old_meta_prod = npo->meta_prod; | 353 | old_meta_prod = npo->meta_prod; |
| 344 | 354 | ||
| 345 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { | 355 | gso_type = XEN_NETIF_GSO_TYPE_NONE; |
| 346 | gso_type = XEN_NETIF_GSO_TYPE_TCPV4; | 356 | if (skb_is_gso(skb)) { |
| 347 | gso_size = skb_shinfo(skb)->gso_size; | 357 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) |
| 348 | } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { | 358 | gso_type = XEN_NETIF_GSO_TYPE_TCPV4; |
| 349 | gso_type = XEN_NETIF_GSO_TYPE_TCPV6; | 359 | else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) |
| 350 | gso_size = skb_shinfo(skb)->gso_size; | 360 | gso_type = XEN_NETIF_GSO_TYPE_TCPV6; |
| 351 | } else { | ||
| 352 | gso_type = XEN_NETIF_GSO_TYPE_NONE; | ||
| 353 | gso_size = 0; | ||
| 354 | } | 361 | } |
| 355 | 362 | ||
| 356 | /* Set up a GSO prefix descriptor, if necessary */ | 363 | /* Set up a GSO prefix descriptor, if necessary */ |
| @@ -358,7 +365,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, | |||
| 358 | req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); | 365 | req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); |
| 359 | meta = npo->meta + npo->meta_prod++; | 366 | meta = npo->meta + npo->meta_prod++; |
| 360 | meta->gso_type = gso_type; | 367 | meta->gso_type = gso_type; |
| 361 | meta->gso_size = gso_size; | 368 | meta->gso_size = skb_shinfo(skb)->gso_size; |
| 362 | meta->size = 0; | 369 | meta->size = 0; |
| 363 | meta->id = req->id; | 370 | meta->id = req->id; |
| 364 | } | 371 | } |
| @@ -368,7 +375,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, | |||
| 368 | 375 | ||
| 369 | if ((1 << gso_type) & vif->gso_mask) { | 376 | if ((1 << gso_type) & vif->gso_mask) { |
| 370 | meta->gso_type = gso_type; | 377 | meta->gso_type = gso_type; |
| 371 | meta->gso_size = gso_size; | 378 | meta->gso_size = skb_shinfo(skb)->gso_size; |
| 372 | } else { | 379 | } else { |
| 373 | meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; | 380 | meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; |
| 374 | meta->gso_size = 0; | 381 | meta->gso_size = 0; |
| @@ -379,6 +386,19 @@ static int xenvif_gop_skb(struct sk_buff *skb, | |||
| 379 | npo->copy_off = 0; | 386 | npo->copy_off = 0; |
| 380 | npo->copy_gref = req->gref; | 387 | npo->copy_gref = req->gref; |
| 381 | 388 | ||
| 389 | if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && | ||
| 390 | (ubuf->callback == &xenvif_zerocopy_callback)) { | ||
| 391 | int i = 0; | ||
| 392 | foreign_vif = ubuf_to_vif(ubuf); | ||
| 393 | |||
| 394 | do { | ||
| 395 | u16 pending_idx = ubuf->desc; | ||
| 396 | foreign_grefs[i++] = | ||
| 397 | foreign_vif->pending_tx_info[pending_idx].req.gref; | ||
| 398 | ubuf = (struct ubuf_info *) ubuf->ctx; | ||
| 399 | } while (ubuf); | ||
| 400 | } | ||
| 401 | |||
| 382 | data = skb->data; | 402 | data = skb->data; |
| 383 | while (data < skb_tail_pointer(skb)) { | 403 | while (data < skb_tail_pointer(skb)) { |
| 384 | unsigned int offset = offset_in_page(data); | 404 | unsigned int offset = offset_in_page(data); |
| @@ -388,7 +408,9 @@ static int xenvif_gop_skb(struct sk_buff *skb, | |||
| 388 | len = skb_tail_pointer(skb) - data; | 408 | len = skb_tail_pointer(skb) - data; |
| 389 | 409 | ||
| 390 | xenvif_gop_frag_copy(vif, skb, npo, | 410 | xenvif_gop_frag_copy(vif, skb, npo, |
| 391 | virt_to_page(data), len, offset, &head); | 411 | virt_to_page(data), len, offset, &head, |
| 412 | NULL, | ||
| 413 | 0); | ||
| 392 | data += len; | 414 | data += len; |
| 393 | } | 415 | } |
| 394 | 416 | ||
| @@ -397,7 +419,9 @@ static int xenvif_gop_skb(struct sk_buff *skb, | |||
| 397 | skb_frag_page(&skb_shinfo(skb)->frags[i]), | 419 | skb_frag_page(&skb_shinfo(skb)->frags[i]), |
| 398 | skb_frag_size(&skb_shinfo(skb)->frags[i]), | 420 | skb_frag_size(&skb_shinfo(skb)->frags[i]), |
| 399 | skb_shinfo(skb)->frags[i].page_offset, | 421 | skb_shinfo(skb)->frags[i].page_offset, |
| 400 | &head); | 422 | &head, |
| 423 | foreign_vif, | ||
| 424 | foreign_grefs[i]); | ||
| 401 | } | 425 | } |
| 402 | 426 | ||
| 403 | return npo->meta_prod - old_meta_prod; | 427 | return npo->meta_prod - old_meta_prod; |
| @@ -455,10 +479,12 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status, | |||
| 455 | } | 479 | } |
| 456 | } | 480 | } |
| 457 | 481 | ||
| 458 | struct skb_cb_overlay { | 482 | struct xenvif_rx_cb { |
| 459 | int meta_slots_used; | 483 | int meta_slots_used; |
| 460 | }; | 484 | }; |
| 461 | 485 | ||
| 486 | #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) | ||
| 487 | |||
| 462 | void xenvif_kick_thread(struct xenvif *vif) | 488 | void xenvif_kick_thread(struct xenvif *vif) |
| 463 | { | 489 | { |
| 464 | wake_up(&vif->wq); | 490 | wake_up(&vif->wq); |
| @@ -474,7 +500,6 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 474 | LIST_HEAD(notify); | 500 | LIST_HEAD(notify); |
| 475 | int ret; | 501 | int ret; |
| 476 | unsigned long offset; | 502 | unsigned long offset; |
| 477 | struct skb_cb_overlay *sco; | ||
| 478 | bool need_to_notify = false; | 503 | bool need_to_notify = false; |
| 479 | 504 | ||
| 480 | struct netrx_pending_operations npo = { | 505 | struct netrx_pending_operations npo = { |
| @@ -486,6 +511,8 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 486 | 511 | ||
| 487 | while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { | 512 | while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { |
| 488 | RING_IDX max_slots_needed; | 513 | RING_IDX max_slots_needed; |
| 514 | RING_IDX old_req_cons; | ||
| 515 | RING_IDX ring_slots_used; | ||
| 489 | int i; | 516 | int i; |
| 490 | 517 | ||
| 491 | /* We need a cheap worse case estimate for the number of | 518 | /* We need a cheap worse case estimate for the number of |
| @@ -497,11 +524,31 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 497 | PAGE_SIZE); | 524 | PAGE_SIZE); |
| 498 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 525 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
| 499 | unsigned int size; | 526 | unsigned int size; |
| 527 | unsigned int offset; | ||
| 528 | |||
| 500 | size = skb_frag_size(&skb_shinfo(skb)->frags[i]); | 529 | size = skb_frag_size(&skb_shinfo(skb)->frags[i]); |
| 501 | max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE); | 530 | offset = skb_shinfo(skb)->frags[i].page_offset; |
| 531 | |||
| 532 | /* For a worse-case estimate we need to factor in | ||
| 533 | * the fragment page offset as this will affect the | ||
| 534 | * number of times xenvif_gop_frag_copy() will | ||
| 535 | * call start_new_rx_buffer(). | ||
| 536 | */ | ||
| 537 | max_slots_needed += DIV_ROUND_UP(offset + size, | ||
| 538 | PAGE_SIZE); | ||
| 502 | } | 539 | } |
| 503 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || | 540 | |
| 504 | skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) | 541 | /* To avoid the estimate becoming too pessimal for some |
| 542 | * frontends that limit posted rx requests, cap the estimate | ||
| 543 | * at MAX_SKB_FRAGS. | ||
| 544 | */ | ||
| 545 | if (max_slots_needed > MAX_SKB_FRAGS) | ||
| 546 | max_slots_needed = MAX_SKB_FRAGS; | ||
| 547 | |||
| 548 | /* We may need one more slot for GSO metadata */ | ||
| 549 | if (skb_is_gso(skb) && | ||
| 550 | (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || | ||
| 551 | skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) | ||
| 505 | max_slots_needed++; | 552 | max_slots_needed++; |
| 506 | 553 | ||
| 507 | /* If the skb may not fit then bail out now */ | 554 | /* If the skb may not fit then bail out now */ |
| @@ -513,9 +560,11 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 513 | } else | 560 | } else |
| 514 | vif->rx_last_skb_slots = 0; | 561 | vif->rx_last_skb_slots = 0; |
| 515 | 562 | ||
| 516 | sco = (struct skb_cb_overlay *)skb->cb; | 563 | old_req_cons = vif->rx.req_cons; |
| 517 | sco->meta_slots_used = xenvif_gop_skb(skb, &npo); | 564 | XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo); |
| 518 | BUG_ON(sco->meta_slots_used > max_slots_needed); | 565 | ring_slots_used = vif->rx.req_cons - old_req_cons; |
| 566 | |||
| 567 | BUG_ON(ring_slots_used > max_slots_needed); | ||
| 519 | 568 | ||
| 520 | __skb_queue_tail(&rxq, skb); | 569 | __skb_queue_tail(&rxq, skb); |
| 521 | } | 570 | } |
| @@ -529,7 +578,6 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 529 | gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); | 578 | gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); |
| 530 | 579 | ||
| 531 | while ((skb = __skb_dequeue(&rxq)) != NULL) { | 580 | while ((skb = __skb_dequeue(&rxq)) != NULL) { |
| 532 | sco = (struct skb_cb_overlay *)skb->cb; | ||
| 533 | 581 | ||
| 534 | if ((1 << vif->meta[npo.meta_cons].gso_type) & | 582 | if ((1 << vif->meta[npo.meta_cons].gso_type) & |
| 535 | vif->gso_prefix_mask) { | 583 | vif->gso_prefix_mask) { |
| @@ -540,19 +588,21 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 540 | 588 | ||
| 541 | resp->offset = vif->meta[npo.meta_cons].gso_size; | 589 | resp->offset = vif->meta[npo.meta_cons].gso_size; |
| 542 | resp->id = vif->meta[npo.meta_cons].id; | 590 | resp->id = vif->meta[npo.meta_cons].id; |
| 543 | resp->status = sco->meta_slots_used; | 591 | resp->status = XENVIF_RX_CB(skb)->meta_slots_used; |
| 544 | 592 | ||
| 545 | npo.meta_cons++; | 593 | npo.meta_cons++; |
| 546 | sco->meta_slots_used--; | 594 | XENVIF_RX_CB(skb)->meta_slots_used--; |
| 547 | } | 595 | } |
| 548 | 596 | ||
| 549 | 597 | ||
| 550 | vif->dev->stats.tx_bytes += skb->len; | 598 | vif->dev->stats.tx_bytes += skb->len; |
| 551 | vif->dev->stats.tx_packets++; | 599 | vif->dev->stats.tx_packets++; |
| 552 | 600 | ||
| 553 | status = xenvif_check_gop(vif, sco->meta_slots_used, &npo); | 601 | status = xenvif_check_gop(vif, |
| 602 | XENVIF_RX_CB(skb)->meta_slots_used, | ||
| 603 | &npo); | ||
| 554 | 604 | ||
| 555 | if (sco->meta_slots_used == 1) | 605 | if (XENVIF_RX_CB(skb)->meta_slots_used == 1) |
| 556 | flags = 0; | 606 | flags = 0; |
| 557 | else | 607 | else |
| 558 | flags = XEN_NETRXF_more_data; | 608 | flags = XEN_NETRXF_more_data; |
| @@ -589,13 +639,13 @@ static void xenvif_rx_action(struct xenvif *vif) | |||
| 589 | 639 | ||
| 590 | xenvif_add_frag_responses(vif, status, | 640 | xenvif_add_frag_responses(vif, status, |
| 591 | vif->meta + npo.meta_cons + 1, | 641 | vif->meta + npo.meta_cons + 1, |
| 592 | sco->meta_slots_used); | 642 | XENVIF_RX_CB(skb)->meta_slots_used); |
| 593 | 643 | ||
| 594 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); | 644 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); |
| 595 | 645 | ||
| 596 | need_to_notify |= !!ret; | 646 | need_to_notify |= !!ret; |
| 597 | 647 | ||
| 598 | npo.meta_cons += sco->meta_slots_used; | 648 | npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used; |
| 599 | dev_kfree_skb(skb); | 649 | dev_kfree_skb(skb); |
| 600 | } | 650 | } |
| 601 | 651 | ||
| @@ -645,9 +695,12 @@ static void xenvif_tx_err(struct xenvif *vif, | |||
| 645 | struct xen_netif_tx_request *txp, RING_IDX end) | 695 | struct xen_netif_tx_request *txp, RING_IDX end) |
| 646 | { | 696 | { |
| 647 | RING_IDX cons = vif->tx.req_cons; | 697 | RING_IDX cons = vif->tx.req_cons; |
| 698 | unsigned long flags; | ||
| 648 | 699 | ||
| 649 | do { | 700 | do { |
| 701 | spin_lock_irqsave(&vif->response_lock, flags); | ||
| 650 | make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); | 702 | make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); |
| 703 | spin_unlock_irqrestore(&vif->response_lock, flags); | ||
| 651 | if (cons == end) | 704 | if (cons == end) |
| 652 | break; | 705 | break; |
| 653 | txp = RING_GET_REQUEST(&vif->tx, cons++); | 706 | txp = RING_GET_REQUEST(&vif->tx, cons++); |
| @@ -658,7 +711,8 @@ static void xenvif_tx_err(struct xenvif *vif, | |||
| 658 | static void xenvif_fatal_tx_err(struct xenvif *vif) | 711 | static void xenvif_fatal_tx_err(struct xenvif *vif) |
| 659 | { | 712 | { |
| 660 | netdev_err(vif->dev, "fatal error; disabling device\n"); | 713 | netdev_err(vif->dev, "fatal error; disabling device\n"); |
| 661 | xenvif_carrier_off(vif); | 714 | vif->disabled = true; |
| 715 | xenvif_kick_thread(vif); | ||
| 662 | } | 716 | } |
| 663 | 717 | ||
| 664 | static int xenvif_count_requests(struct xenvif *vif, | 718 | static int xenvif_count_requests(struct xenvif *vif, |
| @@ -759,204 +813,220 @@ static int xenvif_count_requests(struct xenvif *vif, | |||
| 759 | return slots; | 813 | return slots; |
| 760 | } | 814 | } |
| 761 | 815 | ||
| 762 | static struct page *xenvif_alloc_page(struct xenvif *vif, | 816 | |
| 763 | u16 pending_idx) | 817 | struct xenvif_tx_cb { |
| 818 | u16 pending_idx; | ||
| 819 | }; | ||
| 820 | |||
| 821 | #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) | ||
| 822 | |||
| 823 | static inline void xenvif_tx_create_map_op(struct xenvif *vif, | ||
| 824 | u16 pending_idx, | ||
| 825 | struct xen_netif_tx_request *txp, | ||
| 826 | struct gnttab_map_grant_ref *mop) | ||
| 764 | { | 827 | { |
| 765 | struct page *page; | 828 | vif->pages_to_map[mop-vif->tx_map_ops] = vif->mmap_pages[pending_idx]; |
| 829 | gnttab_set_map_op(mop, idx_to_kaddr(vif, pending_idx), | ||
| 830 | GNTMAP_host_map | GNTMAP_readonly, | ||
| 831 | txp->gref, vif->domid); | ||
| 832 | |||
| 833 | memcpy(&vif->pending_tx_info[pending_idx].req, txp, | ||
| 834 | sizeof(*txp)); | ||
| 835 | } | ||
| 766 | 836 | ||
| 767 | page = alloc_page(GFP_ATOMIC|__GFP_COLD); | 837 | static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) |
| 768 | if (!page) | 838 | { |
| 839 | struct sk_buff *skb = | ||
| 840 | alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN, | ||
| 841 | GFP_ATOMIC | __GFP_NOWARN); | ||
| 842 | if (unlikely(skb == NULL)) | ||
| 769 | return NULL; | 843 | return NULL; |
| 770 | vif->mmap_pages[pending_idx] = page; | ||
| 771 | 844 | ||
| 772 | return page; | 845 | /* Packets passed to netif_rx() must have some headroom. */ |
| 846 | skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); | ||
| 847 | |||
| 848 | /* Initialize it here to avoid later surprises */ | ||
| 849 | skb_shinfo(skb)->destructor_arg = NULL; | ||
| 850 | |||
| 851 | return skb; | ||
| 773 | } | 852 | } |
| 774 | 853 | ||
| 775 | static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif, | 854 | static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif, |
| 776 | struct sk_buff *skb, | 855 | struct sk_buff *skb, |
| 777 | struct xen_netif_tx_request *txp, | 856 | struct xen_netif_tx_request *txp, |
| 778 | struct gnttab_copy *gop) | 857 | struct gnttab_map_grant_ref *gop) |
| 779 | { | 858 | { |
| 780 | struct skb_shared_info *shinfo = skb_shinfo(skb); | 859 | struct skb_shared_info *shinfo = skb_shinfo(skb); |
| 781 | skb_frag_t *frags = shinfo->frags; | 860 | skb_frag_t *frags = shinfo->frags; |
| 782 | u16 pending_idx = *((u16 *)skb->data); | 861 | u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; |
| 783 | u16 head_idx = 0; | 862 | int start; |
| 784 | int slot, start; | 863 | pending_ring_idx_t index; |
| 785 | struct page *page; | 864 | unsigned int nr_slots, frag_overflow = 0; |
| 786 | pending_ring_idx_t index, start_idx = 0; | ||
| 787 | uint16_t dst_offset; | ||
| 788 | unsigned int nr_slots; | ||
| 789 | struct pending_tx_info *first = NULL; | ||
| 790 | 865 | ||
| 791 | /* At this point shinfo->nr_frags is in fact the number of | 866 | /* At this point shinfo->nr_frags is in fact the number of |
| 792 | * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. | 867 | * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. |
| 793 | */ | 868 | */ |
| 869 | if (shinfo->nr_frags > MAX_SKB_FRAGS) { | ||
| 870 | frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS; | ||
| 871 | BUG_ON(frag_overflow > MAX_SKB_FRAGS); | ||
| 872 | shinfo->nr_frags = MAX_SKB_FRAGS; | ||
| 873 | } | ||
| 794 | nr_slots = shinfo->nr_frags; | 874 | nr_slots = shinfo->nr_frags; |
| 795 | 875 | ||
| 796 | /* Skip first skb fragment if it is on same page as header fragment. */ | 876 | /* Skip first skb fragment if it is on same page as header fragment. */ |
| 797 | start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); | 877 | start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); |
| 798 | 878 | ||
| 799 | /* Coalesce tx requests, at this point the packet passed in | 879 | for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; |
| 800 | * should be <= 64K. Any packets larger than 64K have been | 880 | shinfo->nr_frags++, txp++, gop++) { |
| 801 | * handled in xenvif_count_requests(). | 881 | index = pending_index(vif->pending_cons++); |
| 802 | */ | 882 | pending_idx = vif->pending_ring[index]; |
| 803 | for (shinfo->nr_frags = slot = start; slot < nr_slots; | 883 | xenvif_tx_create_map_op(vif, pending_idx, txp, gop); |
| 804 | shinfo->nr_frags++) { | 884 | frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); |
| 805 | struct pending_tx_info *pending_tx_info = | 885 | } |
| 806 | vif->pending_tx_info; | ||
| 807 | 886 | ||
| 808 | page = alloc_page(GFP_ATOMIC|__GFP_COLD); | 887 | if (frag_overflow) { |
| 809 | if (!page) | 888 | struct sk_buff *nskb = xenvif_alloc_skb(0); |
| 810 | goto err; | 889 | if (unlikely(nskb == NULL)) { |
| 811 | 890 | if (net_ratelimit()) | |
| 812 | dst_offset = 0; | 891 | netdev_err(vif->dev, |
| 813 | first = NULL; | 892 | "Can't allocate the frag_list skb.\n"); |
| 814 | while (dst_offset < PAGE_SIZE && slot < nr_slots) { | 893 | return NULL; |
| 815 | gop->flags = GNTCOPY_source_gref; | 894 | } |
| 816 | |||
| 817 | gop->source.u.ref = txp->gref; | ||
| 818 | gop->source.domid = vif->domid; | ||
| 819 | gop->source.offset = txp->offset; | ||
| 820 | |||
| 821 | gop->dest.domid = DOMID_SELF; | ||
| 822 | |||
| 823 | gop->dest.offset = dst_offset; | ||
| 824 | gop->dest.u.gmfn = virt_to_mfn(page_address(page)); | ||
| 825 | |||
| 826 | if (dst_offset + txp->size > PAGE_SIZE) { | ||
| 827 | /* This page can only merge a portion | ||
| 828 | * of tx request. Do not increment any | ||
| 829 | * pointer / counter here. The txp | ||
| 830 | * will be dealt with in future | ||
| 831 | * rounds, eventually hitting the | ||
| 832 | * `else` branch. | ||
| 833 | */ | ||
| 834 | gop->len = PAGE_SIZE - dst_offset; | ||
| 835 | txp->offset += gop->len; | ||
| 836 | txp->size -= gop->len; | ||
| 837 | dst_offset += gop->len; /* quit loop */ | ||
| 838 | } else { | ||
| 839 | /* This tx request can be merged in the page */ | ||
| 840 | gop->len = txp->size; | ||
| 841 | dst_offset += gop->len; | ||
| 842 | |||
| 843 | index = pending_index(vif->pending_cons++); | ||
| 844 | |||
| 845 | pending_idx = vif->pending_ring[index]; | ||
| 846 | |||
| 847 | memcpy(&pending_tx_info[pending_idx].req, txp, | ||
| 848 | sizeof(*txp)); | ||
| 849 | |||
| 850 | /* Poison these fields, corresponding | ||
| 851 | * fields for head tx req will be set | ||
| 852 | * to correct values after the loop. | ||
| 853 | */ | ||
| 854 | vif->mmap_pages[pending_idx] = (void *)(~0UL); | ||
| 855 | pending_tx_info[pending_idx].head = | ||
| 856 | INVALID_PENDING_RING_IDX; | ||
| 857 | |||
| 858 | if (!first) { | ||
| 859 | first = &pending_tx_info[pending_idx]; | ||
| 860 | start_idx = index; | ||
| 861 | head_idx = pending_idx; | ||
| 862 | } | ||
| 863 | |||
| 864 | txp++; | ||
| 865 | slot++; | ||
| 866 | } | ||
| 867 | 895 | ||
| 868 | gop++; | 896 | shinfo = skb_shinfo(nskb); |
| 897 | frags = shinfo->frags; | ||
| 898 | |||
| 899 | for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow; | ||
| 900 | shinfo->nr_frags++, txp++, gop++) { | ||
| 901 | index = pending_index(vif->pending_cons++); | ||
| 902 | pending_idx = vif->pending_ring[index]; | ||
| 903 | xenvif_tx_create_map_op(vif, pending_idx, txp, gop); | ||
| 904 | frag_set_pending_idx(&frags[shinfo->nr_frags], | ||
| 905 | pending_idx); | ||
| 869 | } | 906 | } |
| 870 | 907 | ||
| 871 | first->req.offset = 0; | 908 | skb_shinfo(skb)->frag_list = nskb; |
| 872 | first->req.size = dst_offset; | ||
| 873 | first->head = start_idx; | ||
| 874 | vif->mmap_pages[head_idx] = page; | ||
| 875 | frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); | ||
| 876 | } | 909 | } |
| 877 | 910 | ||
| 878 | BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); | ||
| 879 | |||
| 880 | return gop; | 911 | return gop; |
| 881 | err: | 912 | } |
| 882 | /* Unwind, freeing all pages and sending error responses. */ | 913 | |
| 883 | while (shinfo->nr_frags-- > start) { | 914 | static inline void xenvif_grant_handle_set(struct xenvif *vif, |
| 884 | xenvif_idx_release(vif, | 915 | u16 pending_idx, |
| 885 | frag_get_pending_idx(&frags[shinfo->nr_frags]), | 916 | grant_handle_t handle) |
| 886 | XEN_NETIF_RSP_ERROR); | 917 | { |
| 918 | if (unlikely(vif->grant_tx_handle[pending_idx] != | ||
| 919 | NETBACK_INVALID_HANDLE)) { | ||
| 920 | netdev_err(vif->dev, | ||
| 921 | "Trying to overwrite active handle! pending_idx: %x\n", | ||
| 922 | pending_idx); | ||
| 923 | BUG(); | ||
| 887 | } | 924 | } |
| 888 | /* The head too, if necessary. */ | 925 | vif->grant_tx_handle[pending_idx] = handle; |
| 889 | if (start) | 926 | } |
| 890 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); | ||
| 891 | 927 | ||
| 892 | return NULL; | 928 | static inline void xenvif_grant_handle_reset(struct xenvif *vif, |
| 929 | u16 pending_idx) | ||
| 930 | { | ||
| 931 | if (unlikely(vif->grant_tx_handle[pending_idx] == | ||
| 932 | NETBACK_INVALID_HANDLE)) { | ||
| 933 | netdev_err(vif->dev, | ||
| 934 | "Trying to unmap invalid handle! pending_idx: %x\n", | ||
| 935 | pending_idx); | ||
| 936 | BUG(); | ||
| 937 | } | ||
| 938 | vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE; | ||
| 893 | } | 939 | } |
| 894 | 940 | ||
| 895 | static int xenvif_tx_check_gop(struct xenvif *vif, | 941 | static int xenvif_tx_check_gop(struct xenvif *vif, |
| 896 | struct sk_buff *skb, | 942 | struct sk_buff *skb, |
| 897 | struct gnttab_copy **gopp) | 943 | struct gnttab_map_grant_ref **gopp_map, |
| 944 | struct gnttab_copy **gopp_copy) | ||
| 898 | { | 945 | { |
| 899 | struct gnttab_copy *gop = *gopp; | 946 | struct gnttab_map_grant_ref *gop_map = *gopp_map; |
| 900 | u16 pending_idx = *((u16 *)skb->data); | 947 | u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; |
| 901 | struct skb_shared_info *shinfo = skb_shinfo(skb); | 948 | struct skb_shared_info *shinfo = skb_shinfo(skb); |
| 902 | struct pending_tx_info *tx_info; | ||
| 903 | int nr_frags = shinfo->nr_frags; | 949 | int nr_frags = shinfo->nr_frags; |
| 904 | int i, err, start; | 950 | int i, err; |
| 905 | u16 peek; /* peek into next tx request */ | 951 | struct sk_buff *first_skb = NULL; |
| 906 | 952 | ||
| 907 | /* Check status of header. */ | 953 | /* Check status of header. */ |
| 908 | err = gop->status; | 954 | err = (*gopp_copy)->status; |
| 909 | if (unlikely(err)) | 955 | (*gopp_copy)++; |
| 956 | if (unlikely(err)) { | ||
| 957 | if (net_ratelimit()) | ||
| 958 | netdev_dbg(vif->dev, | ||
| 959 | "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", | ||
| 960 | (*gopp_copy)->status, | ||
| 961 | pending_idx, | ||
| 962 | (*gopp_copy)->source.u.ref); | ||
| 910 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); | 963 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); |
| 964 | } | ||
| 911 | 965 | ||
| 912 | /* Skip first skb fragment if it is on same page as header fragment. */ | 966 | check_frags: |
| 913 | start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); | 967 | for (i = 0; i < nr_frags; i++, gop_map++) { |
| 914 | |||
| 915 | for (i = start; i < nr_frags; i++) { | ||
| 916 | int j, newerr; | 968 | int j, newerr; |
| 917 | pending_ring_idx_t head; | ||
| 918 | 969 | ||
| 919 | pending_idx = frag_get_pending_idx(&shinfo->frags[i]); | 970 | pending_idx = frag_get_pending_idx(&shinfo->frags[i]); |
| 920 | tx_info = &vif->pending_tx_info[pending_idx]; | ||
| 921 | head = tx_info->head; | ||
| 922 | 971 | ||
| 923 | /* Check error status: if okay then remember grant handle. */ | 972 | /* Check error status: if okay then remember grant handle. */ |
| 924 | do { | 973 | newerr = gop_map->status; |
| 925 | newerr = (++gop)->status; | ||
| 926 | if (newerr) | ||
| 927 | break; | ||
| 928 | peek = vif->pending_ring[pending_index(++head)]; | ||
| 929 | } while (!pending_tx_is_head(vif, peek)); | ||
| 930 | 974 | ||
| 931 | if (likely(!newerr)) { | 975 | if (likely(!newerr)) { |
| 976 | xenvif_grant_handle_set(vif, | ||
| 977 | pending_idx, | ||
| 978 | gop_map->handle); | ||
| 932 | /* Had a previous error? Invalidate this fragment. */ | 979 | /* Had a previous error? Invalidate this fragment. */ |
| 933 | if (unlikely(err)) | 980 | if (unlikely(err)) |
| 934 | xenvif_idx_release(vif, pending_idx, | 981 | xenvif_idx_unmap(vif, pending_idx); |
| 935 | XEN_NETIF_RSP_OKAY); | ||
| 936 | continue; | 982 | continue; |
| 937 | } | 983 | } |
| 938 | 984 | ||
| 939 | /* Error on this fragment: respond to client with an error. */ | 985 | /* Error on this fragment: respond to client with an error. */ |
| 986 | if (net_ratelimit()) | ||
| 987 | netdev_dbg(vif->dev, | ||
| 988 | "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n", | ||
| 989 | i, | ||
| 990 | gop_map->status, | ||
| 991 | pending_idx, | ||
| 992 | gop_map->ref); | ||
| 940 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); | 993 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); |
| 941 | 994 | ||
| 942 | /* Not the first error? Preceding frags already invalidated. */ | 995 | /* Not the first error? Preceding frags already invalidated. */ |
| 943 | if (err) | 996 | if (err) |
| 944 | continue; | 997 | continue; |
| 945 | 998 | /* First error: invalidate preceding fragments. */ | |
| 946 | /* First error: invalidate header and preceding fragments. */ | 999 | for (j = 0; j < i; j++) { |
| 947 | pending_idx = *((u16 *)skb->data); | ||
| 948 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); | ||
| 949 | for (j = start; j < i; j++) { | ||
| 950 | pending_idx = frag_get_pending_idx(&shinfo->frags[j]); | 1000 | pending_idx = frag_get_pending_idx(&shinfo->frags[j]); |
| 951 | xenvif_idx_release(vif, pending_idx, | 1001 | xenvif_idx_unmap(vif, pending_idx); |
| 952 | XEN_NETIF_RSP_OKAY); | ||
| 953 | } | 1002 | } |
| 954 | 1003 | ||
| 955 | /* Remember the error: invalidate all subsequent fragments. */ | 1004 | /* Remember the error: invalidate all subsequent fragments. */ |
| 956 | err = newerr; | 1005 | err = newerr; |
| 957 | } | 1006 | } |
| 958 | 1007 | ||
| 959 | *gopp = gop + 1; | 1008 | if (skb_has_frag_list(skb)) { |
| 1009 | first_skb = skb; | ||
| 1010 | skb = shinfo->frag_list; | ||
| 1011 | shinfo = skb_shinfo(skb); | ||
| 1012 | nr_frags = shinfo->nr_frags; | ||
| 1013 | |||
| 1014 | goto check_frags; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | /* There was a mapping error in the frag_list skb. We have to unmap | ||
| 1018 | * the first skb's frags | ||
| 1019 | */ | ||
| 1020 | if (first_skb && err) { | ||
| 1021 | int j; | ||
| 1022 | shinfo = skb_shinfo(first_skb); | ||
| 1023 | for (j = 0; j < shinfo->nr_frags; j++) { | ||
| 1024 | pending_idx = frag_get_pending_idx(&shinfo->frags[j]); | ||
| 1025 | xenvif_idx_unmap(vif, pending_idx); | ||
| 1026 | } | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | *gopp_map = gop_map; | ||
| 960 | return err; | 1030 | return err; |
| 961 | } | 1031 | } |
| 962 | 1032 | ||
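
xenvif_tx_check_gop above accumulates errors with a "sticky" err: the first failing frag unwinds everything mapped before it, and once err is set every later success is immediately unmapped as it is seen, so nothing stays mapped for a packet that will be dropped. Below is the same control flow reduced to a stand-alone C function; status, undo and check_results are illustrative names, not part of the driver.

```c
/*
 * Walk n per-frag results. On the first failure, undo items 0..i-1;
 * from then on, undo every further success as soon as it is seen.
 * Returns 0 if all succeeded, otherwise the first error code.
 */
int check_results(const int *status, int n, void (*undo)(int idx))
{
	int err = 0;

	for (int i = 0; i < n; i++) {
		int newerr = status[i];

		if (!newerr) {
			if (err)                /* an earlier frag failed: drop this one too */
				undo(i);
			continue;
		}
		if (err)                        /* not the first error: earlier frags handled */
			continue;
		for (int j = 0; j < i; j++)     /* first error: unwind what already succeeded */
			undo(j);
		err = newerr;
	}
	return err;
}
```
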
| @@ -965,6 +1035,7 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) | |||
| 965 | struct skb_shared_info *shinfo = skb_shinfo(skb); | 1035 | struct skb_shared_info *shinfo = skb_shinfo(skb); |
| 966 | int nr_frags = shinfo->nr_frags; | 1036 | int nr_frags = shinfo->nr_frags; |
| 967 | int i; | 1037 | int i; |
| 1038 | u16 prev_pending_idx = INVALID_PENDING_IDX; | ||
| 968 | 1039 | ||
| 969 | for (i = 0; i < nr_frags; i++) { | 1040 | for (i = 0; i < nr_frags; i++) { |
| 970 | skb_frag_t *frag = shinfo->frags + i; | 1041 | skb_frag_t *frag = shinfo->frags + i; |
| @@ -974,6 +1045,17 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) | |||
| 974 | 1045 | ||
| 975 | pending_idx = frag_get_pending_idx(frag); | 1046 | pending_idx = frag_get_pending_idx(frag); |
| 976 | 1047 | ||
| 1048 | /* If this is not the first frag, chain it to the previous */ | ||
| 1049 | if (prev_pending_idx == INVALID_PENDING_IDX) | ||
| 1050 | skb_shinfo(skb)->destructor_arg = | ||
| 1051 | &callback_param(vif, pending_idx); | ||
| 1052 | else | ||
| 1053 | callback_param(vif, prev_pending_idx).ctx = | ||
| 1054 | &callback_param(vif, pending_idx); | ||
| 1055 | |||
| 1056 | callback_param(vif, pending_idx).ctx = NULL; | ||
| 1057 | prev_pending_idx = pending_idx; | ||
| 1058 | |||
| 977 | txp = &vif->pending_tx_info[pending_idx].req; | 1059 | txp = &vif->pending_tx_info[pending_idx].req; |
| 978 | page = virt_to_page(idx_to_kaddr(vif, pending_idx)); | 1060 | page = virt_to_page(idx_to_kaddr(vif, pending_idx)); |
| 979 | __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); | 1061 | __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); |
| @@ -981,10 +1063,15 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) | |||
| 981 | skb->data_len += txp->size; | 1063 | skb->data_len += txp->size; |
| 982 | skb->truesize += txp->size; | 1064 | skb->truesize += txp->size; |
| 983 | 1065 | ||
| 984 | /* Take an extra reference to offset xenvif_idx_release */ | 1066 | /* Take an extra reference to offset network stack's put_page */ |
| 985 | get_page(vif->mmap_pages[pending_idx]); | 1067 | get_page(vif->mmap_pages[pending_idx]); |
| 986 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); | ||
| 987 | } | 1068 | } |
| 1069 | /* FIXME: __skb_fill_page_desc sets this to true because page->pfmemalloc | ||
| 1070 | * overlaps with "index", and "mapping" is not set. I think mapping | ||
| 1071 | * should be set. If delivered to local stack, it would drop this | ||
| 1072 | * skb in sk_filter unless the socket has the right to use it. | ||
| 1073 | */ | ||
| 1074 | skb->pfmemalloc = false; | ||
| 988 | } | 1075 | } |
| 989 | 1076 | ||
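
The destructor_arg/ctx assignments in xenvif_fill_frags above build a NULL-terminated chain through the callback structs of every pending slot backing the skb, so a single zerocopy callback can later walk all of them. A stand-alone sketch of building and walking such a chain is below; struct slot_cb and chain_slots are illustrative, not the kernel's ubuf_info API.

```c
#include <stdio.h>

/* Illustrative per-slot record; models ubuf_info's desc and ctx fields. */
struct slot_cb {
	unsigned short idx;    /* pending index this slot represents */
	struct slot_cb *next;  /* models ubuf_info->ctx */
};

/* Chain the slots used by one packet, head first, as fill_frags does. */
struct slot_cb *chain_slots(struct slot_cb *slots,
			    const unsigned short *used, int n)
{
	struct slot_cb *head = NULL, *prev = NULL;

	for (int i = 0; i < n; i++) {
		struct slot_cb *cb = &slots[used[i]];

		cb->next = NULL;
		if (!prev)
			head = cb;       /* first frag: becomes destructor_arg */
		else
			prev->next = cb; /* later frags: hang off the previous slot */
		prev = cb;
	}
	return head;
}

int main(void)
{
	struct slot_cb slots[8] = { { 0 } };
	const unsigned short used[] = { 5, 2, 7 };

	for (unsigned short i = 0; i < 8; i++)
		slots[i].idx = i;

	/* Walking the chain visits the slots in frag order: 5, 2, 7. */
	for (struct slot_cb *cb = chain_slots(slots, used, 3); cb; cb = cb->next)
		printf("release pending_idx %u\n", (unsigned)cb->idx);
	return 0;
}
```
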
| 990 | static int xenvif_get_extras(struct xenvif *vif, | 1077 | static int xenvif_get_extras(struct xenvif *vif, |
| @@ -1102,18 +1189,18 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) | |||
| 1102 | return false; | 1189 | return false; |
| 1103 | } | 1190 | } |
| 1104 | 1191 | ||
| 1105 | static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) | 1192 | static void xenvif_tx_build_gops(struct xenvif *vif, |
| 1193 | int budget, | ||
| 1194 | unsigned *copy_ops, | ||
| 1195 | unsigned *map_ops) | ||
| 1106 | { | 1196 | { |
| 1107 | struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop; | 1197 | struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop; |
| 1108 | struct sk_buff *skb; | 1198 | struct sk_buff *skb; |
| 1109 | int ret; | 1199 | int ret; |
| 1110 | 1200 | ||
| 1111 | while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX | 1201 | while (skb_queue_len(&vif->tx_queue) < budget) { |
| 1112 | < MAX_PENDING_REQS) && | ||
| 1113 | (skb_queue_len(&vif->tx_queue) < budget)) { | ||
| 1114 | struct xen_netif_tx_request txreq; | 1202 | struct xen_netif_tx_request txreq; |
| 1115 | struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; | 1203 | struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; |
| 1116 | struct page *page; | ||
| 1117 | struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; | 1204 | struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; |
| 1118 | u16 pending_idx; | 1205 | u16 pending_idx; |
| 1119 | RING_IDX idx; | 1206 | RING_IDX idx; |
| @@ -1129,7 +1216,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) | |||
| 1129 | vif->tx.sring->req_prod, vif->tx.req_cons, | 1216 | vif->tx.sring->req_prod, vif->tx.req_cons, |
| 1130 | XEN_NETIF_TX_RING_SIZE); | 1217 | XEN_NETIF_TX_RING_SIZE); |
| 1131 | xenvif_fatal_tx_err(vif); | 1218 | xenvif_fatal_tx_err(vif); |
| 1132 | continue; | 1219 | break; |
| 1133 | } | 1220 | } |
| 1134 | 1221 | ||
| 1135 | work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); | 1222 | work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); |
| @@ -1189,8 +1276,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) | |||
| 1189 | ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? | 1276 | ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? |
| 1190 | PKT_PROT_LEN : txreq.size; | 1277 | PKT_PROT_LEN : txreq.size; |
| 1191 | 1278 | ||
| 1192 | skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, | 1279 | skb = xenvif_alloc_skb(data_len); |
| 1193 | GFP_ATOMIC | __GFP_NOWARN); | ||
| 1194 | if (unlikely(skb == NULL)) { | 1280 | if (unlikely(skb == NULL)) { |
| 1195 | netdev_dbg(vif->dev, | 1281 | netdev_dbg(vif->dev, |
| 1196 | "Can't allocate a skb in start_xmit.\n"); | 1282 | "Can't allocate a skb in start_xmit.\n"); |
| @@ -1198,9 +1284,6 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) | |||
| 1198 | break; | 1284 | break; |
| 1199 | } | 1285 | } |
| 1200 | 1286 | ||
| 1201 | /* Packets passed to netif_rx() must have some headroom. */ | ||
| 1202 | skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); | ||
| 1203 | |||
| 1204 | if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { | 1287 | if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { |
| 1205 | struct xen_netif_extra_info *gso; | 1288 | struct xen_netif_extra_info *gso; |
| 1206 | gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; | 1289 | gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; |
| @@ -1212,42 +1295,36 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) | |||
| 1212 | } | 1295 | } |
| 1213 | } | 1296 | } |
| 1214 | 1297 | ||
| 1215 | /* XXX could copy straight to head */ | 1298 | XENVIF_TX_CB(skb)->pending_idx = pending_idx; |
| 1216 | page = xenvif_alloc_page(vif, pending_idx); | ||
| 1217 | if (!page) { | ||
| 1218 | kfree_skb(skb); | ||
| 1219 | xenvif_tx_err(vif, &txreq, idx); | ||
| 1220 | break; | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | gop->source.u.ref = txreq.gref; | ||
| 1224 | gop->source.domid = vif->domid; | ||
| 1225 | gop->source.offset = txreq.offset; | ||
| 1226 | 1299 | ||
| 1227 | gop->dest.u.gmfn = virt_to_mfn(page_address(page)); | 1300 | __skb_put(skb, data_len); |
| 1228 | gop->dest.domid = DOMID_SELF; | 1301 | vif->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; |
| 1229 | gop->dest.offset = txreq.offset; | 1302 | vif->tx_copy_ops[*copy_ops].source.domid = vif->domid; |
| 1303 | vif->tx_copy_ops[*copy_ops].source.offset = txreq.offset; | ||
| 1230 | 1304 | ||
| 1231 | gop->len = txreq.size; | 1305 | vif->tx_copy_ops[*copy_ops].dest.u.gmfn = |
| 1232 | gop->flags = GNTCOPY_source_gref; | 1306 | virt_to_mfn(skb->data); |
| 1307 | vif->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; | ||
| 1308 | vif->tx_copy_ops[*copy_ops].dest.offset = | ||
| 1309 | offset_in_page(skb->data); | ||
| 1233 | 1310 | ||
| 1234 | gop++; | 1311 | vif->tx_copy_ops[*copy_ops].len = data_len; |
| 1312 | vif->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; | ||
| 1235 | 1313 | ||
| 1236 | memcpy(&vif->pending_tx_info[pending_idx].req, | 1314 | (*copy_ops)++; |
| 1237 | &txreq, sizeof(txreq)); | ||
| 1238 | vif->pending_tx_info[pending_idx].head = index; | ||
| 1239 | *((u16 *)skb->data) = pending_idx; | ||
| 1240 | |||
| 1241 | __skb_put(skb, data_len); | ||
| 1242 | 1315 | ||
| 1243 | skb_shinfo(skb)->nr_frags = ret; | 1316 | skb_shinfo(skb)->nr_frags = ret; |
| 1244 | if (data_len < txreq.size) { | 1317 | if (data_len < txreq.size) { |
| 1245 | skb_shinfo(skb)->nr_frags++; | 1318 | skb_shinfo(skb)->nr_frags++; |
| 1246 | frag_set_pending_idx(&skb_shinfo(skb)->frags[0], | 1319 | frag_set_pending_idx(&skb_shinfo(skb)->frags[0], |
| 1247 | pending_idx); | 1320 | pending_idx); |
| 1321 | xenvif_tx_create_map_op(vif, pending_idx, &txreq, gop); | ||
| 1322 | gop++; | ||
| 1248 | } else { | 1323 | } else { |
| 1249 | frag_set_pending_idx(&skb_shinfo(skb)->frags[0], | 1324 | frag_set_pending_idx(&skb_shinfo(skb)->frags[0], |
| 1250 | INVALID_PENDING_IDX); | 1325 | INVALID_PENDING_IDX); |
| 1326 | memcpy(&vif->pending_tx_info[pending_idx].req, &txreq, | ||
| 1327 | sizeof(txreq)); | ||
| 1251 | } | 1328 | } |
| 1252 | 1329 | ||
| 1253 | vif->pending_cons++; | 1330 | vif->pending_cons++; |
| @@ -1264,17 +1341,85 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) | |||
| 1264 | 1341 | ||
| 1265 | vif->tx.req_cons = idx; | 1342 | vif->tx.req_cons = idx; |
| 1266 | 1343 | ||
| 1267 | if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) | 1344 | if (((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops)) || |
| 1345 | (*copy_ops >= ARRAY_SIZE(vif->tx_copy_ops))) | ||
| 1268 | break; | 1346 | break; |
| 1269 | } | 1347 | } |
| 1270 | 1348 | ||
| 1271 | return gop - vif->tx_copy_ops; | 1349 | (*map_ops) = gop - vif->tx_map_ops; |
| 1350 | return; | ||
| 1272 | } | 1351 | } |
| 1273 | 1352 | ||
| 1353 | /* Consolidate skb with a frag_list into a brand new one with local pages on | ||
| 1354 | * frags. Returns 0 or -ENOMEM if can't allocate new pages. | ||
| 1355 | */ | ||
| 1356 | static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb) | ||
| 1357 | { | ||
| 1358 | unsigned int offset = skb_headlen(skb); | ||
| 1359 | skb_frag_t frags[MAX_SKB_FRAGS]; | ||
| 1360 | int i; | ||
| 1361 | struct ubuf_info *uarg; | ||
| 1362 | struct sk_buff *nskb = skb_shinfo(skb)->frag_list; | ||
| 1363 | |||
| 1364 | vif->tx_zerocopy_sent += 2; | ||
| 1365 | vif->tx_frag_overflow++; | ||
| 1366 | |||
| 1367 | xenvif_fill_frags(vif, nskb); | ||
| 1368 | /* Subtract frags size, we will correct it later */ | ||
| 1369 | skb->truesize -= skb->data_len; | ||
| 1370 | skb->len += nskb->len; | ||
| 1371 | skb->data_len += nskb->len; | ||
| 1372 | |||
| 1373 | /* create a brand new frags array and coalesce there */ | ||
| 1374 | for (i = 0; offset < skb->len; i++) { | ||
| 1375 | struct page *page; | ||
| 1376 | unsigned int len; | ||
| 1377 | |||
| 1378 | BUG_ON(i >= MAX_SKB_FRAGS); | ||
| 1379 | page = alloc_page(GFP_ATOMIC|__GFP_COLD); | ||
| 1380 | if (!page) { | ||
| 1381 | int j; | ||
| 1382 | skb->truesize += skb->data_len; | ||
| 1383 | for (j = 0; j < i; j++) | ||
| 1384 | put_page(frags[j].page.p); | ||
| 1385 | return -ENOMEM; | ||
| 1386 | } | ||
| 1387 | |||
| 1388 | if (offset + PAGE_SIZE < skb->len) | ||
| 1389 | len = PAGE_SIZE; | ||
| 1390 | else | ||
| 1391 | len = skb->len - offset; | ||
| 1392 | if (skb_copy_bits(skb, offset, page_address(page), len)) | ||
| 1393 | BUG(); | ||
| 1394 | |||
| 1395 | offset += len; | ||
| 1396 | frags[i].page.p = page; | ||
| 1397 | frags[i].page_offset = 0; | ||
| 1398 | skb_frag_size_set(&frags[i], len); | ||
| 1399 | } | ||
| 1400 | /* swap out with old one */ | ||
| 1401 | memcpy(skb_shinfo(skb)->frags, | ||
| 1402 | frags, | ||
| 1403 | i * sizeof(skb_frag_t)); | ||
| 1404 | skb_shinfo(skb)->nr_frags = i; | ||
| 1405 | skb->truesize += i * PAGE_SIZE; | ||
| 1406 | |||
| 1407 | /* remove traces of mapped pages and frag_list */ | ||
| 1408 | skb_frag_list_init(skb); | ||
| 1409 | uarg = skb_shinfo(skb)->destructor_arg; | ||
| 1410 | uarg->callback(uarg, true); | ||
| 1411 | skb_shinfo(skb)->destructor_arg = NULL; | ||
| 1412 | |||
| 1413 | skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | ||
| 1414 | kfree_skb(nskb); | ||
| 1415 | |||
| 1416 | return 0; | ||
| 1417 | } | ||
| 1274 | 1418 | ||
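
xenvif_handle_frag_list above exists for skbs whose slots no longer fit in MAX_SKB_FRAGS once the frag_list is merged in: it copies the remaining payload into freshly allocated local pages, one page-sized chunk at a time, and frees everything already allocated if an allocation fails. The copy-and-unwind pattern in isolation, as plain user-space C (chunk and coalesce are illustrative names, and malloc stands in for alloc_page):

```c
#include <stdlib.h>
#include <string.h>

#define CHUNK_SIZE 4096          /* stands in for PAGE_SIZE */

struct chunk {
	void  *data;
	size_t len;
};

/*
 * Copy src[offset..len) into newly allocated CHUNK_SIZE pieces.
 * Returns the number of chunks filled, or -1 after freeing every
 * chunk already allocated (mirrors the driver's -ENOMEM unwind).
 */
int coalesce(const char *src, size_t len, size_t offset,
	     struct chunk *out, int max_chunks)
{
	int i;

	for (i = 0; offset < len; i++) {
		size_t n = len - offset < CHUNK_SIZE ? len - offset : CHUNK_SIZE;

		if (i >= max_chunks)
			goto unwind;            /* would overflow the frag array */
		out[i].data = malloc(CHUNK_SIZE);
		if (!out[i].data)
			goto unwind;            /* allocation failure */
		memcpy(out[i].data, src + offset, n);
		out[i].len = n;
		offset += n;
	}
	return i;

unwind:
	while (i-- > 0)
		free(out[i].data);
	return -1;
}
```

In the driver the caller then swaps the rebuilt frag array into the skb, fires the zerocopy callback for the mapped pages, and drops the now-empty frag_list skb.
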
| 1275 | static int xenvif_tx_submit(struct xenvif *vif) | 1419 | static int xenvif_tx_submit(struct xenvif *vif) |
| 1276 | { | 1420 | { |
| 1277 | struct gnttab_copy *gop = vif->tx_copy_ops; | 1421 | struct gnttab_map_grant_ref *gop_map = vif->tx_map_ops; |
| 1422 | struct gnttab_copy *gop_copy = vif->tx_copy_ops; | ||
| 1278 | struct sk_buff *skb; | 1423 | struct sk_buff *skb; |
| 1279 | int work_done = 0; | 1424 | int work_done = 0; |
| 1280 | 1425 | ||
| @@ -1283,21 +1428,18 @@ static int xenvif_tx_submit(struct xenvif *vif) | |||
| 1283 | u16 pending_idx; | 1428 | u16 pending_idx; |
| 1284 | unsigned data_len; | 1429 | unsigned data_len; |
| 1285 | 1430 | ||
| 1286 | pending_idx = *((u16 *)skb->data); | 1431 | pending_idx = XENVIF_TX_CB(skb)->pending_idx; |
| 1287 | txp = &vif->pending_tx_info[pending_idx].req; | 1432 | txp = &vif->pending_tx_info[pending_idx].req; |
| 1288 | 1433 | ||
| 1289 | /* Check the remap error code. */ | 1434 | /* Check the remap error code. */ |
| 1290 | if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) { | 1435 | if (unlikely(xenvif_tx_check_gop(vif, skb, &gop_map, &gop_copy))) { |
| 1291 | netdev_dbg(vif->dev, "netback grant failed.\n"); | ||
| 1292 | skb_shinfo(skb)->nr_frags = 0; | 1436 | skb_shinfo(skb)->nr_frags = 0; |
| 1293 | kfree_skb(skb); | 1437 | kfree_skb(skb); |
| 1294 | continue; | 1438 | continue; |
| 1295 | } | 1439 | } |
| 1296 | 1440 | ||
| 1297 | data_len = skb->len; | 1441 | data_len = skb->len; |
| 1298 | memcpy(skb->data, | 1442 | callback_param(vif, pending_idx).ctx = NULL; |
| 1299 | (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), | ||
| 1300 | data_len); | ||
| 1301 | if (data_len < txp->size) { | 1443 | if (data_len < txp->size) { |
| 1302 | /* Append the packet payload as a fragment. */ | 1444 | /* Append the packet payload as a fragment. */ |
| 1303 | txp->offset += data_len; | 1445 | txp->offset += data_len; |
| @@ -1315,6 +1457,17 @@ static int xenvif_tx_submit(struct xenvif *vif) | |||
| 1315 | 1457 | ||
| 1316 | xenvif_fill_frags(vif, skb); | 1458 | xenvif_fill_frags(vif, skb); |
| 1317 | 1459 | ||
| 1460 | if (unlikely(skb_has_frag_list(skb))) { | ||
| 1461 | if (xenvif_handle_frag_list(vif, skb)) { | ||
| 1462 | if (net_ratelimit()) | ||
| 1463 | netdev_err(vif->dev, | ||
| 1464 | "Not enough memory to consolidate frag_list!\n"); | ||
| 1465 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | ||
| 1466 | kfree_skb(skb); | ||
| 1467 | continue; | ||
| 1468 | } | ||
| 1469 | } | ||
| 1470 | |||
| 1318 | if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { | 1471 | if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { |
| 1319 | int target = min_t(int, skb->len, PKT_PROT_LEN); | 1472 | int target = min_t(int, skb->len, PKT_PROT_LEN); |
| 1320 | __pskb_pull_tail(skb, target - skb_headlen(skb)); | 1473 | __pskb_pull_tail(skb, target - skb_headlen(skb)); |
| @@ -1327,6 +1480,9 @@ static int xenvif_tx_submit(struct xenvif *vif) | |||
| 1327 | if (checksum_setup(vif, skb)) { | 1480 | if (checksum_setup(vif, skb)) { |
| 1328 | netdev_dbg(vif->dev, | 1481 | netdev_dbg(vif->dev, |
| 1329 | "Can't setup checksum in net_tx_action\n"); | 1482 | "Can't setup checksum in net_tx_action\n"); |
| 1483 | /* We have to set this flag to trigger the callback */ | ||
| 1484 | if (skb_shinfo(skb)->destructor_arg) | ||
| 1485 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | ||
| 1330 | kfree_skb(skb); | 1486 | kfree_skb(skb); |
| 1331 | continue; | 1487 | continue; |
| 1332 | } | 1488 | } |
| @@ -1352,27 +1508,143 @@ static int xenvif_tx_submit(struct xenvif *vif) | |||
| 1352 | 1508 | ||
| 1353 | work_done++; | 1509 | work_done++; |
| 1354 | 1510 | ||
| 1511 | /* Set this flag right before netif_receive_skb, otherwise | ||
| 1512 | * someone might think this packet already left netback, and | ||
| 1513 | * do a skb_copy_ubufs while we are still in control of the | ||
| 1514 | * skb. E.g. the __pskb_pull_tail earlier can do such a thing. | ||
| 1515 | */ | ||
| 1516 | if (skb_shinfo(skb)->destructor_arg) { | ||
| 1517 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | ||
| 1518 | vif->tx_zerocopy_sent++; | ||
| 1519 | } | ||
| 1520 | |||
| 1355 | netif_receive_skb(skb); | 1521 | netif_receive_skb(skb); |
| 1356 | } | 1522 | } |
| 1357 | 1523 | ||
| 1358 | return work_done; | 1524 | return work_done; |
| 1359 | } | 1525 | } |
| 1360 | 1526 | ||
| 1527 | void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) | ||
| 1528 | { | ||
| 1529 | unsigned long flags; | ||
| 1530 | pending_ring_idx_t index; | ||
| 1531 | struct xenvif *vif = ubuf_to_vif(ubuf); | ||
| 1532 | |||
| 1533 | /* This is the only place where we grab this lock, to protect callbacks | ||
| 1534 | * from each other. | ||
| 1535 | */ | ||
| 1536 | spin_lock_irqsave(&vif->callback_lock, flags); | ||
| 1537 | do { | ||
| 1538 | u16 pending_idx = ubuf->desc; | ||
| 1539 | ubuf = (struct ubuf_info *) ubuf->ctx; | ||
| 1540 | BUG_ON(vif->dealloc_prod - vif->dealloc_cons >= | ||
| 1541 | MAX_PENDING_REQS); | ||
| 1542 | index = pending_index(vif->dealloc_prod); | ||
| 1543 | vif->dealloc_ring[index] = pending_idx; | ||
| 1544 | /* Sync with xenvif_tx_dealloc_action: | ||
| 1545 | * insert idx then incr producer. | ||
| 1546 | */ | ||
| 1547 | smp_wmb(); | ||
| 1548 | vif->dealloc_prod++; | ||
| 1549 | } while (ubuf); | ||
| 1550 | wake_up(&vif->dealloc_wq); | ||
| 1551 | spin_unlock_irqrestore(&vif->callback_lock, flags); | ||
| 1552 | |||
| 1553 | if (likely(zerocopy_success)) | ||
| 1554 | vif->tx_zerocopy_success++; | ||
| 1555 | else | ||
| 1556 | vif->tx_zerocopy_fail++; | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | static inline void xenvif_tx_dealloc_action(struct xenvif *vif) | ||
| 1560 | { | ||
| 1561 | struct gnttab_unmap_grant_ref *gop; | ||
| 1562 | pending_ring_idx_t dc, dp; | ||
| 1563 | u16 pending_idx, pending_idx_release[MAX_PENDING_REQS]; | ||
| 1564 | unsigned int i = 0; | ||
| 1565 | |||
| 1566 | dc = vif->dealloc_cons; | ||
| 1567 | gop = vif->tx_unmap_ops; | ||
| 1568 | |||
| 1569 | /* Free up any grants we have finished using */ | ||
| 1570 | do { | ||
| 1571 | dp = vif->dealloc_prod; | ||
| 1572 | |||
| 1573 | /* Ensure we see all indices enqueued by all | ||
| 1574 | * xenvif_zerocopy_callback(). | ||
| 1575 | */ | ||
| 1576 | smp_rmb(); | ||
| 1577 | |||
| 1578 | while (dc != dp) { | ||
| 1579 | BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS); | ||
| 1580 | pending_idx = | ||
| 1581 | vif->dealloc_ring[pending_index(dc++)]; | ||
| 1582 | |||
| 1583 | pending_idx_release[gop-vif->tx_unmap_ops] = | ||
| 1584 | pending_idx; | ||
| 1585 | vif->pages_to_unmap[gop-vif->tx_unmap_ops] = | ||
| 1586 | vif->mmap_pages[pending_idx]; | ||
| 1587 | gnttab_set_unmap_op(gop, | ||
| 1588 | idx_to_kaddr(vif, pending_idx), | ||
| 1589 | GNTMAP_host_map, | ||
| 1590 | vif->grant_tx_handle[pending_idx]); | ||
| 1591 | xenvif_grant_handle_reset(vif, pending_idx); | ||
| 1592 | ++gop; | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | } while (dp != vif->dealloc_prod); | ||
| 1596 | |||
| 1597 | vif->dealloc_cons = dc; | ||
| 1598 | |||
| 1599 | if (gop - vif->tx_unmap_ops > 0) { | ||
| 1600 | int ret; | ||
| 1601 | ret = gnttab_unmap_refs(vif->tx_unmap_ops, | ||
| 1602 | NULL, | ||
| 1603 | vif->pages_to_unmap, | ||
| 1604 | gop - vif->tx_unmap_ops); | ||
| 1605 | if (ret) { | ||
| 1606 | netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n", | ||
| 1607 | gop - vif->tx_unmap_ops, ret); | ||
| 1608 | for (i = 0; i < gop - vif->tx_unmap_ops; ++i) { | ||
| 1609 | if (gop[i].status != GNTST_okay) | ||
| 1610 | netdev_err(vif->dev, | ||
| 1611 | " host_addr: %llx handle: %x status: %d\n", | ||
| 1612 | gop[i].host_addr, | ||
| 1613 | gop[i].handle, | ||
| 1614 | gop[i].status); | ||
| 1615 | } | ||
| 1616 | BUG(); | ||
| 1617 | } | ||
| 1618 | } | ||
| 1619 | |||
| 1620 | for (i = 0; i < gop - vif->tx_unmap_ops; ++i) | ||
| 1621 | xenvif_idx_release(vif, pending_idx_release[i], | ||
| 1622 | XEN_NETIF_RSP_OKAY); | ||
| 1623 | } | ||
| 1624 | |||
| 1625 | |||
| 1361 | /* Called after netfront has transmitted */ | 1626 | /* Called after netfront has transmitted */ |
| 1362 | int xenvif_tx_action(struct xenvif *vif, int budget) | 1627 | int xenvif_tx_action(struct xenvif *vif, int budget) |
| 1363 | { | 1628 | { |
| 1364 | unsigned nr_gops; | 1629 | unsigned nr_mops, nr_cops = 0; |
| 1365 | int work_done; | 1630 | int work_done, ret; |
| 1366 | 1631 | ||
| 1367 | if (unlikely(!tx_work_todo(vif))) | 1632 | if (unlikely(!tx_work_todo(vif))) |
| 1368 | return 0; | 1633 | return 0; |
| 1369 | 1634 | ||
| 1370 | nr_gops = xenvif_tx_build_gops(vif, budget); | 1635 | xenvif_tx_build_gops(vif, budget, &nr_cops, &nr_mops); |
| 1371 | 1636 | ||
| 1372 | if (nr_gops == 0) | 1637 | if (nr_cops == 0) |
| 1373 | return 0; | 1638 | return 0; |
| 1374 | 1639 | ||
| 1375 | gnttab_batch_copy(vif->tx_copy_ops, nr_gops); | 1640 | gnttab_batch_copy(vif->tx_copy_ops, nr_cops); |
| 1641 | if (nr_mops != 0) { | ||
| 1642 | ret = gnttab_map_refs(vif->tx_map_ops, | ||
| 1643 | NULL, | ||
| 1644 | vif->pages_to_map, | ||
| 1645 | nr_mops); | ||
| 1646 | BUG_ON(ret); | ||
| 1647 | } | ||
| 1376 | 1648 | ||
| 1377 | work_done = xenvif_tx_submit(vif); | 1649 | work_done = xenvif_tx_submit(vif); |
| 1378 | 1650 | ||
| @@ -1383,45 +1655,18 @@ static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, | |||
| 1383 | u8 status) | 1655 | u8 status) |
| 1384 | { | 1656 | { |
| 1385 | struct pending_tx_info *pending_tx_info; | 1657 | struct pending_tx_info *pending_tx_info; |
| 1386 | pending_ring_idx_t head; | 1658 | pending_ring_idx_t index; |
| 1387 | u16 peek; /* peek into next tx request */ | 1659 | unsigned long flags; |
| 1388 | |||
| 1389 | BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); | ||
| 1390 | |||
| 1391 | /* Already complete? */ | ||
| 1392 | if (vif->mmap_pages[pending_idx] == NULL) | ||
| 1393 | return; | ||
| 1394 | 1660 | ||
| 1395 | pending_tx_info = &vif->pending_tx_info[pending_idx]; | 1661 | pending_tx_info = &vif->pending_tx_info[pending_idx]; |
| 1396 | 1662 | spin_lock_irqsave(&vif->response_lock, flags); | |
| 1397 | head = pending_tx_info->head; | 1663 | make_tx_response(vif, &pending_tx_info->req, status); |
| 1398 | 1664 | index = pending_index(vif->pending_prod); | |
| 1399 | BUG_ON(!pending_tx_is_head(vif, head)); | 1665 | vif->pending_ring[index] = pending_idx; |
| 1400 | BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx); | 1666 | /* TX shouldn't use the index before we give it back here */ |
| 1401 | 1667 | mb(); | |
| 1402 | do { | 1668 | vif->pending_prod++; |
| 1403 | pending_ring_idx_t index; | 1669 | spin_unlock_irqrestore(&vif->response_lock, flags); |
| 1404 | pending_ring_idx_t idx = pending_index(head); | ||
| 1405 | u16 info_idx = vif->pending_ring[idx]; | ||
| 1406 | |||
| 1407 | pending_tx_info = &vif->pending_tx_info[info_idx]; | ||
| 1408 | make_tx_response(vif, &pending_tx_info->req, status); | ||
| 1409 | |||
| 1410 | /* Setting any number other than | ||
| 1411 | * INVALID_PENDING_RING_IDX indicates this slot is | ||
| 1412 | * starting a new packet / ending a previous packet. | ||
| 1413 | */ | ||
| 1414 | pending_tx_info->head = 0; | ||
| 1415 | |||
| 1416 | index = pending_index(vif->pending_prod++); | ||
| 1417 | vif->pending_ring[index] = vif->pending_ring[info_idx]; | ||
| 1418 | |||
| 1419 | peek = vif->pending_ring[pending_index(++head)]; | ||
| 1420 | |||
| 1421 | } while (!pending_tx_is_head(vif, peek)); | ||
| 1422 | |||
| 1423 | put_page(vif->mmap_pages[pending_idx]); | ||
| 1424 | vif->mmap_pages[pending_idx] = NULL; | ||
| 1425 | } | 1670 | } |
| 1426 | 1671 | ||
| 1427 | 1672 | ||
| @@ -1469,23 +1714,54 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, | |||
| 1469 | return resp; | 1714 | return resp; |
| 1470 | } | 1715 | } |
| 1471 | 1716 | ||
| 1717 | void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx) | ||
| 1718 | { | ||
| 1719 | int ret; | ||
| 1720 | struct gnttab_unmap_grant_ref tx_unmap_op; | ||
| 1721 | |||
| 1722 | gnttab_set_unmap_op(&tx_unmap_op, | ||
| 1723 | idx_to_kaddr(vif, pending_idx), | ||
| 1724 | GNTMAP_host_map, | ||
| 1725 | vif->grant_tx_handle[pending_idx]); | ||
| 1726 | xenvif_grant_handle_reset(vif, pending_idx); | ||
| 1727 | |||
| 1728 | ret = gnttab_unmap_refs(&tx_unmap_op, NULL, | ||
| 1729 | &vif->mmap_pages[pending_idx], 1); | ||
| 1730 | if (ret) { | ||
| 1731 | netdev_err(vif->dev, | ||
| 1732 | "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n", | ||
| 1733 | ret, | ||
| 1734 | pending_idx, | ||
| 1735 | tx_unmap_op.host_addr, | ||
| 1736 | tx_unmap_op.handle, | ||
| 1737 | tx_unmap_op.status); | ||
| 1738 | BUG(); | ||
| 1739 | } | ||
| 1740 | |||
| 1741 | xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); | ||
| 1742 | } | ||
| 1743 | |||
| 1472 | static inline int rx_work_todo(struct xenvif *vif) | 1744 | static inline int rx_work_todo(struct xenvif *vif) |
| 1473 | { | 1745 | { |
| 1474 | return !skb_queue_empty(&vif->rx_queue) && | 1746 | return (!skb_queue_empty(&vif->rx_queue) && |
| 1475 | xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots); | 1747 | xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) || |
| 1748 | vif->rx_queue_purge; | ||
| 1476 | } | 1749 | } |
| 1477 | 1750 | ||
| 1478 | static inline int tx_work_todo(struct xenvif *vif) | 1751 | static inline int tx_work_todo(struct xenvif *vif) |
| 1479 | { | 1752 | { |
| 1480 | 1753 | ||
| 1481 | if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) && | 1754 | if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))) |
| 1482 | (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX | ||
| 1483 | < MAX_PENDING_REQS)) | ||
| 1484 | return 1; | 1755 | return 1; |
| 1485 | 1756 | ||
| 1486 | return 0; | 1757 | return 0; |
| 1487 | } | 1758 | } |
| 1488 | 1759 | ||
| 1760 | static inline bool tx_dealloc_work_todo(struct xenvif *vif) | ||
| 1761 | { | ||
| 1762 | return vif->dealloc_cons != vif->dealloc_prod; | ||
| 1763 | } | ||
| 1764 | |||
| 1489 | void xenvif_unmap_frontend_rings(struct xenvif *vif) | 1765 | void xenvif_unmap_frontend_rings(struct xenvif *vif) |
| 1490 | { | 1766 | { |
| 1491 | if (vif->tx.sring) | 1767 | if (vif->tx.sring) |
| @@ -1543,7 +1819,7 @@ static void xenvif_start_queue(struct xenvif *vif) | |||
| 1543 | netif_wake_queue(vif->dev); | 1819 | netif_wake_queue(vif->dev); |
| 1544 | } | 1820 | } |
| 1545 | 1821 | ||
| 1546 | int xenvif_kthread(void *data) | 1822 | int xenvif_kthread_guest_rx(void *data) |
| 1547 | { | 1823 | { |
| 1548 | struct xenvif *vif = data; | 1824 | struct xenvif *vif = data; |
| 1549 | struct sk_buff *skb; | 1825 | struct sk_buff *skb; |
| @@ -1551,16 +1827,34 @@ int xenvif_kthread(void *data) | |||
| 1551 | while (!kthread_should_stop()) { | 1827 | while (!kthread_should_stop()) { |
| 1552 | wait_event_interruptible(vif->wq, | 1828 | wait_event_interruptible(vif->wq, |
| 1553 | rx_work_todo(vif) || | 1829 | rx_work_todo(vif) || |
| 1830 | vif->disabled || | ||
| 1554 | kthread_should_stop()); | 1831 | kthread_should_stop()); |
| 1832 | |||
| 1833 | /* This frontend is found to be rogue, disable it in | ||
| 1834 | * kthread context. Currently this is only set when | ||
| 1835 | * netback finds out the frontend sends a malformed packet, | ||
| 1836 | * but we cannot disable the interface in softirq | ||
| 1837 | * context so we defer it here. | ||
| 1838 | */ | ||
| 1839 | if (unlikely(vif->disabled && netif_carrier_ok(vif->dev))) | ||
| 1840 | xenvif_carrier_off(vif); | ||
| 1841 | |||
| 1555 | if (kthread_should_stop()) | 1842 | if (kthread_should_stop()) |
| 1556 | break; | 1843 | break; |
| 1557 | 1844 | ||
| 1845 | if (vif->rx_queue_purge) { | ||
| 1846 | skb_queue_purge(&vif->rx_queue); | ||
| 1847 | vif->rx_queue_purge = false; | ||
| 1848 | } | ||
| 1849 | |||
| 1558 | if (!skb_queue_empty(&vif->rx_queue)) | 1850 | if (!skb_queue_empty(&vif->rx_queue)) |
| 1559 | xenvif_rx_action(vif); | 1851 | xenvif_rx_action(vif); |
| 1560 | 1852 | ||
| 1561 | if (skb_queue_empty(&vif->rx_queue) && | 1853 | if (skb_queue_empty(&vif->rx_queue) && |
| 1562 | netif_queue_stopped(vif->dev)) | 1854 | netif_queue_stopped(vif->dev)) { |
| 1855 | del_timer_sync(&vif->wake_queue); | ||
| 1563 | xenvif_start_queue(vif); | 1856 | xenvif_start_queue(vif); |
| 1857 | } | ||
| 1564 | 1858 | ||
| 1565 | cond_resched(); | 1859 | cond_resched(); |
| 1566 | } | 1860 | } |
| @@ -1572,6 +1866,28 @@ int xenvif_kthread(void *data) | |||
| 1572 | return 0; | 1866 | return 0; |
| 1573 | } | 1867 | } |
| 1574 | 1868 | ||
| 1869 | int xenvif_dealloc_kthread(void *data) | ||
| 1870 | { | ||
| 1871 | struct xenvif *vif = data; | ||
| 1872 | |||
| 1873 | while (!kthread_should_stop()) { | ||
| 1874 | wait_event_interruptible(vif->dealloc_wq, | ||
| 1875 | tx_dealloc_work_todo(vif) || | ||
| 1876 | kthread_should_stop()); | ||
| 1877 | if (kthread_should_stop()) | ||
| 1878 | break; | ||
| 1879 | |||
| 1880 | xenvif_tx_dealloc_action(vif); | ||
| 1881 | cond_resched(); | ||
| 1882 | } | ||
| 1883 | |||
| 1884 | /* Unmap anything remaining */ | ||
| 1885 | if (tx_dealloc_work_todo(vif)) | ||
| 1886 | xenvif_tx_dealloc_action(vif); | ||
| 1887 | |||
| 1888 | return 0; | ||
| 1889 | } | ||
| 1890 | |||
| 1575 | static int __init netback_init(void) | 1891 | static int __init netback_init(void) |
| 1576 | { | 1892 | { |
| 1577 | int rc = 0; | 1893 | int rc = 0; |
| @@ -1589,6 +1905,8 @@ static int __init netback_init(void) | |||
| 1589 | if (rc) | 1905 | if (rc) |
| 1590 | goto failed_init; | 1906 | goto failed_init; |
| 1591 | 1907 | ||
| 1908 | rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); | ||
| 1909 | |||
| 1592 | return 0; | 1910 | return 0; |
| 1593 | 1911 | ||
| 1594 | failed_init: | 1912 | failed_init: |
