Diffstat (limited to 'drivers/net/sfc/rx.c')
-rw-r--r--  drivers/net/sfc/rx.c  249
1 files changed, 130 insertions, 119 deletions
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
index 799c461ce7b8..62e43649466e 100644
--- a/drivers/net/sfc/rx.c
+++ b/drivers/net/sfc/rx.c
@@ -1,7 +1,7 @@
 /****************************************************************************
  * Driver for Solarflare Solarstorm network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
- * Copyright 2005-2009 Solarflare Communications Inc.
+ * Copyright 2005-2011 Solarflare Communications Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -14,6 +14,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
+#include <linux/prefetch.h>
 #include <net/ip.h>
 #include <net/checksum.h>
 #include "net_driver.h"
@@ -37,7 +38,7 @@
  * This driver supports two methods for allocating and using RX buffers:
  * each RX buffer may be backed by an skb or by an order-n page.
  *
- * When LRO is in use then the second method has a lower overhead,
+ * When GRO is in use then the second method has a lower overhead,
  * since we don't have to allocate then free skbs on reassembled frames.
  *
  * Values:
@@ -50,25 +51,25 @@
  *
  * - Since pushing and popping descriptors are separated by the rx_queue
  *   size, so the watermarks should be ~rxd_size.
- * - The performance win by using page-based allocation for LRO is less
- *   than the performance hit of using page-based allocation of non-LRO,
+ * - The performance win by using page-based allocation for GRO is less
+ *   than the performance hit of using page-based allocation of non-GRO,
  *   so the watermarks should reflect this.
  *
  * Per channel we maintain a single variable, updated by each channel:
  *
- *   rx_alloc_level += (lro_performed ? RX_ALLOC_FACTOR_LRO :
+ *   rx_alloc_level += (gro_performed ? RX_ALLOC_FACTOR_GRO :
  *                      RX_ALLOC_FACTOR_SKB)
  * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which
  * limits the hysteresis), and update the allocation strategy:
  *
- *   rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ?
+ *   rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_GRO ?
  *                      RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB)
  */
 static int rx_alloc_method = RX_ALLOC_METHOD_AUTO;
 
-#define RX_ALLOC_LEVEL_LRO 0x2000
+#define RX_ALLOC_LEVEL_GRO 0x2000
 #define RX_ALLOC_LEVEL_MAX 0x3000
-#define RX_ALLOC_FACTOR_LRO 1
+#define RX_ALLOC_FACTOR_GRO 1
 #define RX_ALLOC_FACTOR_SKB (-2)
 
 /* This is the percentage fill level below which new RX descriptors
@@ -89,24 +90,37 @@ static unsigned int rx_refill_limit = 95;
  */
 #define EFX_RXD_HEAD_ROOM 2
 
-static inline unsigned int efx_rx_buf_offset(struct efx_rx_buffer *buf)
+/* Offset of ethernet header within page */
+static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx,
+                                             struct efx_rx_buffer *buf)
 {
         /* Offset is always within one page, so we don't need to consider
          * the page order.
          */
-        return (__force unsigned long) buf->data & (PAGE_SIZE - 1);
+        return (((__force unsigned long) buf->dma_addr & (PAGE_SIZE - 1)) +
+                efx->type->rx_buffer_hash_size);
 }
 static inline unsigned int efx_rx_buf_size(struct efx_nic *efx)
 {
         return PAGE_SIZE << efx->rx_buffer_order;
 }
 
-static inline u32 efx_rx_buf_hash(struct efx_rx_buffer *buf)
+static u8 *efx_rx_buf_eh(struct efx_nic *efx, struct efx_rx_buffer *buf)
 {
+        if (buf->is_page)
+                return page_address(buf->u.page) + efx_rx_buf_offset(efx, buf);
+        else
+                return ((u8 *)buf->u.skb->data +
+                        efx->type->rx_buffer_hash_size);
+}
+
+static inline u32 efx_rx_buf_hash(const u8 *eh)
+{
+        /* The ethernet header is always directly after any hash. */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || NET_IP_ALIGN % 4 == 0
-        return __le32_to_cpup((const __le32 *)(buf->data - 4));
+        return __le32_to_cpup((const __le32 *)(eh - 4));
 #else
-        const u8 *data = (const u8 *)(buf->data - 4);
+        const u8 *data = eh - 4;
         return ((u32)data[0] |
                 (u32)data[1] << 8 |
                 (u32)data[2] << 16 |
@@ -129,31 +143,31 @@ static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue)
         struct efx_nic *efx = rx_queue->efx;
         struct net_device *net_dev = efx->net_dev;
         struct efx_rx_buffer *rx_buf;
+        struct sk_buff *skb;
         int skb_len = efx->rx_buffer_len;
         unsigned index, count;
 
         for (count = 0; count < EFX_RX_BATCH; ++count) {
-                index = rx_queue->added_count & EFX_RXQ_MASK;
+                index = rx_queue->added_count & rx_queue->ptr_mask;
                 rx_buf = efx_rx_buffer(rx_queue, index);
 
-                rx_buf->skb = netdev_alloc_skb(net_dev, skb_len);
-                if (unlikely(!rx_buf->skb))
+                rx_buf->u.skb = skb = netdev_alloc_skb(net_dev, skb_len);
+                if (unlikely(!skb))
                         return -ENOMEM;
-                rx_buf->page = NULL;
 
                 /* Adjust the SKB for padding and checksum */
-                skb_reserve(rx_buf->skb, NET_IP_ALIGN);
+                skb_reserve(skb, NET_IP_ALIGN);
                 rx_buf->len = skb_len - NET_IP_ALIGN;
-                rx_buf->data = (char *)rx_buf->skb->data;
-                rx_buf->skb->ip_summed = CHECKSUM_UNNECESSARY;
+                rx_buf->is_page = false;
+                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
                 rx_buf->dma_addr = pci_map_single(efx->pci_dev,
-                                                  rx_buf->data, rx_buf->len,
+                                                  skb->data, rx_buf->len,
                                                   PCI_DMA_FROMDEVICE);
                 if (unlikely(pci_dma_mapping_error(efx->pci_dev,
                                                    rx_buf->dma_addr))) {
-                        dev_kfree_skb_any(rx_buf->skb);
-                        rx_buf->skb = NULL;
+                        dev_kfree_skb_any(skb);
+                        rx_buf->u.skb = NULL;
                         return -EIO;
                 }
 
@@ -208,13 +222,12 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue)
                 dma_addr += sizeof(struct efx_rx_page_state);
 
         split:
-                index = rx_queue->added_count & EFX_RXQ_MASK;
+                index = rx_queue->added_count & rx_queue->ptr_mask;
                 rx_buf = efx_rx_buffer(rx_queue, index);
                 rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
-                rx_buf->skb = NULL;
-                rx_buf->page = page;
-                rx_buf->data = page_addr + EFX_PAGE_IP_ALIGN;
+                rx_buf->u.page = page;
                 rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN;
+                rx_buf->is_page = true;
                 ++rx_queue->added_count;
                 ++rx_queue->alloc_page_count;
                 ++state->refcnt;
@@ -235,19 +248,17 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue)
 static void efx_unmap_rx_buffer(struct efx_nic *efx,
                                 struct efx_rx_buffer *rx_buf)
 {
-        if (rx_buf->page) {
+        if (rx_buf->is_page && rx_buf->u.page) {
                 struct efx_rx_page_state *state;
 
-                EFX_BUG_ON_PARANOID(rx_buf->skb);
-
-                state = page_address(rx_buf->page);
+                state = page_address(rx_buf->u.page);
                 if (--state->refcnt == 0) {
                         pci_unmap_page(efx->pci_dev,
                                        state->dma_addr,
                                        efx_rx_buf_size(efx),
                                        PCI_DMA_FROMDEVICE);
                 }
-        } else if (likely(rx_buf->skb)) {
+        } else if (!rx_buf->is_page && rx_buf->u.skb) {
                 pci_unmap_single(efx->pci_dev, rx_buf->dma_addr,
                                  rx_buf->len, PCI_DMA_FROMDEVICE);
         }
@@ -256,12 +267,12 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx,
 static void efx_free_rx_buffer(struct efx_nic *efx,
                                struct efx_rx_buffer *rx_buf)
 {
-        if (rx_buf->page) {
-                __free_pages(rx_buf->page, efx->rx_buffer_order);
-                rx_buf->page = NULL;
-        } else if (likely(rx_buf->skb)) {
-                dev_kfree_skb_any(rx_buf->skb);
-                rx_buf->skb = NULL;
+        if (rx_buf->is_page && rx_buf->u.page) {
+                __free_pages(rx_buf->u.page, efx->rx_buffer_order);
+                rx_buf->u.page = NULL;
+        } else if (!rx_buf->is_page && rx_buf->u.skb) {
+                dev_kfree_skb_any(rx_buf->u.skb);
+                rx_buf->u.skb = NULL;
         }
 }
 
@@ -277,7 +288,7 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
 static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue,
                                     struct efx_rx_buffer *rx_buf)
 {
-        struct efx_rx_page_state *state = page_address(rx_buf->page);
+        struct efx_rx_page_state *state = page_address(rx_buf->u.page);
         struct efx_rx_buffer *new_buf;
         unsigned fill_level, index;
 
@@ -285,23 +296,21 @@ static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue,
          * we'd like to insert an additional descriptor whilst leaving
          * EFX_RXD_HEAD_ROOM for the non-recycle path */
         fill_level = (rx_queue->added_count - rx_queue->removed_count + 2);
-        if (unlikely(fill_level >= EFX_RXQ_SIZE - EFX_RXD_HEAD_ROOM)) {
+        if (unlikely(fill_level > rx_queue->max_fill)) {
                 /* We could place "state" on a list, and drain the list in
                  * efx_fast_push_rx_descriptors(). For now, this will do. */
                 return;
         }
 
         ++state->refcnt;
-        get_page(rx_buf->page);
+        get_page(rx_buf->u.page);
 
-        index = rx_queue->added_count & EFX_RXQ_MASK;
+        index = rx_queue->added_count & rx_queue->ptr_mask;
         new_buf = efx_rx_buffer(rx_queue, index);
         new_buf->dma_addr = rx_buf->dma_addr ^ (PAGE_SIZE >> 1);
-        new_buf->skb = NULL;
-        new_buf->page = rx_buf->page;
-        new_buf->data = (void *)
-                ((__force unsigned long)rx_buf->data ^ (PAGE_SIZE >> 1));
+        new_buf->u.page = rx_buf->u.page;
         new_buf->len = rx_buf->len;
+        new_buf->is_page = true;
         ++rx_queue->added_count;
 }
 
@@ -311,20 +320,19 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel,
                              struct efx_rx_buffer *rx_buf)
 {
         struct efx_nic *efx = channel->efx;
-        struct efx_rx_queue *rx_queue = &efx->rx_queue[channel->channel];
+        struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
         struct efx_rx_buffer *new_buf;
         unsigned index;
 
-        if (rx_buf->page != NULL && efx->rx_buffer_len <= EFX_RX_HALF_PAGE &&
-            page_count(rx_buf->page) == 1)
+        if (rx_buf->is_page && efx->rx_buffer_len <= EFX_RX_HALF_PAGE &&
+            page_count(rx_buf->u.page) == 1)
                 efx_resurrect_rx_buffer(rx_queue, rx_buf);
 
-        index = rx_queue->added_count & EFX_RXQ_MASK;
+        index = rx_queue->added_count & rx_queue->ptr_mask;
         new_buf = efx_rx_buffer(rx_queue, index);
 
         memcpy(new_buf, rx_buf, sizeof(*new_buf));
-        rx_buf->page = NULL;
-        rx_buf->skb = NULL;
+        rx_buf->u.page = NULL;
         ++rx_queue->added_count;
 }
 
@@ -341,13 +349,13 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel,
  */
 void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 {
-        struct efx_channel *channel = rx_queue->channel;
+        struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
         unsigned fill_level;
         int space, rc = 0;
 
         /* Calculate current fill level, and exit if we don't need to fill */
         fill_level = (rx_queue->added_count - rx_queue->removed_count);
-        EFX_BUG_ON_PARANOID(fill_level > EFX_RXQ_SIZE);
+        EFX_BUG_ON_PARANOID(fill_level > rx_queue->efx->rxq_entries);
         if (fill_level >= rx_queue->fast_fill_trigger)
                 goto out;
 
@@ -364,7 +372,8 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
         netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
                    "RX queue %d fast-filling descriptor ring from"
                    " level %d to level %d using %s allocation\n",
-                   rx_queue->queue, fill_level, rx_queue->fast_fill_limit,
+                   efx_rx_queue_index(rx_queue), fill_level,
+                   rx_queue->fast_fill_limit,
                    channel->rx_alloc_push_pages ? "page" : "skb");
 
         do {
@@ -382,7 +391,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 
         netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
                    "RX queue %d fast-filled descriptor ring "
-                   "to level %d\n", rx_queue->queue,
+                   "to level %d\n", efx_rx_queue_index(rx_queue),
                    rx_queue->added_count - rx_queue->removed_count);
 
  out:
@@ -393,7 +402,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 void efx_rx_slow_fill(unsigned long context)
 {
         struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context;
-        struct efx_channel *channel = rx_queue->channel;
+        struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
 
         /* Post an event to cause NAPI to run and refill the queue */
         efx_nic_generate_fill_event(channel);
@@ -421,45 +430,44 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
                 netif_err(efx, rx_err, efx->net_dev,
                           " RX queue %d seriously overlength "
                           "RX event (0x%x > 0x%x+0x%x). Leaking\n",
-                          rx_queue->queue, len, max_len,
+                          efx_rx_queue_index(rx_queue), len, max_len,
                           efx->type->rx_buffer_padding);
                 /* If this buffer was skb-allocated, then the meta
                  * data at the end of the skb will be trashed. So
                  * we have no choice but to leak the fragment.
                  */
-                *leak_packet = (rx_buf->skb != NULL);
+                *leak_packet = !rx_buf->is_page;
                 efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
         } else {
                 if (net_ratelimit())
                         netif_err(efx, rx_err, efx->net_dev,
                                   " RX queue %d overlength RX event "
                                   "(0x%x > 0x%x)\n",
-                                  rx_queue->queue, len, max_len);
+                                  efx_rx_queue_index(rx_queue), len, max_len);
         }
 
-        rx_queue->channel->n_rx_overlength++;
+        efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
 }
 
-/* Pass a received packet up through the generic LRO stack
+/* Pass a received packet up through the generic GRO stack
  *
  * Handles driverlink veto, and passes the fragment up via
- * the appropriate LRO method
+ * the appropriate GRO method
  */
-static void efx_rx_packet_lro(struct efx_channel *channel,
+static void efx_rx_packet_gro(struct efx_channel *channel,
                               struct efx_rx_buffer *rx_buf,
-                              bool checksummed)
+                              const u8 *eh, bool checksummed)
 {
         struct napi_struct *napi = &channel->napi_str;
         gro_result_t gro_result;
 
-        /* Pass the skb/page into the LRO engine */
-        if (rx_buf->page) {
+        /* Pass the skb/page into the GRO engine */
+        if (rx_buf->is_page) {
                 struct efx_nic *efx = channel->efx;
-                struct page *page = rx_buf->page;
+                struct page *page = rx_buf->u.page;
                 struct sk_buff *skb;
 
-                EFX_BUG_ON_PARANOID(rx_buf->skb);
-                rx_buf->page = NULL;
+                rx_buf->u.page = NULL;
 
                 skb = napi_get_frags(napi);
                 if (!skb) {
@@ -468,11 +476,11 @@ static void efx_rx_packet_lro(struct efx_channel *channel,
                 }
 
                 if (efx->net_dev->features & NETIF_F_RXHASH)
-                        skb->rxhash = efx_rx_buf_hash(rx_buf);
+                        skb->rxhash = efx_rx_buf_hash(eh);
 
                 skb_shinfo(skb)->frags[0].page = page;
                 skb_shinfo(skb)->frags[0].page_offset =
-                        efx_rx_buf_offset(rx_buf);
+                        efx_rx_buf_offset(efx, rx_buf);
                 skb_shinfo(skb)->frags[0].size = rx_buf->len;
                 skb_shinfo(skb)->nr_frags = 1;
 
@@ -486,11 +494,10 @@ static void efx_rx_packet_lro(struct efx_channel *channel,
 
                 gro_result = napi_gro_frags(napi);
         } else {
-                struct sk_buff *skb = rx_buf->skb;
+                struct sk_buff *skb = rx_buf->u.skb;
 
-                EFX_BUG_ON_PARANOID(!skb);
                 EFX_BUG_ON_PARANOID(!checksummed);
-                rx_buf->skb = NULL;
+                rx_buf->u.skb = NULL;
 
                 gro_result = napi_gro_receive(napi, skb);
         }
@@ -498,7 +505,7 @@ static void efx_rx_packet_lro(struct efx_channel *channel,
         if (gro_result == GRO_NORMAL) {
                 channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
         } else if (gro_result != GRO_DROP) {
-                channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO;
+                channel->rx_alloc_level += RX_ALLOC_FACTOR_GRO;
                 channel->irq_mod_score += 2;
         }
 }
@@ -507,14 +514,11 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
                    unsigned int len, bool checksummed, bool discard)
 {
         struct efx_nic *efx = rx_queue->efx;
-        struct efx_channel *channel = rx_queue->channel;
+        struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
         struct efx_rx_buffer *rx_buf;
         bool leak_packet = false;
 
         rx_buf = efx_rx_buffer(rx_queue, index);
-        EFX_BUG_ON_PARANOID(!rx_buf->data);
-        EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page);
-        EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page));
 
         /* This allows the refill path to post another buffer.
          * EFX_RXD_HEAD_ROOM ensures that the slot we are using
@@ -528,7 +532,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 
         netif_vdbg(efx, rx_status, efx->net_dev,
                    "RX queue %d received id %x at %llx+%x %s%s\n",
-                   rx_queue->queue, index,
+                   efx_rx_queue_index(rx_queue), index,
                    (unsigned long long)rx_buf->dma_addr, len,
                    (checksummed ? " [SUMMED]" : ""),
                    (discard ? " [DISCARD]" : ""));
@@ -553,19 +557,18 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
         /* Prefetch nice and early so data will (hopefully) be in cache by
          * the time we look at it.
          */
-        prefetch(rx_buf->data);
+        prefetch(efx_rx_buf_eh(efx, rx_buf));
 
         /* Pipeline receives so that we give time for packet headers to be
          * prefetched into cache.
          */
-        rx_buf->len = len;
+        rx_buf->len = len - efx->type->rx_buffer_hash_size;
 out:
-        if (rx_queue->channel->rx_pkt)
-                __efx_rx_packet(rx_queue->channel,
-                                rx_queue->channel->rx_pkt,
-                                rx_queue->channel->rx_pkt_csummed);
-        rx_queue->channel->rx_pkt = rx_buf;
-        rx_queue->channel->rx_pkt_csummed = checksummed;
+        if (channel->rx_pkt)
+                __efx_rx_packet(channel,
+                                channel->rx_pkt, channel->rx_pkt_csummed);
+        channel->rx_pkt = rx_buf;
+        channel->rx_pkt_csummed = checksummed;
 }
 
 /* Handle a received packet. Second half: Touches packet payload. */
@@ -574,48 +577,49 @@ void __efx_rx_packet(struct efx_channel *channel,
 {
         struct efx_nic *efx = channel->efx;
         struct sk_buff *skb;
-
-        rx_buf->data += efx->type->rx_buffer_hash_size;
-        rx_buf->len -= efx->type->rx_buffer_hash_size;
+        u8 *eh = efx_rx_buf_eh(efx, rx_buf);
 
         /* If we're in loopback test, then pass the packet directly to the
          * loopback layer, and free the rx_buf here
          */
         if (unlikely(efx->loopback_selftest)) {
-                efx_loopback_rx_packet(efx, rx_buf->data, rx_buf->len);
+                efx_loopback_rx_packet(efx, eh, rx_buf->len);
                 efx_free_rx_buffer(efx, rx_buf);
                 return;
         }
 
-        if (rx_buf->skb) {
-                prefetch(skb_shinfo(rx_buf->skb));
+        if (!rx_buf->is_page) {
+                skb = rx_buf->u.skb;
 
-                skb_reserve(rx_buf->skb, efx->type->rx_buffer_hash_size);
-                skb_put(rx_buf->skb, rx_buf->len);
+                prefetch(skb_shinfo(skb));
+
+                skb_reserve(skb, efx->type->rx_buffer_hash_size);
+                skb_put(skb, rx_buf->len);
 
                 if (efx->net_dev->features & NETIF_F_RXHASH)
-                        rx_buf->skb->rxhash = efx_rx_buf_hash(rx_buf);
+                        skb->rxhash = efx_rx_buf_hash(eh);
 
                 /* Move past the ethernet header. rx_buf->data still points
                  * at the ethernet header */
-                rx_buf->skb->protocol = eth_type_trans(rx_buf->skb,
-                                                       efx->net_dev);
+                skb->protocol = eth_type_trans(skb, efx->net_dev);
 
-                skb_record_rx_queue(rx_buf->skb, channel->channel);
+                skb_record_rx_queue(skb, channel->channel);
         }
 
-        if (likely(checksummed || rx_buf->page)) {
-                efx_rx_packet_lro(channel, rx_buf, checksummed);
+        if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
+                checksummed = false;
+
+        if (likely(checksummed || rx_buf->is_page)) {
+                efx_rx_packet_gro(channel, rx_buf, eh, checksummed);
                 return;
         }
 
         /* We now own the SKB */
-        skb = rx_buf->skb;
-        rx_buf->skb = NULL;
-        EFX_BUG_ON_PARANOID(!skb);
+        skb = rx_buf->u.skb;
+        rx_buf->u.skb = NULL;
 
         /* Set the SKB flags */
-        skb->ip_summed = CHECKSUM_NONE;
+        skb_checksum_none_assert(skb);
 
         /* Pass the packet up */
         netif_receive_skb(skb);
@@ -628,7 +632,7 @@ void efx_rx_strategy(struct efx_channel *channel)
 {
         enum efx_rx_alloc_method method = rx_alloc_method;
 
-        /* Only makes sense to use page based allocation if LRO is enabled */
+        /* Only makes sense to use page based allocation if GRO is enabled */
         if (!(channel->efx->net_dev->features & NETIF_F_GRO)) {
                 method = RX_ALLOC_METHOD_SKB;
         } else if (method == RX_ALLOC_METHOD_AUTO) {
@@ -639,7 +643,7 @@ void efx_rx_strategy(struct efx_channel *channel)
                 channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX;
 
                 /* Decide on the allocation method */
-                method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO) ?
+                method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_GRO) ?
                           RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB);
         }
 
@@ -650,15 +654,22 @@ void efx_rx_strategy(struct efx_channel *channel)
 int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
 {
         struct efx_nic *efx = rx_queue->efx;
-        unsigned int rxq_size;
+        unsigned int entries;
         int rc;
 
+        /* Create the smallest power-of-two aligned ring */
+        entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
+        EFX_BUG_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+        rx_queue->ptr_mask = entries - 1;
+
         netif_dbg(efx, probe, efx->net_dev,
-                  "creating RX queue %d\n", rx_queue->queue);
+                  "creating RX queue %d size %#x mask %#x\n",
+                  efx_rx_queue_index(rx_queue), efx->rxq_entries,
+                  rx_queue->ptr_mask);
 
         /* Allocate RX buffers */
-        rxq_size = EFX_RXQ_SIZE * sizeof(*rx_queue->buffer);
-        rx_queue->buffer = kzalloc(rxq_size, GFP_KERNEL);
+        rx_queue->buffer = kzalloc(entries * sizeof(*rx_queue->buffer),
+                                   GFP_KERNEL);
         if (!rx_queue->buffer)
                 return -ENOMEM;
 
@@ -672,20 +683,20 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
 
 void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 {
+        struct efx_nic *efx = rx_queue->efx;
         unsigned int max_fill, trigger, limit;
 
         netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-                  "initialising RX queue %d\n", rx_queue->queue);
+                  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
 
         /* Initialise ptr fields */
         rx_queue->added_count = 0;
         rx_queue->notified_count = 0;
         rx_queue->removed_count = 0;
         rx_queue->min_fill = -1U;
-        rx_queue->min_overfill = -1U;
 
         /* Initialise limit fields */
-        max_fill = EFX_RXQ_SIZE - EFX_RXD_HEAD_ROOM;
+        max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
         trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
         limit = max_fill * min(rx_refill_limit, 100U) / 100U;
 
@@ -703,14 +714,14 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
         struct efx_rx_buffer *rx_buf;
 
         netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-                  "shutting down RX queue %d\n", rx_queue->queue);
+                  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
 
         del_timer_sync(&rx_queue->slow_fill);
         efx_nic_fini_rx(rx_queue);
 
         /* Release RX buffers NB start at index 0 not current HW ptr */
         if (rx_queue->buffer) {
-                for (i = 0; i <= EFX_RXQ_MASK; i++) {
+                for (i = 0; i <= rx_queue->ptr_mask; i++) {
                         rx_buf = efx_rx_buffer(rx_queue, i);
                         efx_fini_rx_buffer(rx_queue, rx_buf);
                 }
@@ -720,7 +731,7 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
 {
         netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-                  "destroying RX queue %d\n", rx_queue->queue);
+                  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
 
         efx_nic_remove_rx(rx_queue);
 