author    Daniel Pieczko <dpieczko@solarflare.com>  2013-02-13 05:54:41 -0500
committer Ben Hutchings <bhutchings@solarflare.com>  2013-03-07 15:22:13 -0500
commit    2768935a46603bb9bdd121864b1f2b2e8a71cccc (patch)
tree      4d5704c97f096cdf0d160bc18694b89927639cfd
parent    85740cdf0b84224a9fce62dc9150008ef8d6ab4e (diff)
sfc: reuse pages to avoid DMA mapping/unmapping costs
On POWER systems, DMA mapping/unmapping operations are very expensive. These changes reduce these costs by trying to reuse DMA mapped pages.

After all the buffers associated with a page have been processed and passed up, the page is placed into a ring (if there is room). For each page that is required for a refill operation, a page in the ring is examined to determine if its page count has fallen to 1, i.e. the kernel has released its reference to these packets. If this is the case, the page can be immediately added back into the RX descriptor ring, without having to re-map it for DMA.

If the kernel is still holding a reference to this page, it is removed from the ring and unmapped for DMA. Then a new page, which can immediately be used by RX buffers in the descriptor ring, is allocated and DMA mapped.

The time a page needs to spend in the recycle ring before the kernel has released its page references is based on the number of buffers that use this page. As large pages can hold more RX buffers, the RX recycle ring can be shorter. This reduces memory usage on POWER systems, while maintaining the performance gain achieved by recycling pages, following the driver change to pack more than two RX buffers into large pages.

When an IOMMU is not present, the recycle ring can be small to reduce memory usage, since DMA mapping operations are inexpensive.

With a small recycle ring, attempting to refill the descriptor queue with more buffers than the equivalent size of the recycle ring could ultimately lead to memory leaks if page entries in the recycle ring were overwritten. To prevent this, the check to see if the recycle ring is full is changed to check if the next entry to be written is NULL.

[bwh: Combine and rebase several commits so this is complete before the following buffer-packing changes. Remove module parameter.]
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
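The ring bookkeeping described above is easiest to see in isolation. The following stand-alone, user-space sketch models the recycle-ring behaviour only; fake_page and its refcount field are illustrative stand-ins for struct page and page_count(), free() stands in for dma_unmap_page() plus put_page(), and reuse_page()/recycle_page() loosely mirror the efx_reuse_page() and efx_recycle_rx_page() functions added in rx.c below. It is not driver code.

/* Stand-alone model of the page recycle ring (illustration only). */
#include <stdio.h>
#include <stdlib.h>

#define RING_SIZE 4			/* must be a power of two */
#define PTR_MASK  (RING_SIZE - 1)

struct fake_page { int refcount; };	/* stands in for struct page */

static struct fake_page *ring[RING_SIZE];	/* like rx_queue->page_ring */
static unsigned int page_add = RING_SIZE;	/* write counter, starts one lap ahead */
static unsigned int page_remove;		/* read counter */

/* Refill path: take the next entry and reuse it only if we hold the sole
 * reference; otherwise drop it so a fresh page gets allocated and mapped.
 */
static struct fake_page *reuse_page(void)
{
	unsigned int index = page_remove & PTR_MASK;
	struct fake_page *page = ring[index];

	if (page == NULL)
		return NULL;
	ring[index] = NULL;
	if (page_remove != page_add)
		++page_remove;

	if (page->refcount == 1)	/* the stack released its reference */
		return page;
	free(page);			/* still in use: "unmap" and drop */
	return NULL;
}

/* Completion path: store the page only if the next write slot is empty, so a
 * full ring can never overwrite (and leak) an entry that is already there.
 */
static void recycle_page(struct fake_page *page)
{
	unsigned int index = page_add & PTR_MASK;

	if (ring[index] == NULL) {
		if ((page_remove & PTR_MASK) == index)
			++page_remove;	/* keep the reader behind the writer */
		ring[index] = page;
		++page_add;
		return;
	}
	free(page);			/* ring full: drop rather than overwrite */
}

int main(void)
{
	struct fake_page *pages[RING_SIZE + 1];
	unsigned int i;

	/* Recycle one page more than the ring holds: the last one is dropped
	 * because its slot is still occupied (the NULL check above).
	 */
	for (i = 0; i < RING_SIZE + 1; i++) {
		pages[i] = calloc(1, sizeof(*pages[i]));
		pages[i]->refcount = 1;
		recycle_page(pages[i]);
	}

	/* The oldest page becomes eligible for reuse after a full lap. */
	printf("oldest page reused: %s\n",
	       reuse_page() == pages[0] ? "yes" : "no");
	return 0;
}

In this model a recycled page is only considered for reuse about one ring-length later, which is what gives the kernel time to drop its remaining references before the page_count check runs.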
-rw-r--r--  drivers/net/ethernet/sfc/efx.c           2
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h   19
-rw-r--r--  drivers/net/ethernet/sfc/rx.c          299
3 files changed, 226 insertions(+), 94 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 1213af5024d1..a70c458f3cef 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -661,6 +661,8 @@ static void efx_start_datapath(struct efx_nic *efx)
 		efx->rx_buffer_truesize = PAGE_SIZE << efx->rx_buffer_order;
 	}
 
+	efx->rx_bufs_per_page = (rx_buf_len <= PAGE_SIZE / 2) ? 2 : 1;
+
 	/* RX filters also have scatter-enabled flags */
 	if (efx->rx_scatter != old_rx_scatter)
 		efx_filter_update_rx_scatter(efx);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index e41b54bada7c..370c5bcebad9 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -264,12 +264,22 @@ struct efx_rx_page_state {
  * @notified_count: Number of buffers given to NIC (<= @added_count).
  * @removed_count: Number of buffers removed from the receive queue.
  * @scatter_n: Number of buffers used by current packet
+ * @page_ring: The ring to store DMA mapped pages for reuse.
+ * @page_add: Counter to calculate the write pointer for the recycle ring.
+ * @page_remove: Counter to calculate the read pointer for the recycle ring.
+ * @page_recycle_count: The number of pages that have been recycled.
+ * @page_recycle_failed: The number of pages that couldn't be recycled because
+ *	the kernel still held a reference to them.
+ * @page_recycle_full: The number of pages that were released because the
+ *	recycle ring was full.
+ * @page_ptr_mask: The number of pages in the RX recycle ring minus 1.
  * @max_fill: RX descriptor maximum fill level (<= ring size)
  * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
  *	(<= @max_fill)
  * @min_fill: RX descriptor minimum non-zero fill level.
  *	This records the minimum fill level observed when a ring
  *	refill was triggered.
+ * @recycle_count: RX buffer recycle counter.
  * @slow_fill: Timer used to defer efx_nic_generate_fill_event().
  */
 struct efx_rx_queue {
@@ -285,10 +295,18 @@ struct efx_rx_queue {
 	unsigned int notified_count;
 	unsigned int removed_count;
 	unsigned int scatter_n;
+	struct page **page_ring;
+	unsigned int page_add;
+	unsigned int page_remove;
+	unsigned int page_recycle_count;
+	unsigned int page_recycle_failed;
+	unsigned int page_recycle_full;
+	unsigned int page_ptr_mask;
 	unsigned int max_fill;
 	unsigned int fast_fill_trigger;
 	unsigned int min_fill;
 	unsigned int min_overfill;
+	unsigned int recycle_count;
 	struct timer_list slow_fill;
 	unsigned int slow_fill_count;
 };
@@ -806,6 +824,7 @@ struct efx_nic {
 	unsigned int rx_dma_len;
 	unsigned int rx_buffer_order;
 	unsigned int rx_buffer_truesize;
+	unsigned int rx_bufs_per_page;
 	u8 rx_hash_key[40];
 	u32 rx_indir_table[128];
 	bool rx_scatter;
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 88aa1ff01e3f..eea56f3ec81c 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -16,6 +16,7 @@
 #include <linux/udp.h>
 #include <linux/prefetch.h>
 #include <linux/moduleparam.h>
+#include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/checksum.h>
 #include "net_driver.h"
@@ -27,6 +28,13 @@
 /* Number of RX descriptors pushed at once. */
 #define EFX_RX_BATCH 8
 
+/* Number of RX buffers to recycle pages for. When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
+#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_BATCH)
+
 /* Maximum length for an RX descriptor sharing a page */
 #define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state) \
 			  - EFX_PAGE_IP_ALIGN)
@@ -79,6 +87,56 @@ efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
 	return rx_buf + 1;
 }
 
+static inline void efx_sync_rx_buffer(struct efx_nic *efx,
+				      struct efx_rx_buffer *rx_buf,
+				      unsigned int len)
+{
+	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
+				DMA_FROM_DEVICE);
+}
+
+/* Return true if this is the last RX buffer using a page. */
+static inline bool efx_rx_is_last_buffer(struct efx_nic *efx,
+					 struct efx_rx_buffer *rx_buf)
+{
+	return (rx_buf->page_offset >= (PAGE_SIZE >> 1) ||
+		efx->rx_dma_len > EFX_RX_HALF_PAGE);
+}
+
+/* Check the RX page recycle ring for a page that can be reused. */
+static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	struct page *page;
+	struct efx_rx_page_state *state;
+	unsigned index;
+
+	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+	page = rx_queue->page_ring[index];
+	if (page == NULL)
+		return NULL;
+
+	rx_queue->page_ring[index] = NULL;
+	/* page_remove cannot exceed page_add. */
+	if (rx_queue->page_remove != rx_queue->page_add)
+		++rx_queue->page_remove;
+
+	/* If page_count is 1 then we hold the only reference to this page. */
+	if (page_count(page) == 1) {
+		++rx_queue->page_recycle_count;
+		return page;
+	} else {
+		state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
+		put_page(page);
+		++rx_queue->page_recycle_failed;
+	}
+
+	return NULL;
+}
+
 /**
  * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
  *
@@ -103,20 +161,28 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 	BUILD_BUG_ON(EFX_RX_BATCH & 1);
 
 	for (count = 0; count < EFX_RX_BATCH; ++count) {
-		page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
-				   efx->rx_buffer_order);
-		if (unlikely(page == NULL))
-			return -ENOMEM;
-		dma_addr = dma_map_page(&efx->pci_dev->dev, page, 0,
-					PAGE_SIZE << efx->rx_buffer_order,
-					DMA_FROM_DEVICE);
-		if (unlikely(dma_mapping_error(&efx->pci_dev->dev, dma_addr))) {
-			__free_pages(page, efx->rx_buffer_order);
-			return -EIO;
+		page = efx_reuse_page(rx_queue);
+		if (page == NULL) {
+			page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
+					   efx->rx_buffer_order);
+			if (unlikely(page == NULL))
+				return -ENOMEM;
+			dma_addr =
+				dma_map_page(&efx->pci_dev->dev, page, 0,
+					     PAGE_SIZE << efx->rx_buffer_order,
+					     DMA_FROM_DEVICE);
+			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
+						       dma_addr))) {
+				__free_pages(page, efx->rx_buffer_order);
+				return -EIO;
+			}
+			state = page_address(page);
+			state->dma_addr = dma_addr;
+		} else {
+			state = page_address(page);
+			dma_addr = state->dma_addr;
 		}
-		state = page_address(page);
-		state->refcnt = 0;
-		state->dma_addr = dma_addr;
+		get_page(page);
 
 		dma_addr += sizeof(struct efx_rx_page_state);
 		page_offset = sizeof(struct efx_rx_page_state);
@@ -128,9 +194,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 		rx_buf->page = page;
 		rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
 		rx_buf->len = efx->rx_dma_len;
-		rx_buf->flags = 0;
 		++rx_queue->added_count;
-		++state->refcnt;
 
 		if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) {
 			/* Use the second half of the page */
@@ -145,99 +209,91 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 	return 0;
 }
 
+/* Unmap a DMA-mapped page. This function is only called for the final RX
+ * buffer in a page.
+ */
 static void efx_unmap_rx_buffer(struct efx_nic *efx,
-				struct efx_rx_buffer *rx_buf,
-				unsigned int used_len)
+				struct efx_rx_buffer *rx_buf)
 {
-	if (rx_buf->page) {
-		struct efx_rx_page_state *state;
-
-		state = page_address(rx_buf->page);
-		if (--state->refcnt == 0) {
-			dma_unmap_page(&efx->pci_dev->dev,
-				       state->dma_addr,
-				       PAGE_SIZE << efx->rx_buffer_order,
-				       DMA_FROM_DEVICE);
-		} else if (used_len) {
-			dma_sync_single_for_cpu(&efx->pci_dev->dev,
-						rx_buf->dma_addr, used_len,
-						DMA_FROM_DEVICE);
-		}
+	struct page *page = rx_buf->page;
+
+	if (page) {
+		struct efx_rx_page_state *state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev,
+			       state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
 	}
 }
 
-static void efx_free_rx_buffer(struct efx_nic *efx,
-			       struct efx_rx_buffer *rx_buf)
+static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf)
 {
 	if (rx_buf->page) {
-		__free_pages(rx_buf->page, efx->rx_buffer_order);
+		put_page(rx_buf->page);
 		rx_buf->page = NULL;
 	}
 }
 
-static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
-			       struct efx_rx_buffer *rx_buf)
+/* Attempt to recycle the page if there is an RX recycle ring; the page can
+ * only be added if this is the final RX buffer, to prevent pages being used in
+ * the descriptor ring and appearing in the recycle ring simultaneously.
+ */
+static void efx_recycle_rx_page(struct efx_channel *channel,
+				struct efx_rx_buffer *rx_buf)
 {
-	efx_unmap_rx_buffer(rx_queue->efx, rx_buf, 0);
-	efx_free_rx_buffer(rx_queue->efx, rx_buf);
-}
+	struct page *page = rx_buf->page;
+	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned index;
 
-/* Attempt to resurrect the other receive buffer that used to share this page,
- * which had previously been passed up to the kernel and freed. */
-static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue,
-				    struct efx_rx_buffer *rx_buf)
-{
-	struct efx_rx_page_state *state = page_address(rx_buf->page);
-	struct efx_rx_buffer *new_buf;
-	unsigned fill_level, index;
-
-	/* +1 because efx_rx_packet() incremented removed_count. +1 because
-	 * we'd like to insert an additional descriptor whilst leaving
-	 * EFX_RXD_HEAD_ROOM for the non-recycle path */
-	fill_level = (rx_queue->added_count - rx_queue->removed_count + 2);
-	if (unlikely(fill_level > rx_queue->max_fill)) {
-		/* We could place "state" on a list, and drain the list in
-		 * efx_fast_push_rx_descriptors(). For now, this will do. */
+	/* Only recycle the page after processing the final buffer. */
+	if (!efx_rx_is_last_buffer(efx, rx_buf))
 		return;
-	}
 
-	++state->refcnt;
-	get_page(rx_buf->page);
+	index = rx_queue->page_add & rx_queue->page_ptr_mask;
+	if (rx_queue->page_ring[index] == NULL) {
+		unsigned read_index = rx_queue->page_remove &
+			rx_queue->page_ptr_mask;
 
-	index = rx_queue->added_count & rx_queue->ptr_mask;
-	new_buf = efx_rx_buffer(rx_queue, index);
-	new_buf->dma_addr = rx_buf->dma_addr ^ (PAGE_SIZE >> 1);
-	new_buf->page = rx_buf->page;
-	new_buf->len = rx_buf->len;
-	++rx_queue->added_count;
+		/* The next slot in the recycle ring is available, but
+		 * increment page_remove if the read pointer currently
+		 * points here.
+		 */
+		if (read_index == index)
+			++rx_queue->page_remove;
+		rx_queue->page_ring[index] = page;
+		++rx_queue->page_add;
+		return;
+	}
+	++rx_queue->page_recycle_full;
+	efx_unmap_rx_buffer(efx, rx_buf);
+	put_page(rx_buf->page);
 }
 
-/* Recycle buffers directly back into the rx_queue. There is always
- * room to add these buffer, because we've just popped them.
- */
+static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
+			       struct efx_rx_buffer *rx_buf)
+{
+	/* Release the page reference we hold for the buffer. */
+	if (rx_buf->page)
+		put_page(rx_buf->page);
+
+	/* If this is the last buffer in a page, unmap and free it. */
+	if (efx_rx_is_last_buffer(rx_queue->efx, rx_buf)) {
+		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
+		efx_free_rx_buffer(rx_buf);
+	}
+	rx_buf->page = NULL;
+}
+
+/* Recycle the pages that are used by buffers that have just been received. */
 static void efx_recycle_rx_buffers(struct efx_channel *channel,
 				   struct efx_rx_buffer *rx_buf,
 				   unsigned int n_frags)
 {
-	struct efx_nic *efx = channel->efx;
 	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-	struct efx_rx_buffer *new_buf;
-	unsigned index;
 
 	do {
-		rx_buf->flags = 0;
-
-		if (efx->rx_dma_len <= EFX_RX_HALF_PAGE &&
-		    page_count(rx_buf->page) == 1)
-			efx_resurrect_rx_buffer(rx_queue, rx_buf);
-
-		index = rx_queue->added_count & rx_queue->ptr_mask;
-		new_buf = efx_rx_buffer(rx_queue, index);
-
-		memcpy(new_buf, rx_buf, sizeof(*new_buf));
-		rx_buf->page = NULL;
-		++rx_queue->added_count;
-
+		efx_recycle_rx_page(channel, rx_buf);
 		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
 	} while (--n_frags);
 }
@@ -451,7 +507,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	struct efx_rx_buffer *rx_buf;
 
 	rx_buf = efx_rx_buffer(rx_queue, index);
-	rx_buf->flags |= flags;
+	rx_buf->flags = flags;
 
 	/* Validate the number of fragments and completed length */
 	if (n_frags == 1) {
@@ -479,6 +535,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	 */
 	if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
 		efx_rx_flush_packet(channel);
+		put_page(rx_buf->page);
 		efx_recycle_rx_buffers(channel, rx_buf, n_frags);
 		return;
 	}
@@ -486,10 +543,10 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 	if (n_frags == 1)
 		rx_buf->len = len;
 
-	/* Release and/or sync DMA mapping - assumes all RX buffers
-	 * consumed in-order per RX queue
+	/* Release and/or sync the DMA mapping - assumes all RX buffers
+	 * consumed in-order per RX queue.
 	 */
-	efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
+	efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
 
 	/* Prefetch nice and early so data will (hopefully) be in cache by
 	 * the time we look at it.
@@ -509,12 +566,16 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 			rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
 			if (--tail_frags == 0)
 				break;
-			efx_unmap_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
+			efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
 		}
 		rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE;
-		efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
+		efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
 	}
 
+	/* All fragments have been DMA-synced, so recycle buffers and pages. */
+	rx_buf = efx_rx_buffer(rx_queue, index);
+	efx_recycle_rx_buffers(channel, rx_buf, n_frags);
+
 	/* Pipeline receives so that we give time for packet headers to be
 	 * prefetched into cache.
 	 */
@@ -532,7 +593,7 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
 
 	skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
 	if (unlikely(skb == NULL)) {
-		efx_free_rx_buffer(channel->efx, rx_buf);
+		efx_free_rx_buffer(rx_buf);
 		return;
 	}
 	skb_record_rx_queue(skb, channel->rx_queue.core_index);
@@ -561,7 +622,7 @@ void __efx_rx_packet(struct efx_channel *channel)
 	 */
 	if (unlikely(efx->loopback_selftest)) {
 		efx_loopback_rx_packet(efx, eh, rx_buf->len);
-		efx_free_rx_buffer(efx, rx_buf);
+		efx_free_rx_buffer(rx_buf);
 		goto out;
 	}
 
@@ -603,9 +664,32 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
 		kfree(rx_queue->buffer);
 		rx_queue->buffer = NULL;
 	}
+
 	return rc;
 }
 
+void efx_init_rx_recycle_ring(struct efx_nic *efx,
+			      struct efx_rx_queue *rx_queue)
+{
+	unsigned int bufs_in_recycle_ring, page_ring_size;
+
+	/* Set the RX recycle ring size */
+#ifdef CONFIG_PPC64
+	bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
+#else
+	if (efx->pci_dev->dev.iommu_group)
+		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
+	else
+		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
+#endif /* CONFIG_PPC64 */
+
+	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
+					    efx->rx_bufs_per_page);
+	rx_queue->page_ring = kcalloc(page_ring_size,
+				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
+	rx_queue->page_ptr_mask = page_ring_size - 1;
+}
+
 void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 {
 	struct efx_nic *efx = rx_queue->efx;
609void efx_init_rx_queue(struct efx_rx_queue *rx_queue) 693void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
610{ 694{
611 struct efx_nic *efx = rx_queue->efx; 695 struct efx_nic *efx = rx_queue->efx;
@@ -619,6 +703,13 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
619 rx_queue->notified_count = 0; 703 rx_queue->notified_count = 0;
620 rx_queue->removed_count = 0; 704 rx_queue->removed_count = 0;
621 rx_queue->min_fill = -1U; 705 rx_queue->min_fill = -1U;
706 efx_init_rx_recycle_ring(efx, rx_queue);
707
708 rx_queue->page_remove = 0;
709 rx_queue->page_add = rx_queue->page_ptr_mask + 1;
710 rx_queue->page_recycle_count = 0;
711 rx_queue->page_recycle_failed = 0;
712 rx_queue->page_recycle_full = 0;
622 713
623 /* Initialise limit fields */ 714 /* Initialise limit fields */
624 max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; 715 max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
@@ -642,6 +733,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
 {
 	int i;
+	struct efx_nic *efx = rx_queue->efx;
 	struct efx_rx_buffer *rx_buf;
 
 	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
@@ -653,13 +745,32 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
 	del_timer_sync(&rx_queue->slow_fill);
 	efx_nic_fini_rx(rx_queue);
 
-	/* Release RX buffers NB start at index 0 not current HW ptr */
+	/* Release RX buffers from the current read ptr to the write ptr */
 	if (rx_queue->buffer) {
-		for (i = 0; i <= rx_queue->ptr_mask; i++) {
-			rx_buf = efx_rx_buffer(rx_queue, i);
+		for (i = rx_queue->removed_count; i < rx_queue->added_count;
+		     i++) {
+			unsigned index = i & rx_queue->ptr_mask;
+			rx_buf = efx_rx_buffer(rx_queue, index);
 			efx_fini_rx_buffer(rx_queue, rx_buf);
 		}
 	}
+
+	/* Unmap and release the pages in the recycle ring. Remove the ring. */
+	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+		struct page *page = rx_queue->page_ring[i];
+		struct efx_rx_page_state *state;
+
+		if (page == NULL)
+			continue;
+
+		state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
+		put_page(page);
+	}
+	kfree(rx_queue->page_ring);
+	rx_queue->page_ring = NULL;
 }
 
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)