author		Daniel Pieczko <dpieczko@solarflare.com>	2013-02-13 05:54:41 -0500
committer	Ben Hutchings <bhutchings@solarflare.com>	2013-03-07 15:22:15 -0500
commit		1648a23fa159e5c433aac06dc5e0d9db36146016 (patch)
tree		c60a40e25d368f34e73b6abffa9a62a890dc9bbf /drivers/net/ethernet
parent		179ea7f039f68ae4247a340bfb59fd861e7def12 (diff)
sfc: allocate more RX buffers per page
Allocating 2 buffers per page is insanely inefficient when MTU is 1500
and PAGE_SIZE is 64K (as it usually is on POWER).  Allocate as many as
we can fit, and choose the refill batch size at run-time so that we
still always use a whole page at once.

[bwh: Fix loop condition to allow for compound pages; rebase]
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
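As a quick illustration of the sizing logic this patch adds in
efx_rx_config_page_split(), here is a minimal user-space sketch of the
arithmetic.  The constant values below (RX DMA length, IP alignment,
cache-line size, page-state size) are assumptions chosen for the example,
not values taken from the driver:

	/* Sketch of the per-page RX buffer arithmetic introduced by this
	 * patch.  All constants are illustrative assumptions, not driver
	 * values.
	 */
	#include <stdio.h>

	#define ALIGN(x, a)        (((x) + (a) - 1) & ~((a) - 1))
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned int page_size  = 65536; /* 64K pages, as on POWER */
		unsigned int dma_len    = 1536;  /* assumed RX DMA length for MTU 1500 */
		unsigned int ip_align   = 2;     /* assumed EFX_PAGE_IP_ALIGN */
		unsigned int cache_line = 64;    /* assumed L1_CACHE_BYTES */
		unsigned int state_size = 16;    /* assumed sizeof(struct efx_rx_page_state) */
		unsigned int preferred  = 8;     /* EFX_RX_PREFERRED_BATCH */

		unsigned int buf_step        = ALIGN(dma_len + ip_align, cache_line);
		unsigned int bufs_per_page   = (page_size - state_size) / buf_step;
		unsigned int pages_per_batch = DIV_ROUND_UP(preferred, bufs_per_page);

		/* Prints "step=1600 bufs/page=40 pages/batch=1"; the old code
		 * would have put only 2 buffers in the same 64K page.
		 */
		printf("step=%u bufs/page=%u pages/batch=%u\n",
		       buf_step, bufs_per_page, pages_per_batch);
		return 0;
	}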
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--	drivers/net/ethernet/sfc/efx.c		18
-rw-r--r--	drivers/net/ethernet/sfc/efx.h		1
-rw-r--r--	drivers/net/ethernet/sfc/net_driver.h	2
-rw-r--r--	drivers/net/ethernet/sfc/rx.c		80
4 files changed, 56 insertions(+), 45 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index a70c458f3cef..f050248e9fba 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -643,10 +643,6 @@ static void efx_start_datapath(struct efx_nic *efx)
 	if (rx_buf_len <= PAGE_SIZE) {
 		efx->rx_scatter = false;
 		efx->rx_buffer_order = 0;
-		if (rx_buf_len <= PAGE_SIZE / 2)
-			efx->rx_buffer_truesize = PAGE_SIZE / 2;
-		else
-			efx->rx_buffer_truesize = PAGE_SIZE;
 	} else if (efx->type->can_rx_scatter) {
 		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
 			     EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE >
@@ -654,14 +650,22 @@ static void efx_start_datapath(struct efx_nic *efx)
 		efx->rx_scatter = true;
 		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
 		efx->rx_buffer_order = 0;
-		efx->rx_buffer_truesize = PAGE_SIZE / 2;
 	} else {
 		efx->rx_scatter = false;
 		efx->rx_buffer_order = get_order(rx_buf_len);
-		efx->rx_buffer_truesize = PAGE_SIZE << efx->rx_buffer_order;
 	}
 
-	efx->rx_bufs_per_page = (rx_buf_len <= PAGE_SIZE / 2) ? 2 : 1;
+	efx_rx_config_page_split(efx);
+	if (efx->rx_buffer_order)
+		netif_dbg(efx, drv, efx->net_dev,
+			  "RX buf len=%u; page order=%u batch=%u\n",
+			  efx->rx_dma_len, efx->rx_buffer_order,
+			  efx->rx_pages_per_batch);
+	else
+		netif_dbg(efx, drv, efx->net_dev,
+			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
+			  efx->rx_dma_len, efx->rx_page_buf_step,
+			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);
 
 	/* RX filters also have scatter-enabled flags */
 	if (efx->rx_scatter != old_rx_scatter)
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 211da79a65e8..8372da239b43 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -33,6 +33,7 @@ extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc);
 extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
 
 /* RX */
+extern void efx_rx_config_page_split(struct efx_nic *efx);
 extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
 extern void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
 extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index e22e75c8f635..9bd433a095c5 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -825,7 +825,9 @@ struct efx_nic {
 	unsigned int rx_dma_len;
 	unsigned int rx_buffer_order;
 	unsigned int rx_buffer_truesize;
+	unsigned int rx_page_buf_step;
 	unsigned int rx_bufs_per_page;
+	unsigned int rx_pages_per_batch;
 	u8 rx_hash_key[40];
 	u32 rx_indir_table[128];
 	bool rx_scatter;
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 4cc2ba48a912..a948b36c1910 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -25,19 +25,15 @@
 #include "selftest.h"
 #include "workarounds.h"
 
-/* Number of RX descriptors pushed at once. */
-#define EFX_RX_BATCH 8
+/* Preferred number of descriptors to fill at once */
+#define EFX_RX_PREFERRED_BATCH 8U
 
 /* Number of RX buffers to recycle pages for.  When creating the RX page recycle
  * ring, this number is divided by the number of buffers per page to calculate
  * the number of pages to store in the RX page recycle ring.
  */
 #define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_BATCH)
-
-/* Maximum length for an RX descriptor sharing a page */
-#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state) \
-			  - EFX_PAGE_IP_ALIGN)
+#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
 
 /* Size of buffer allocated for skb header area. */
 #define EFX_SKB_HEADERS 64u
@@ -95,6 +91,19 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx,
 				DMA_FROM_DEVICE);
 }
 
+void efx_rx_config_page_split(struct efx_nic *efx)
+{
+	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + EFX_PAGE_IP_ALIGN,
+				      L1_CACHE_BYTES);
+	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
+		 efx->rx_page_buf_step);
+	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+		efx->rx_bufs_per_page;
+	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
+					       efx->rx_bufs_per_page);
+}
+
 /* Check the RX page recycle ring for a page that can be reused. */
 static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
 {
@@ -134,10 +143,10 @@ static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
  *
  * @rx_queue:		Efx RX queue
  *
- * This allocates memory for EFX_RX_BATCH receive buffers, maps them for DMA,
- * and populates struct efx_rx_buffers for each one. Return a negative error
- * code or 0 on success. If a single page can be split between two buffers,
- * then the page will either be inserted fully, or not at at all.
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * struct efx_rx_buffers for each one. Return a negative error code or
+ * 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
  */
 static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 {
@@ -149,10 +158,8 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 	dma_addr_t dma_addr;
 	unsigned index, count;
 
-	/* We can split a page between two buffers */
-	BUILD_BUG_ON(EFX_RX_BATCH & 1);
-
-	for (count = 0; count < EFX_RX_BATCH; ++count) {
+	count = 0;
+	do {
 		page = efx_reuse_page(rx_queue);
 		if (page == NULL) {
 			page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
@@ -174,32 +181,26 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
 			state = page_address(page);
 			dma_addr = state->dma_addr;
 		}
-		get_page(page);
 
 		dma_addr += sizeof(struct efx_rx_page_state);
 		page_offset = sizeof(struct efx_rx_page_state);
 
-	split:
-		index = rx_queue->added_count & rx_queue->ptr_mask;
-		rx_buf = efx_rx_buffer(rx_queue, index);
-		rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
-		rx_buf->page = page;
-		rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
-		rx_buf->len = efx->rx_dma_len;
-		++rx_queue->added_count;
-
-		if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) {
-			/* Use the second half of the page */
-			get_page(page);
+		do {
+			index = rx_queue->added_count & rx_queue->ptr_mask;
+			rx_buf = efx_rx_buffer(rx_queue, index);
+			rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
+			rx_buf->page = page;
+			rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
+			rx_buf->len = efx->rx_dma_len;
 			rx_buf->flags = 0;
-			dma_addr += (PAGE_SIZE >> 1);
-			page_offset += (PAGE_SIZE >> 1);
-			++count;
-			goto split;
-		}
+			++rx_queue->added_count;
+			get_page(page);
+			dma_addr += efx->rx_page_buf_step;
+			page_offset += efx->rx_page_buf_step;
+		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
 
 		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
-	}
+	} while (++count < efx->rx_pages_per_batch);
 
 	return 0;
 }
@@ -307,7 +308,8 @@ static void efx_recycle_rx_buffers(struct efx_channel *channel,
  */
 void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 {
-	unsigned fill_level;
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned int fill_level, batch_size;
 	int space, rc = 0;
 
 	/* Calculate current fill level, and exit if we don't need to fill */
@@ -322,8 +324,9 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 		rx_queue->min_fill = fill_level;
 	}
 
+	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 	space = rx_queue->max_fill - fill_level;
-	EFX_BUG_ON_PARANOID(space < EFX_RX_BATCH);
+	EFX_BUG_ON_PARANOID(space < batch_size);
 
 	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 		   "RX queue %d fast-filling descriptor ring from"
@@ -340,7 +343,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
 			efx_schedule_slow_fill(rx_queue);
 			goto out;
 		}
-	} while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);
+	} while ((space -= batch_size) >= batch_size);
 
 	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 		   "RX queue %d fast-filled descriptor ring "
@@ -708,7 +711,8 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 
 	/* Initialise limit fields */
 	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
-	max_trigger = max_fill - EFX_RX_BATCH;
+	max_trigger =
+		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 	if (rx_refill_threshold != 0) {
 		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
 		if (trigger > max_trigger)