author		Ben Hutchings <bhutchings@solarflare.com>	2008-04-27 07:55:59 -0400
committer	Jeff Garzik <jgarzik@redhat.com>	2008-04-29 01:42:43 -0400
commit		8ceee660aacb29721e26f08e336c58dc4847d1bd (patch)
tree		158122642e6f21fe85d072c50d6185a0d0cf6834 /drivers/net/sfc/rx.c
parent		358c12953b88c5a06a57c33eb27c753b2e7934d1 (diff)
New driver "sfc" for Solarstorm SFC4000 controller.
The driver supports the 10Xpress PHY and XFP modules on our reference
designs SFE4001 and SFE4002, and the SMC models SMC10GPCIe-XFP and
SMC10GPCIe-10BT.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Diffstat (limited to 'drivers/net/sfc/rx.c')
-rw-r--r--	drivers/net/sfc/rx.c	875
1 file changed, 875 insertions, 0 deletions
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
new file mode 100644
index 000000000000..551299b462ae
--- /dev/null
+++ b/drivers/net/sfc/rx.c
@@ -0,0 +1,875 @@
1 | /**************************************************************************** | ||
2 | * Driver for Solarflare Solarstorm network controllers and boards | ||
3 | * Copyright 2005-2006 Fen Systems Ltd. | ||
4 | * Copyright 2005-2008 Solarflare Communications Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation, incorporated herein by reference. | ||
9 | */ | ||
10 | |||
11 | #include <linux/socket.h> | ||
12 | #include <linux/in.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/udp.h> | ||
16 | #include <net/ip.h> | ||
17 | #include <net/checksum.h> | ||
18 | #include "net_driver.h" | ||
19 | #include "rx.h" | ||
20 | #include "efx.h" | ||
21 | #include "falcon.h" | ||
22 | #include "workarounds.h" | ||
23 | |||
24 | /* Number of RX descriptors pushed at once. */ | ||
25 | #define EFX_RX_BATCH 8 | ||
26 | |||
27 | /* Size of buffer allocated for skb header area. */ | ||
28 | #define EFX_SKB_HEADERS 64u | ||
29 | |||
30 | /* | ||
31 | * rx_alloc_method - RX buffer allocation method | ||
32 | * | ||
33 | * This driver supports two methods for allocating and using RX buffers: | ||
34 | * each RX buffer may be backed by an skb or by an order-n page. | ||
35 | * | ||
36 | * When LRO is in use, the second method has a lower overhead, since we | ||
37 | * don't have to allocate and then free skbs for reassembled frames. | ||
38 | * | ||
39 | * Values: | ||
40 | * - RX_ALLOC_METHOD_AUTO = 0 | ||
41 | * - RX_ALLOC_METHOD_SKB = 1 | ||
42 | * - RX_ALLOC_METHOD_PAGE = 2 | ||
43 | * | ||
44 | * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count | ||
45 | * controlled by the parameters below. | ||
46 | * | ||
47 | * - Pushing and popping descriptors are separated by the rx_queue | ||
48 | * size, so the watermarks should be ~rxd_size. | ||
49 | * - The performance win from using page-based allocation for LRO is less | ||
50 | * than the performance hit of using page-based allocation for non-LRO | ||
51 | * traffic, so the watermarks should reflect this. | ||
52 | * | ||
53 | * Per channel we maintain a single variable, updated by each channel: | ||
54 | * | ||
55 | * rx_alloc_level += (lro_performed ? RX_ALLOC_FACTOR_LRO : | ||
56 | * RX_ALLOC_FACTOR_SKB) | ||
57 | * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which | ||
58 | * limits the hysteresis), and update the allocation strategy: | ||
59 | * | ||
60 | * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ? | ||
61 | * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB) | ||
62 | */ | ||
63 | static int rx_alloc_method = RX_ALLOC_METHOD_PAGE; | ||
64 | |||
65 | #define RX_ALLOC_LEVEL_LRO 0x2000 | ||
66 | #define RX_ALLOC_LEVEL_MAX 0x3000 | ||
67 | #define RX_ALLOC_FACTOR_LRO 1 | ||
68 | #define RX_ALLOC_FACTOR_SKB (-2) | ||
69 | |||
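/*
 * Worked example of the AUTO heuristic (illustrative only, using the
 * constants above): rx_alloc_level starts at 0 and is bumped by +1
 * (RX_ALLOC_FACTOR_LRO) for every frame merged by LRO and by -2
 * (RX_ALLOC_FACTOR_SKB) for every frame delivered as a plain skb.
 * efx_rx_strategy() clamps the level to 0..RX_ALLOC_LEVEL_MAX each NAPI
 * poll; once the running total exceeds RX_ALLOC_LEVEL_LRO (0x2000) the
 * channel switches to page-based allocation, and a sustained run of
 * non-LRO traffic pulls the level back below the threshold twice as
 * fast, flipping the channel back to skb allocation.
 */
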
70 | /* This is the percentage fill level below which new RX descriptors | ||
71 | * will be added to the RX descriptor ring. | ||
72 | */ | ||
73 | static unsigned int rx_refill_threshold = 90; | ||
74 | |||
75 | /* This is the percentage fill level to which an RX queue will be refilled | ||
76 | * when the "RX refill threshold" is reached. | ||
77 | */ | ||
78 | static unsigned int rx_refill_limit = 95; | ||
79 | |||
80 | /* | ||
81 | * RX maximum head room required. | ||
82 | * | ||
83 | * This must be at least 1 to prevent overflow and at least 2 to allow | ||
84 | * pipelined receives. | ||
85 | */ | ||
86 | #define EFX_RXD_HEAD_ROOM 2 | ||
87 | |||
88 | /* Macros for zero-order pages (potentially) containing multiple RX buffers */ | ||
89 | #define RX_DATA_OFFSET(_data) \ | ||
90 | (((unsigned long) (_data)) & (PAGE_SIZE-1)) | ||
91 | #define RX_BUF_OFFSET(_rx_buf) \ | ||
92 | RX_DATA_OFFSET((_rx_buf)->data) | ||
93 | |||
94 | #define RX_PAGE_SIZE(_efx) \ | ||
95 | (PAGE_SIZE * (1u << (_efx)->rx_buffer_order)) | ||
96 | |||
97 | |||
98 | /************************************************************************** | ||
99 | * | ||
100 | * Linux generic LRO handling | ||
101 | * | ||
102 | ************************************************************************** | ||
103 | */ | ||
104 | |||
105 | static int efx_lro_get_skb_hdr(struct sk_buff *skb, void **ip_hdr, | ||
106 | void **tcpudp_hdr, u64 *hdr_flags, void *priv) | ||
107 | { | ||
108 | struct efx_channel *channel = (struct efx_channel *)priv; | ||
109 | struct iphdr *iph; | ||
110 | struct tcphdr *th; | ||
111 | |||
112 | iph = (struct iphdr *)skb->data; | ||
113 | if (skb->protocol != htons(ETH_P_IP) || iph->protocol != IPPROTO_TCP) | ||
114 | goto fail; | ||
115 | |||
116 | th = (struct tcphdr *)(skb->data + iph->ihl * 4); | ||
117 | |||
118 | *tcpudp_hdr = th; | ||
119 | *ip_hdr = iph; | ||
120 | *hdr_flags = LRO_IPV4 | LRO_TCP; | ||
121 | |||
122 | channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO; | ||
123 | return 0; | ||
124 | fail: | ||
125 | channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; | ||
126 | return -1; | ||
127 | } | ||
128 | |||
129 | static int efx_get_frag_hdr(struct skb_frag_struct *frag, void **mac_hdr, | ||
130 | void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, | ||
131 | void *priv) | ||
132 | { | ||
133 | struct efx_channel *channel = (struct efx_channel *)priv; | ||
134 | struct ethhdr *eh; | ||
135 | struct iphdr *iph; | ||
136 | |||
137 | /* We support EtherII and VLAN encapsulated IPv4 */ | ||
138 | eh = (struct ethhdr *)(page_address(frag->page) + frag->page_offset); | ||
139 | *mac_hdr = eh; | ||
140 | |||
141 | if (eh->h_proto == htons(ETH_P_IP)) { | ||
142 | iph = (struct iphdr *)(eh + 1); | ||
143 | } else { | ||
144 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)eh; | ||
145 | if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) | ||
146 | goto fail; | ||
147 | |||
148 | iph = (struct iphdr *)(veh + 1); | ||
149 | } | ||
150 | *ip_hdr = iph; | ||
151 | |||
152 | /* We can only do LRO over TCP */ | ||
153 | if (iph->protocol != IPPROTO_TCP) | ||
154 | goto fail; | ||
155 | |||
156 | *hdr_flags = LRO_IPV4 | LRO_TCP; | ||
157 | *tcpudp_hdr = (struct tcphdr *)((u8 *) iph + iph->ihl * 4); | ||
158 | |||
159 | channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO; | ||
160 | return 0; | ||
161 | fail: | ||
162 | channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; | ||
163 | return -1; | ||
164 | } | ||
165 | |||
166 | int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx) | ||
167 | { | ||
168 | size_t s = sizeof(struct net_lro_desc) * EFX_MAX_LRO_DESCRIPTORS; | ||
169 | struct net_lro_desc *lro_arr; | ||
170 | |||
171 | /* Allocate the LRO descriptors structure */ | ||
172 | lro_arr = kzalloc(s, GFP_KERNEL); | ||
173 | if (lro_arr == NULL) | ||
174 | return -ENOMEM; | ||
175 | |||
176 | lro_mgr->lro_arr = lro_arr; | ||
177 | lro_mgr->max_desc = EFX_MAX_LRO_DESCRIPTORS; | ||
178 | lro_mgr->max_aggr = EFX_MAX_LRO_AGGR; | ||
179 | lro_mgr->frag_align_pad = EFX_PAGE_SKB_ALIGN; | ||
180 | |||
181 | lro_mgr->get_skb_header = efx_lro_get_skb_hdr; | ||
182 | lro_mgr->get_frag_header = efx_get_frag_hdr; | ||
183 | lro_mgr->dev = efx->net_dev; | ||
184 | |||
185 | lro_mgr->features = LRO_F_NAPI; | ||
186 | |||
187 | /* We can pass packets up with the checksum intact */ | ||
188 | lro_mgr->ip_summed = CHECKSUM_UNNECESSARY; | ||
189 | |||
190 | lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | void efx_lro_fini(struct net_lro_mgr *lro_mgr) | ||
196 | { | ||
197 | kfree(lro_mgr->lro_arr); | ||
198 | lro_mgr->lro_arr = NULL; | ||
199 | } | ||
200 | |||
201 | /** | ||
202 | * efx_init_rx_buffer_skb - create new RX buffer using skb-based allocation | ||
203 | * | ||
204 | * @rx_queue: Efx RX queue | ||
205 | * @rx_buf: RX buffer structure to populate | ||
206 | * | ||
207 | * This allocates memory for a new receive buffer, maps it for DMA, | ||
208 | * and populates a struct efx_rx_buffer with the relevant | ||
209 | * information. Return a negative error code or 0 on success. | ||
210 | */ | ||
211 | static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue, | ||
212 | struct efx_rx_buffer *rx_buf) | ||
213 | { | ||
214 | struct efx_nic *efx = rx_queue->efx; | ||
215 | struct net_device *net_dev = efx->net_dev; | ||
216 | int skb_len = efx->rx_buffer_len; | ||
217 | |||
218 | rx_buf->skb = netdev_alloc_skb(net_dev, skb_len); | ||
219 | if (unlikely(!rx_buf->skb)) | ||
220 | return -ENOMEM; | ||
221 | |||
222 | /* Adjust the SKB for padding and checksum */ | ||
223 | skb_reserve(rx_buf->skb, NET_IP_ALIGN); | ||
224 | rx_buf->len = skb_len - NET_IP_ALIGN; | ||
225 | rx_buf->data = (char *)rx_buf->skb->data; | ||
226 | rx_buf->skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
227 | |||
228 | rx_buf->dma_addr = pci_map_single(efx->pci_dev, | ||
229 | rx_buf->data, rx_buf->len, | ||
230 | PCI_DMA_FROMDEVICE); | ||
231 | |||
232 | if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) { | ||
233 | dev_kfree_skb_any(rx_buf->skb); | ||
234 | rx_buf->skb = NULL; | ||
235 | return -EIO; | ||
236 | } | ||
237 | |||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | /** | ||
242 | * efx_init_rx_buffer_page - create new RX buffer using page-based allocation | ||
243 | * | ||
244 | * @rx_queue: Efx RX queue | ||
245 | * @rx_buf: RX buffer structure to populate | ||
246 | * | ||
247 | * This allocates memory for a new receive buffer, maps it for DMA, | ||
248 | * and populates a struct efx_rx_buffer with the relevant | ||
249 | * information. Return a negative error code or 0 on success. | ||
250 | */ | ||
251 | static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, | ||
252 | struct efx_rx_buffer *rx_buf) | ||
253 | { | ||
254 | struct efx_nic *efx = rx_queue->efx; | ||
255 | int bytes, space, offset; | ||
256 | |||
257 | bytes = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; | ||
258 | |||
259 | /* If there is space left in the previously allocated page, | ||
260 | * then use it. Otherwise allocate a new one */ | ||
261 | rx_buf->page = rx_queue->buf_page; | ||
262 | if (rx_buf->page == NULL) { | ||
263 | dma_addr_t dma_addr; | ||
264 | |||
265 | rx_buf->page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, | ||
266 | efx->rx_buffer_order); | ||
267 | if (unlikely(rx_buf->page == NULL)) | ||
268 | return -ENOMEM; | ||
269 | |||
270 | dma_addr = pci_map_page(efx->pci_dev, rx_buf->page, | ||
271 | 0, RX_PAGE_SIZE(efx), | ||
272 | PCI_DMA_FROMDEVICE); | ||
273 | |||
274 | if (unlikely(pci_dma_mapping_error(dma_addr))) { | ||
275 | __free_pages(rx_buf->page, efx->rx_buffer_order); | ||
276 | rx_buf->page = NULL; | ||
277 | return -EIO; | ||
278 | } | ||
279 | |||
280 | rx_queue->buf_page = rx_buf->page; | ||
281 | rx_queue->buf_dma_addr = dma_addr; | ||
282 | rx_queue->buf_data = ((char *) page_address(rx_buf->page) + | ||
283 | EFX_PAGE_IP_ALIGN); | ||
284 | } | ||
285 | |||
286 | offset = RX_DATA_OFFSET(rx_queue->buf_data); | ||
287 | rx_buf->len = bytes; | ||
288 | rx_buf->dma_addr = rx_queue->buf_dma_addr + offset; | ||
289 | rx_buf->data = rx_queue->buf_data; | ||
290 | |||
291 | /* Try to pack multiple buffers per page */ | ||
292 | if (efx->rx_buffer_order == 0) { | ||
293 | /* The next buffer starts on the next 512 byte boundary */ | ||
294 | rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff); | ||
295 | offset += ((bytes + 0x1ff) & ~0x1ff); | ||
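/* Illustrative example (actual sizes depend on the MTU-derived
 * rx_buffer_len and on PAGE_SIZE): a receive buffer of roughly
 * 1700 bytes is padded up to 2048, so two buffers fit in a
 * 4096 byte page; the space check below then fails for a third
 * buffer and the page is handed over for unmapping. */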
296 | |||
297 | space = RX_PAGE_SIZE(efx) - offset; | ||
298 | if (space >= bytes) { | ||
299 | /* Refs dropped on kernel releasing each skb */ | ||
300 | get_page(rx_queue->buf_page); | ||
301 | goto out; | ||
302 | } | ||
303 | } | ||
304 | |||
305 | /* This is the final RX buffer for this page, so mark it for | ||
306 | * unmapping */ | ||
307 | rx_queue->buf_page = NULL; | ||
308 | rx_buf->unmap_addr = rx_queue->buf_dma_addr; | ||
309 | |||
310 | out: | ||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | /* This allocates memory for a new receive buffer, maps it for DMA, | ||
315 | * and populates a struct efx_rx_buffer with the relevant | ||
316 | * information. | ||
317 | */ | ||
318 | static inline int efx_init_rx_buffer(struct efx_rx_queue *rx_queue, | ||
319 | struct efx_rx_buffer *new_rx_buf) | ||
320 | { | ||
321 | int rc = 0; | ||
322 | |||
323 | if (rx_queue->channel->rx_alloc_push_pages) { | ||
324 | new_rx_buf->skb = NULL; | ||
325 | rc = efx_init_rx_buffer_page(rx_queue, new_rx_buf); | ||
326 | rx_queue->alloc_page_count++; | ||
327 | } else { | ||
328 | new_rx_buf->page = NULL; | ||
329 | rc = efx_init_rx_buffer_skb(rx_queue, new_rx_buf); | ||
330 | rx_queue->alloc_skb_count++; | ||
331 | } | ||
332 | |||
333 | if (unlikely(rc < 0)) | ||
334 | EFX_LOG_RL(rx_queue->efx, "%s RXQ[%d] =%d\n", __func__, | ||
335 | rx_queue->queue, rc); | ||
336 | return rc; | ||
337 | } | ||
338 | |||
339 | static inline void efx_unmap_rx_buffer(struct efx_nic *efx, | ||
340 | struct efx_rx_buffer *rx_buf) | ||
341 | { | ||
342 | if (rx_buf->page) { | ||
343 | EFX_BUG_ON_PARANOID(rx_buf->skb); | ||
344 | if (rx_buf->unmap_addr) { | ||
345 | pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr, | ||
346 | RX_PAGE_SIZE(efx), PCI_DMA_FROMDEVICE); | ||
347 | rx_buf->unmap_addr = 0; | ||
348 | } | ||
349 | } else if (likely(rx_buf->skb)) { | ||
350 | pci_unmap_single(efx->pci_dev, rx_buf->dma_addr, | ||
351 | rx_buf->len, PCI_DMA_FROMDEVICE); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | static inline void efx_free_rx_buffer(struct efx_nic *efx, | ||
356 | struct efx_rx_buffer *rx_buf) | ||
357 | { | ||
358 | if (rx_buf->page) { | ||
359 | __free_pages(rx_buf->page, efx->rx_buffer_order); | ||
360 | rx_buf->page = NULL; | ||
361 | } else if (likely(rx_buf->skb)) { | ||
362 | dev_kfree_skb_any(rx_buf->skb); | ||
363 | rx_buf->skb = NULL; | ||
364 | } | ||
365 | } | ||
366 | |||
367 | static inline void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, | ||
368 | struct efx_rx_buffer *rx_buf) | ||
369 | { | ||
370 | efx_unmap_rx_buffer(rx_queue->efx, rx_buf); | ||
371 | efx_free_rx_buffer(rx_queue->efx, rx_buf); | ||
372 | } | ||
373 | |||
374 | /** | ||
375 | * __efx_fast_push_rx_descriptors - push new RX descriptors quickly | ||
376 | * @rx_queue: RX descriptor queue | ||
377 | * @retry: Recheck the fill level | ||
378 | * This will aim to fill the RX descriptor queue up to | ||
379 | * @rx_queue->fast_fill_limit. If there is insufficient atomic | ||
380 | * memory to do so, the caller should retry. | ||
381 | */ | ||
382 | static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, | ||
383 | int retry) | ||
384 | { | ||
385 | struct efx_rx_buffer *rx_buf; | ||
386 | unsigned fill_level, index; | ||
387 | int i, space, rc = 0; | ||
388 | |||
389 | /* Calculate current fill level. Do this outside the lock, | ||
390 | * because most of the time we'll end up not wanting to do the | ||
391 | * fill anyway. | ||
392 | */ | ||
393 | fill_level = (rx_queue->added_count - rx_queue->removed_count); | ||
394 | EFX_BUG_ON_PARANOID(fill_level > | ||
395 | rx_queue->efx->type->rxd_ring_mask + 1); | ||
396 | |||
397 | /* Don't fill if we don't need to */ | ||
398 | if (fill_level >= rx_queue->fast_fill_trigger) | ||
399 | return 0; | ||
400 | |||
401 | /* Record minimum fill level */ | ||
402 | if (unlikely(fill_level < rx_queue->min_fill)) | ||
403 | if (fill_level) | ||
404 | rx_queue->min_fill = fill_level; | ||
405 | |||
406 | /* Acquire RX add lock. If this lock is contended, then a fast | ||
407 | * fill must already be in progress (e.g. in the refill | ||
408 | * tasklet), so we don't need to do anything | ||
409 | */ | ||
410 | if (!spin_trylock_bh(&rx_queue->add_lock)) | ||
411 | return -1; | ||
412 | |||
413 | retry: | ||
414 | /* Recalculate current fill level now that we have the lock */ | ||
415 | fill_level = (rx_queue->added_count - rx_queue->removed_count); | ||
416 | EFX_BUG_ON_PARANOID(fill_level > | ||
417 | rx_queue->efx->type->rxd_ring_mask + 1); | ||
418 | space = rx_queue->fast_fill_limit - fill_level; | ||
419 | if (space < EFX_RX_BATCH) | ||
420 | goto out_unlock; | ||
421 | |||
422 | EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from" | ||
423 | " level %d to level %d using %s allocation\n", | ||
424 | rx_queue->queue, fill_level, rx_queue->fast_fill_limit, | ||
425 | rx_queue->channel->rx_alloc_push_pages ? "page" : "skb"); | ||
426 | |||
427 | do { | ||
428 | for (i = 0; i < EFX_RX_BATCH; ++i) { | ||
429 | index = (rx_queue->added_count & | ||
430 | rx_queue->efx->type->rxd_ring_mask); | ||
431 | rx_buf = efx_rx_buffer(rx_queue, index); | ||
432 | rc = efx_init_rx_buffer(rx_queue, rx_buf); | ||
433 | if (unlikely(rc)) | ||
434 | goto out; | ||
435 | ++rx_queue->added_count; | ||
436 | } | ||
437 | } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH); | ||
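/* Descriptors are initialised EFX_RX_BATCH (8) at a time, but the
 * hardware write pointer is only advanced once per fill pass, below,
 * via falcon_notify_rx_desc(). */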
438 | |||
439 | EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring " | ||
440 | "to level %d\n", rx_queue->queue, | ||
441 | rx_queue->added_count - rx_queue->removed_count); | ||
442 | |||
443 | out: | ||
444 | /* Send write pointer to card. */ | ||
445 | falcon_notify_rx_desc(rx_queue); | ||
446 | |||
447 | /* If the fast fill is running from inside the refill tasklet, then | ||
448 | * for SMP systems it may be running on a different CPU to | ||
449 | * RX event processing, which means that the fill level may now be | ||
450 | * out of date. */ | ||
451 | if (unlikely(retry && (rc == 0))) | ||
452 | goto retry; | ||
453 | |||
454 | out_unlock: | ||
455 | spin_unlock_bh(&rx_queue->add_lock); | ||
456 | |||
457 | return rc; | ||
458 | } | ||
459 | |||
460 | /** | ||
461 | * efx_fast_push_rx_descriptors - push new RX descriptors quickly | ||
462 | * @rx_queue: RX descriptor queue | ||
463 | * | ||
464 | * This will aim to fill the RX descriptor queue up to | ||
465 | * @rx_queue->fast_fill_limit. If there is insufficient memory to do so, | ||
466 | * it will schedule a work item to immediately continue the fast fill. | ||
467 | */ | ||
468 | void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) | ||
469 | { | ||
470 | int rc; | ||
471 | |||
472 | rc = __efx_fast_push_rx_descriptors(rx_queue, 0); | ||
473 | if (unlikely(rc)) { | ||
474 | /* Schedule the work item to run immediately. The hope is | ||
475 | * that work is immediately pending to free some memory | ||
476 | * (e.g. an RX event or TX completion) | ||
477 | */ | ||
478 | efx_schedule_slow_fill(rx_queue, 0); | ||
479 | } | ||
480 | } | ||
481 | |||
482 | void efx_rx_work(struct work_struct *data) | ||
483 | { | ||
484 | struct efx_rx_queue *rx_queue; | ||
485 | int rc; | ||
486 | |||
487 | rx_queue = container_of(data, struct efx_rx_queue, work.work); | ||
488 | |||
489 | if (unlikely(!rx_queue->channel->enabled)) | ||
490 | return; | ||
491 | |||
492 | EFX_TRACE(rx_queue->efx, "RX queue %d worker thread executing on CPU " | ||
493 | "%d\n", rx_queue->queue, raw_smp_processor_id()); | ||
494 | |||
495 | ++rx_queue->slow_fill_count; | ||
496 | /* Push new RX descriptors, allowing at least 1 jiffy for | ||
497 | * the kernel to free some more memory. */ | ||
498 | rc = __efx_fast_push_rx_descriptors(rx_queue, 1); | ||
499 | if (rc) | ||
500 | efx_schedule_slow_fill(rx_queue, 1); | ||
501 | } | ||
502 | |||
503 | static inline void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, | ||
504 | struct efx_rx_buffer *rx_buf, | ||
505 | int len, int *discard, | ||
506 | int *leak_packet) | ||
507 | { | ||
508 | struct efx_nic *efx = rx_queue->efx; | ||
509 | unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; | ||
510 | |||
511 | if (likely(len <= max_len)) | ||
512 | return; | ||
513 | |||
514 | /* The packet must be discarded, but this is only a fatal error | ||
515 | * if the caller indicated it was | ||
516 | */ | ||
517 | *discard = 1; | ||
518 | |||
519 | if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) { | ||
520 | EFX_ERR_RL(efx, " RX queue %d seriously overlength " | ||
521 | "RX event (0x%x > 0x%x+0x%x). Leaking\n", | ||
522 | rx_queue->queue, len, max_len, | ||
523 | efx->type->rx_buffer_padding); | ||
524 | /* If this buffer was skb-allocated, then the meta | ||
525 | * data at the end of the skb will be trashed. So | ||
526 | * we have no choice but to leak the fragment. | ||
527 | */ | ||
528 | *leak_packet = (rx_buf->skb != NULL); | ||
529 | efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); | ||
530 | } else { | ||
531 | EFX_ERR_RL(efx, " RX queue %d overlength RX event " | ||
532 | "(0x%x > 0x%x)\n", rx_queue->queue, len, max_len); | ||
533 | } | ||
534 | |||
535 | rx_queue->channel->n_rx_overlength++; | ||
536 | } | ||
537 | |||
538 | /* Pass a received packet up through the generic LRO stack | ||
539 | * | ||
540 | * Handles driverlink veto, and passes the fragment up via | ||
541 | * the appropriate LRO method | ||
542 | */ | ||
543 | static inline void efx_rx_packet_lro(struct efx_channel *channel, | ||
544 | struct efx_rx_buffer *rx_buf) | ||
545 | { | ||
546 | struct net_lro_mgr *lro_mgr = &channel->lro_mgr; | ||
547 | void *priv = channel; | ||
548 | |||
549 | /* Pass the skb/page into the LRO engine */ | ||
550 | if (rx_buf->page) { | ||
551 | struct skb_frag_struct frags; | ||
552 | |||
553 | frags.page = rx_buf->page; | ||
554 | frags.page_offset = RX_BUF_OFFSET(rx_buf); | ||
555 | frags.size = rx_buf->len; | ||
556 | |||
557 | lro_receive_frags(lro_mgr, &frags, rx_buf->len, | ||
558 | rx_buf->len, priv, 0); | ||
559 | |||
560 | EFX_BUG_ON_PARANOID(rx_buf->skb); | ||
561 | rx_buf->page = NULL; | ||
562 | } else { | ||
563 | EFX_BUG_ON_PARANOID(!rx_buf->skb); | ||
564 | |||
565 | lro_receive_skb(lro_mgr, rx_buf->skb, priv); | ||
566 | rx_buf->skb = NULL; | ||
567 | } | ||
568 | } | ||
569 | |||
570 | /* Allocate and construct an SKB around a struct page. */ | ||
571 | static inline struct sk_buff *efx_rx_mk_skb(struct efx_rx_buffer *rx_buf, | ||
572 | struct efx_nic *efx, | ||
573 | int hdr_len) | ||
574 | { | ||
575 | struct sk_buff *skb; | ||
576 | |||
577 | /* Allocate an SKB to store the headers */ | ||
578 | skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN); | ||
579 | if (unlikely(skb == NULL)) { | ||
580 | EFX_ERR_RL(efx, "RX out of memory for skb\n"); | ||
581 | return NULL; | ||
582 | } | ||
583 | |||
584 | EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags); | ||
585 | EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); | ||
586 | |||
587 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
588 | skb_reserve(skb, EFX_PAGE_SKB_ALIGN); | ||
589 | |||
590 | skb->len = rx_buf->len; | ||
591 | skb->truesize = rx_buf->len + sizeof(struct sk_buff); | ||
592 | memcpy(skb->data, rx_buf->data, hdr_len); | ||
593 | skb->tail += hdr_len; | ||
594 | |||
595 | /* Append the remaining page onto the frag list */ | ||
596 | if (unlikely(rx_buf->len > hdr_len)) { | ||
597 | struct skb_frag_struct *frag = skb_shinfo(skb)->frags; | ||
598 | frag->page = rx_buf->page; | ||
599 | frag->page_offset = RX_BUF_OFFSET(rx_buf) + hdr_len; | ||
600 | frag->size = skb->len - hdr_len; | ||
601 | skb_shinfo(skb)->nr_frags = 1; | ||
602 | skb->data_len = frag->size; | ||
603 | } else { | ||
604 | __free_pages(rx_buf->page, efx->rx_buffer_order); | ||
605 | skb->data_len = 0; | ||
606 | } | ||
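/* Illustrative example: for a full-sized frame received into a page
 * buffer the caller passes hdr_len = EFX_SKB_HEADERS (64), so only
 * the first 64 bytes are copied into the skb's linear area and the
 * rest of the frame stays in the page, attached as a single
 * fragment. */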
607 | |||
608 | /* Ownership has transferred from the rx_buf to skb */ | ||
609 | rx_buf->page = NULL; | ||
610 | |||
611 | /* Move past the ethernet header */ | ||
612 | skb->protocol = eth_type_trans(skb, efx->net_dev); | ||
613 | |||
614 | return skb; | ||
615 | } | ||
616 | |||
617 | void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, | ||
618 | unsigned int len, int checksummed, int discard) | ||
619 | { | ||
620 | struct efx_nic *efx = rx_queue->efx; | ||
621 | struct efx_rx_buffer *rx_buf; | ||
622 | int leak_packet = 0; | ||
623 | |||
624 | rx_buf = efx_rx_buffer(rx_queue, index); | ||
625 | EFX_BUG_ON_PARANOID(!rx_buf->data); | ||
626 | EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page); | ||
627 | EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page)); | ||
628 | |||
629 | /* This allows the refill path to post another buffer. | ||
630 | * EFX_RXD_HEAD_ROOM ensures that the slot we are using | ||
631 | * isn't overwritten yet. | ||
632 | */ | ||
633 | rx_queue->removed_count++; | ||
634 | |||
635 | /* Validate the length encoded in the event vs the descriptor pushed */ | ||
636 | efx_rx_packet__check_len(rx_queue, rx_buf, len, | ||
637 | &discard, &leak_packet); | ||
638 | |||
639 | EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n", | ||
640 | rx_queue->queue, index, | ||
641 | (unsigned long long)rx_buf->dma_addr, len, | ||
642 | (checksummed ? " [SUMMED]" : ""), | ||
643 | (discard ? " [DISCARD]" : "")); | ||
644 | |||
645 | /* Discard packet, if instructed to do so */ | ||
646 | if (unlikely(discard)) { | ||
647 | if (unlikely(leak_packet)) | ||
648 | rx_queue->channel->n_skbuff_leaks++; | ||
649 | else | ||
650 | /* We haven't called efx_unmap_rx_buffer yet, | ||
651 | * so fini the entire rx_buffer here */ | ||
652 | efx_fini_rx_buffer(rx_queue, rx_buf); | ||
653 | return; | ||
654 | } | ||
655 | |||
656 | /* Release card resources - assumes all RX buffers consumed in-order | ||
657 | * per RX queue | ||
658 | */ | ||
659 | efx_unmap_rx_buffer(efx, rx_buf); | ||
660 | |||
661 | /* Prefetch nice and early so data will (hopefully) be in cache by | ||
662 | * the time we look at it. | ||
663 | */ | ||
664 | prefetch(rx_buf->data); | ||
665 | |||
666 | /* Pipeline receives so that we give time for packet headers to be | ||
667 | * prefetched into cache. | ||
668 | */ | ||
669 | rx_buf->len = len; | ||
670 | if (rx_queue->channel->rx_pkt) | ||
671 | __efx_rx_packet(rx_queue->channel, | ||
672 | rx_queue->channel->rx_pkt, | ||
673 | rx_queue->channel->rx_pkt_csummed); | ||
674 | rx_queue->channel->rx_pkt = rx_buf; | ||
675 | rx_queue->channel->rx_pkt_csummed = checksummed; | ||
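/* The buffer parked in channel->rx_pkt above is handed to
 * __efx_rx_packet() on the next call, by which time its headers
 * should have been prefetched into cache. */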
676 | } | ||
677 | |||
678 | /* Handle a received packet. Second half: Touches packet payload. */ | ||
679 | void __efx_rx_packet(struct efx_channel *channel, | ||
680 | struct efx_rx_buffer *rx_buf, int checksummed) | ||
681 | { | ||
682 | struct efx_nic *efx = channel->efx; | ||
683 | struct sk_buff *skb; | ||
684 | int lro = efx->net_dev->features & NETIF_F_LRO; | ||
685 | |||
686 | if (rx_buf->skb) { | ||
687 | prefetch(skb_shinfo(rx_buf->skb)); | ||
688 | |||
689 | skb_put(rx_buf->skb, rx_buf->len); | ||
690 | |||
691 | /* Move past the ethernet header. rx_buf->data still points | ||
692 | * at the ethernet header */ | ||
693 | rx_buf->skb->protocol = eth_type_trans(rx_buf->skb, | ||
694 | efx->net_dev); | ||
695 | } | ||
696 | |||
697 | /* Both our generic-LRO and SFC-SSR support skb and page based | ||
698 | * allocation, but neither support switching from one to the | ||
699 | * other on the fly. If we spot that the allocation mode has | ||
700 | * changed, then flush the LRO state. | ||
701 | */ | ||
702 | if (unlikely(channel->rx_alloc_pop_pages != (rx_buf->page != NULL))) { | ||
703 | efx_flush_lro(channel); | ||
704 | channel->rx_alloc_pop_pages = (rx_buf->page != NULL); | ||
705 | } | ||
706 | if (likely(checksummed && lro)) { | ||
707 | efx_rx_packet_lro(channel, rx_buf); | ||
708 | goto done; | ||
709 | } | ||
710 | |||
711 | /* Form an skb if required */ | ||
712 | if (rx_buf->page) { | ||
713 | int hdr_len = min(rx_buf->len, EFX_SKB_HEADERS); | ||
714 | skb = efx_rx_mk_skb(rx_buf, efx, hdr_len); | ||
715 | if (unlikely(skb == NULL)) { | ||
716 | efx_free_rx_buffer(efx, rx_buf); | ||
717 | goto done; | ||
718 | } | ||
719 | } else { | ||
720 | /* We now own the SKB */ | ||
721 | skb = rx_buf->skb; | ||
722 | rx_buf->skb = NULL; | ||
723 | } | ||
724 | |||
725 | EFX_BUG_ON_PARANOID(rx_buf->page); | ||
726 | EFX_BUG_ON_PARANOID(rx_buf->skb); | ||
727 | EFX_BUG_ON_PARANOID(!skb); | ||
728 | |||
729 | /* Set the SKB flags */ | ||
730 | if (unlikely(!checksummed || !efx->rx_checksum_enabled)) | ||
731 | skb->ip_summed = CHECKSUM_NONE; | ||
732 | |||
733 | /* Pass the packet up */ | ||
734 | netif_receive_skb(skb); | ||
735 | |||
736 | /* Update allocation strategy method */ | ||
737 | channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; | ||
738 | |||
739 | /* fall-thru */ | ||
740 | done: | ||
741 | efx->net_dev->last_rx = jiffies; | ||
742 | } | ||
743 | |||
744 | void efx_rx_strategy(struct efx_channel *channel) | ||
745 | { | ||
746 | enum efx_rx_alloc_method method = rx_alloc_method; | ||
747 | |||
748 | /* Only makes sense to use page based allocation if LRO is enabled */ | ||
749 | if (!(channel->efx->net_dev->features & NETIF_F_LRO)) { | ||
750 | method = RX_ALLOC_METHOD_SKB; | ||
751 | } else if (method == RX_ALLOC_METHOD_AUTO) { | ||
752 | /* Constrain the rx_alloc_level */ | ||
753 | if (channel->rx_alloc_level < 0) | ||
754 | channel->rx_alloc_level = 0; | ||
755 | else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX) | ||
756 | channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX; | ||
757 | |||
758 | /* Decide on the allocation method */ | ||
759 | method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO) ? | ||
760 | RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB); | ||
761 | } | ||
762 | |||
763 | /* Push the option */ | ||
764 | channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE); | ||
765 | } | ||
766 | |||
767 | int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) | ||
768 | { | ||
769 | struct efx_nic *efx = rx_queue->efx; | ||
770 | unsigned int rxq_size; | ||
771 | int rc; | ||
772 | |||
773 | EFX_LOG(efx, "creating RX queue %d\n", rx_queue->queue); | ||
774 | |||
775 | /* Allocate RX buffers */ | ||
776 | rxq_size = (efx->type->rxd_ring_mask + 1) * sizeof(*rx_queue->buffer); | ||
777 | rx_queue->buffer = kzalloc(rxq_size, GFP_KERNEL); | ||
778 | if (!rx_queue->buffer) { | ||
779 | rc = -ENOMEM; | ||
780 | goto fail1; | ||
781 | } | ||
782 | |||
783 | rc = falcon_probe_rx(rx_queue); | ||
784 | if (rc) | ||
785 | goto fail2; | ||
786 | |||
787 | return 0; | ||
788 | |||
789 | fail2: | ||
790 | kfree(rx_queue->buffer); | ||
791 | rx_queue->buffer = NULL; | ||
792 | fail1: | ||
793 | rx_queue->used = 0; | ||
794 | |||
795 | return rc; | ||
796 | } | ||
797 | |||
798 | int efx_init_rx_queue(struct efx_rx_queue *rx_queue) | ||
799 | { | ||
800 | struct efx_nic *efx = rx_queue->efx; | ||
801 | unsigned int max_fill, trigger, limit; | ||
802 | |||
803 | EFX_LOG(rx_queue->efx, "initialising RX queue %d\n", rx_queue->queue); | ||
804 | |||
805 | /* Initialise ptr fields */ | ||
806 | rx_queue->added_count = 0; | ||
807 | rx_queue->notified_count = 0; | ||
808 | rx_queue->removed_count = 0; | ||
809 | rx_queue->min_fill = -1U; | ||
810 | rx_queue->min_overfill = -1U; | ||
811 | |||
812 | /* Initialise limit fields */ | ||
813 | max_fill = efx->type->rxd_ring_mask + 1 - EFX_RXD_HEAD_ROOM; | ||
814 | trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; | ||
815 | limit = max_fill * min(rx_refill_limit, 100U) / 100U; | ||
816 | |||
817 | rx_queue->max_fill = max_fill; | ||
818 | rx_queue->fast_fill_trigger = trigger; | ||
819 | rx_queue->fast_fill_limit = limit; | ||
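/* Example with the default module parameters and an assumed
 * 4096-entry descriptor ring (rxd_ring_mask == 0xfff):
 * max_fill = 4094, so a fast fill is triggered once the queue drops
 * below 90% full (3684 descriptors) and refills it to 95% (3889). */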
820 | |||
821 | /* Set up RX descriptor ring */ | ||
822 | return falcon_init_rx(rx_queue); | ||
823 | } | ||
824 | |||
825 | void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) | ||
826 | { | ||
827 | int i; | ||
828 | struct efx_rx_buffer *rx_buf; | ||
829 | |||
830 | EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue); | ||
831 | |||
832 | falcon_fini_rx(rx_queue); | ||
833 | |||
834 | /* Release RX buffers. NB: start at index 0, not the current HW ptr */ | ||
835 | if (rx_queue->buffer) { | ||
836 | for (i = 0; i <= rx_queue->efx->type->rxd_ring_mask; i++) { | ||
837 | rx_buf = efx_rx_buffer(rx_queue, i); | ||
838 | efx_fini_rx_buffer(rx_queue, rx_buf); | ||
839 | } | ||
840 | } | ||
841 | |||
842 | /* For a page that is part-way through splitting into RX buffers */ | ||
843 | if (rx_queue->buf_page != NULL) { | ||
844 | pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr, | ||
845 | RX_PAGE_SIZE(rx_queue->efx), PCI_DMA_FROMDEVICE); | ||
846 | __free_pages(rx_queue->buf_page, | ||
847 | rx_queue->efx->rx_buffer_order); | ||
848 | rx_queue->buf_page = NULL; | ||
849 | } | ||
850 | } | ||
851 | |||
852 | void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) | ||
853 | { | ||
854 | EFX_LOG(rx_queue->efx, "destroying RX queue %d\n", rx_queue->queue); | ||
855 | |||
856 | falcon_remove_rx(rx_queue); | ||
857 | |||
858 | kfree(rx_queue->buffer); | ||
859 | rx_queue->buffer = NULL; | ||
860 | rx_queue->used = 0; | ||
861 | } | ||
862 | |||
863 | void efx_flush_lro(struct efx_channel *channel) | ||
864 | { | ||
865 | lro_flush_all(&channel->lro_mgr); | ||
866 | } | ||
867 | |||
868 | |||
869 | module_param(rx_alloc_method, int, 0644); | ||
870 | MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers"); | ||
871 | |||
872 | module_param(rx_refill_threshold, uint, 0444); | ||
873 | MODULE_PARM_DESC(rx_refill_threshold, | ||
874 | "RX descriptor ring fast/slow fill threshold (%)"); | ||
875 | |||