author		Ben Hutchings <bhutchings@solarflare.com>	2008-04-27 07:55:59 -0400
committer	Jeff Garzik <jgarzik@redhat.com>	2008-04-29 01:42:43 -0400
commit		8ceee660aacb29721e26f08e336c58dc4847d1bd (patch)
tree		158122642e6f21fe85d072c50d6185a0d0cf6834 /drivers/net/sfc/tx.c
parent		358c12953b88c5a06a57c33eb27c753b2e7934d1 (diff)
New driver "sfc" for Solarstorm SFC4000 controller.
The driver supports the 10Xpress PHY and XFP modules on our reference
designs SFE4001 and SFE4002 and the SMC models SMC10GPCIe-XFP and
SMC10GPCIe-10BT.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Diffstat (limited to 'drivers/net/sfc/tx.c')
-rw-r--r--	drivers/net/sfc/tx.c	452
1 file changed, 452 insertions, 0 deletions
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
new file mode 100644
index 000000000000..fbb866b2185e
--- /dev/null
+++ b/drivers/net/sfc/tx.c
@@ -0,0 +1,452 @@
/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "tx.h"
#include "efx.h"
#include "falcon.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)	\
	(_tx_queue->efx->type->txd_ring_mask / 2u)
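
/* Example: with a 4096-entry descriptor ring, txd_ring_mask is 0xfff,
 * so the threshold is 0xfff / 2 == 2047; the netif queue is restarted
 * only once the fill level drops below 2047 entries.
 */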
31 | |||
32 | /* We want to be able to nest calls to netif_stop_queue(), since each | ||
33 | * channel can have an individual stop on the queue. | ||
34 | */ | ||
35 | void efx_stop_queue(struct efx_nic *efx) | ||
36 | { | ||
37 | spin_lock_bh(&efx->netif_stop_lock); | ||
38 | EFX_TRACE(efx, "stop TX queue\n"); | ||
39 | |||
40 | atomic_inc(&efx->netif_stop_count); | ||
41 | netif_stop_queue(efx->net_dev); | ||
42 | |||
43 | spin_unlock_bh(&efx->netif_stop_lock); | ||
44 | } | ||
45 | |||
46 | /* Wake netif's TX queue | ||
47 | * We want to be able to nest calls to netif_stop_queue(), since each | ||
48 | * channel can have an individual stop on the queue. | ||
49 | */ | ||
50 | inline void efx_wake_queue(struct efx_nic *efx) | ||
51 | { | ||
52 | local_bh_disable(); | ||
53 | if (atomic_dec_and_lock(&efx->netif_stop_count, | ||
54 | &efx->netif_stop_lock)) { | ||
55 | EFX_TRACE(efx, "waking TX queue\n"); | ||
56 | netif_wake_queue(efx->net_dev); | ||
57 | spin_unlock(&efx->netif_stop_lock); | ||
58 | } | ||
59 | local_bh_enable(); | ||
60 | } | ||
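
/* Example of the nesting this allows: if two channels each call
 * efx_stop_queue(), netif_stop_count rises to 2 and the queue stays
 * stopped until both have called efx_wake_queue();
 * atomic_dec_and_lock() succeeds only on the final 1 -> 0 transition.
 */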
61 | |||
62 | static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, | ||
63 | struct efx_tx_buffer *buffer) | ||
64 | { | ||
65 | if (buffer->unmap_len) { | ||
66 | struct pci_dev *pci_dev = tx_queue->efx->pci_dev; | ||
67 | if (buffer->unmap_single) | ||
68 | pci_unmap_single(pci_dev, buffer->unmap_addr, | ||
69 | buffer->unmap_len, PCI_DMA_TODEVICE); | ||
70 | else | ||
71 | pci_unmap_page(pci_dev, buffer->unmap_addr, | ||
72 | buffer->unmap_len, PCI_DMA_TODEVICE); | ||
73 | buffer->unmap_len = 0; | ||
74 | buffer->unmap_single = 0; | ||
75 | } | ||
76 | |||
	if (buffer->skb) {
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		buffer->skb = NULL;
		EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
			  "complete\n", tx_queue->queue, tx_queue->read_count);
	}
}
84 | |||
85 | |||
86 | /* | ||
87 | * Add a socket buffer to a TX queue | ||
88 | * | ||
89 | * This maps all fragments of a socket buffer for DMA and adds them to | ||
90 | * the TX queue. The queue's insert pointer will be incremented by | ||
91 | * the number of fragments in the socket buffer. | ||
92 | * | ||
93 | * If any DMA mapping fails, any mapped fragments will be unmapped, | ||
94 | * the queue's insert pointer will be restored to its original value. | ||
95 | * | ||
96 | * Returns NETDEV_TX_OK or NETDEV_TX_BUSY | ||
97 | * You must hold netif_tx_lock() to call this function. | ||
98 | */ | ||
static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
				  const struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct pci_dev *pci_dev = efx->pci_dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	struct page *page;
	int page_offset;
	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	unsigned unmap_single;
	int q_space, i = 0;
	int rc = NETDEV_TX_OK;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	q_space = efx->type->txd_ring_mask - 1 - fill_level;
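	/* Note the "- 1": the ring holds txd_ring_mask + 1 entries, so
	 * q_space reaching 0 still leaves two descriptors free -
	 * presumably so the masked insert pointer can never wrap onto
	 * the masked read pointer, which would make a completely full
	 * ring look empty.
	 */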
122 | |||
123 | /* Map for DMA. Use pci_map_single rather than pci_map_page | ||
124 | * since this is more efficient on machines with sparse | ||
125 | * memory. | ||
126 | */ | ||
127 | unmap_single = 1; | ||
128 | dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE); | ||
129 | |||
	/* Process all fragments */
	while (1) {
		if (unlikely(pci_dma_mapping_error(dma_addr)))
			goto pci_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			if (unlikely(q_space-- <= 0)) {
				/* It might be that completions have
				 * happened since the xmit path last
				 * checked.  Update the xmit path's
				 * copy of read_count.
				 */
				++tx_queue->stopped;
				/* This memory barrier protects the
				 * change of stopped from the access
				 * of read_count. */
				smp_mb();
				tx_queue->old_read_count =
					*(volatile unsigned *)
					&tx_queue->read_count;
				fill_level = (tx_queue->insert_count
					      - tx_queue->old_read_count);
				q_space = (efx->type->txd_ring_mask - 1 -
					   fill_level);
				if (unlikely(q_space-- <= 0))
					goto stop;
				smp_mb();
				--tx_queue->stopped;
			}

			insert_ptr = (tx_queue->insert_count &
				      efx->type->txd_ring_mask);
			buffer = &tx_queue->buffer[insert_ptr];
			EFX_BUG_ON_PARANOID(buffer->skb);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(buffer->continuation != 1);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
			if (likely(dma_len > len))
				dma_len = len;

			misalign = (unsigned) dma_addr & efx->type->bug5391_mask;
			if (misalign && dma_len + misalign > 512)
				dma_len = 512 - misalign;

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->unmap_addr = unmap_addr;
		buffer->unmap_single = unmap_single;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = fragment->size;
		page = fragment->page;
		page_offset = fragment->page_offset;
		i++;
		/* Map for DMA */
		unmap_single = 0;
		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
					PCI_DMA_TODEVICE);
	}
209 | |||
210 | /* Transfer ownership of the skb to the final buffer */ | ||
211 | buffer->skb = skb; | ||
212 | buffer->continuation = 0; | ||
213 | |||
214 | /* Pass off to hardware */ | ||
215 | falcon_push_buffers(tx_queue); | ||
216 | |||
217 | return NETDEV_TX_OK; | ||
218 | |||
219 | pci_err: | ||
220 | EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d " | ||
221 | "fragments for DMA\n", tx_queue->queue, skb->len, | ||
222 | skb_shinfo(skb)->nr_frags + 1); | ||
223 | |||
224 | /* Mark the packet as transmitted, and free the SKB ourselves */ | ||
225 | dev_kfree_skb_any((struct sk_buff *)skb); | ||
226 | goto unwind; | ||
227 | |||
228 | stop: | ||
229 | rc = NETDEV_TX_BUSY; | ||
230 | |||
231 | if (tx_queue->stopped == 1) | ||
232 | efx_stop_queue(efx); | ||
233 | |||
234 | unwind: | ||
235 | /* Work backwards until we hit the original insert pointer value */ | ||
236 | while (tx_queue->insert_count != tx_queue->write_count) { | ||
237 | --tx_queue->insert_count; | ||
238 | insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask; | ||
239 | buffer = &tx_queue->buffer[insert_ptr]; | ||
240 | efx_dequeue_buffer(tx_queue, buffer); | ||
241 | buffer->len = 0; | ||
242 | } | ||
243 | |||
244 | /* Free the fragment we were mid-way through pushing */ | ||
245 | if (unmap_len) | ||
246 | pci_unmap_page(pci_dev, unmap_addr, unmap_len, | ||
247 | PCI_DMA_TODEVICE); | ||
248 | |||
249 | return rc; | ||
250 | } | ||
251 | |||
252 | /* Remove packets from the TX queue | ||
253 | * | ||
254 | * This removes packets from the TX queue, up to and including the | ||
255 | * specified index. | ||
256 | */ | ||
257 | static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, | ||
258 | unsigned int index) | ||
259 | { | ||
260 | struct efx_nic *efx = tx_queue->efx; | ||
261 | unsigned int stop_index, read_ptr; | ||
262 | unsigned int mask = tx_queue->efx->type->txd_ring_mask; | ||
263 | |||
264 | stop_index = (index + 1) & mask; | ||
265 | read_ptr = tx_queue->read_count & mask; | ||
266 | |||
267 | while (read_ptr != stop_index) { | ||
268 | struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; | ||
269 | if (unlikely(buffer->len == 0)) { | ||
270 | EFX_ERR(tx_queue->efx, "TX queue %d spurious TX " | ||
271 | "completion id %x\n", tx_queue->queue, | ||
272 | read_ptr); | ||
273 | efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | efx_dequeue_buffer(tx_queue, buffer); | ||
278 | buffer->continuation = 1; | ||
279 | buffer->len = 0; | ||
280 | |||
281 | ++tx_queue->read_count; | ||
282 | read_ptr = tx_queue->read_count & mask; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | /* Initiate a packet transmission on the specified TX queue. | ||
287 | * Note that returning anything other than NETDEV_TX_OK will cause the | ||
288 | * OS to free the skb. | ||
289 | * | ||
290 | * This function is split out from efx_hard_start_xmit to allow the | ||
291 | * loopback test to direct packets via specific TX queues. It is | ||
292 | * therefore a non-static inline, so as not to penalise performance | ||
293 | * for non-loopback transmissions. | ||
294 | * | ||
295 | * Context: netif_tx_lock held | ||
296 | */ | ||
297 | inline int efx_xmit(struct efx_nic *efx, | ||
298 | struct efx_tx_queue *tx_queue, struct sk_buff *skb) | ||
299 | { | ||
300 | int rc; | ||
301 | |||
302 | /* Map fragments for DMA and add to TX queue */ | ||
303 | rc = efx_enqueue_skb(tx_queue, skb); | ||
304 | if (unlikely(rc != NETDEV_TX_OK)) | ||
305 | goto out; | ||
306 | |||
307 | /* Update last TX timer */ | ||
308 | efx->net_dev->trans_start = jiffies; | ||
309 | |||
310 | out: | ||
311 | return rc; | ||
312 | } | ||
313 | |||
314 | /* Initiate a packet transmission. We use one channel per CPU | ||
315 | * (sharing when we have more CPUs than channels). On Falcon, the TX | ||
316 | * completion events will be directed back to the CPU that transmitted | ||
317 | * the packet, which should be cache-efficient. | ||
318 | * | ||
319 | * Context: non-blocking. | ||
320 | * Note that returning anything other than NETDEV_TX_OK will cause the | ||
321 | * OS to free the skb. | ||
322 | */ | ||
323 | int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) | ||
324 | { | ||
325 | struct efx_nic *efx = net_dev->priv; | ||
326 | return efx_xmit(efx, &efx->tx_queue[0], skb); | ||
327 | } | ||
328 | |||
329 | void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) | ||
330 | { | ||
331 | unsigned fill_level; | ||
332 | struct efx_nic *efx = tx_queue->efx; | ||
333 | |||
334 | EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask); | ||
335 | |||
336 | efx_dequeue_buffers(tx_queue, index); | ||
337 | |||
338 | /* See if we need to restart the netif queue. This barrier | ||
339 | * separates the update of read_count from the test of | ||
340 | * stopped. */ | ||
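	/* It pairs with the smp_mb() between the increment of "stopped"
	 * and the re-read of read_count in efx_enqueue_skb(). */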
	smp_mb();
	if (unlikely(tx_queue->stopped)) {
		fill_level = tx_queue->insert_count - tx_queue->read_count;
		if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
			EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));

			/* Do this under netif_tx_lock(), to avoid racing
			 * with efx_xmit(). */
			netif_tx_lock(efx->net_dev);
			if (tx_queue->stopped) {
				tx_queue->stopped = 0;
				efx_wake_queue(efx);
			}
			netif_tx_unlock(efx->net_dev);
		}
	}
}
358 | |||
359 | int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) | ||
360 | { | ||
361 | struct efx_nic *efx = tx_queue->efx; | ||
362 | unsigned int txq_size; | ||
363 | int i, rc; | ||
364 | |||
365 | EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue); | ||
366 | |||
367 | /* Allocate software ring */ | ||
368 | txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer); | ||
369 | tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL); | ||
370 | if (!tx_queue->buffer) { | ||
371 | rc = -ENOMEM; | ||
372 | goto fail1; | ||
373 | } | ||
374 | for (i = 0; i <= efx->type->txd_ring_mask; ++i) | ||
375 | tx_queue->buffer[i].continuation = 1; | ||
376 | |||
377 | /* Allocate hardware ring */ | ||
378 | rc = falcon_probe_tx(tx_queue); | ||
379 | if (rc) | ||
380 | goto fail2; | ||
381 | |||
382 | return 0; | ||
383 | |||
384 | fail2: | ||
385 | kfree(tx_queue->buffer); | ||
386 | tx_queue->buffer = NULL; | ||
387 | fail1: | ||
388 | tx_queue->used = 0; | ||
389 | |||
390 | return rc; | ||
391 | } | ||
392 | |||
393 | int efx_init_tx_queue(struct efx_tx_queue *tx_queue) | ||
394 | { | ||
395 | EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue); | ||
396 | |||
397 | tx_queue->insert_count = 0; | ||
398 | tx_queue->write_count = 0; | ||
399 | tx_queue->read_count = 0; | ||
400 | tx_queue->old_read_count = 0; | ||
401 | BUG_ON(tx_queue->stopped); | ||
402 | |||
403 | /* Set up TX descriptor ring */ | ||
404 | return falcon_init_tx(tx_queue); | ||
405 | } | ||
406 | |||
407 | void efx_release_tx_buffers(struct efx_tx_queue *tx_queue) | ||
408 | { | ||
409 | struct efx_tx_buffer *buffer; | ||
410 | |||
411 | if (!tx_queue->buffer) | ||
412 | return; | ||
413 | |||
414 | /* Free any buffers left in the ring */ | ||
415 | while (tx_queue->read_count != tx_queue->write_count) { | ||
416 | buffer = &tx_queue->buffer[tx_queue->read_count & | ||
417 | tx_queue->efx->type->txd_ring_mask]; | ||
418 | efx_dequeue_buffer(tx_queue, buffer); | ||
419 | buffer->continuation = 1; | ||
420 | buffer->len = 0; | ||
421 | |||
422 | ++tx_queue->read_count; | ||
423 | } | ||
424 | } | ||
425 | |||
426 | void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) | ||
427 | { | ||
428 | EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue); | ||
429 | |||
430 | /* Flush TX queue, remove descriptor ring */ | ||
431 | falcon_fini_tx(tx_queue); | ||
432 | |||
433 | efx_release_tx_buffers(tx_queue); | ||
434 | |||
435 | /* Release queue's stop on port, if any */ | ||
436 | if (tx_queue->stopped) { | ||
437 | tx_queue->stopped = 0; | ||
438 | efx_wake_queue(tx_queue->efx); | ||
439 | } | ||
440 | } | ||
441 | |||
442 | void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) | ||
443 | { | ||
444 | EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue); | ||
445 | falcon_remove_tx(tx_queue); | ||
446 | |||
447 | kfree(tx_queue->buffer); | ||
448 | tx_queue->buffer = NULL; | ||
449 | tx_queue->used = 0; | ||
450 | } | ||
451 | |||
452 | |||