author     Linus Torvalds <torvalds@linux-foundation.org>    2016-03-17 15:34:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2016-03-17 15:34:54 -0400
commit     b5b131c7473e17275debcdf1c226f452dc3876ed (patch)
tree       a272e947c38213d4ee989bb3f863a8091d50426b /drivers/dma/ioat
parent     c7eec380e85a427983782df744f0fb745d867170 (diff)
parent     896e041e8e8efb34520d033a693ef25391f9c9f0 (diff)
Merge tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul:
"This is smallish update with minor changes to core and new driver and
usual updates. Nothing super exciting here..
- We have made slave address as physical to enable driver to do the
mapping.
- We now expose the maxburst for slave dma as new capability so
clients can know this and program accordingly
- addition of device synchronize callbacks on omap and edma.
- pl330 updates to support DMAFLUSHP for Rockchip platforms.
- Updates and improved sg handling in Xilinx VDMA driver.
- New hidma qualcomm dma driver, though some bits are still in
progress"
* tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (40 commits)
dmaengine: IOATDMA: revise channel reset workaround on CB3.3 platforms
dmaengine: add Qualcomm Technologies HIDMA channel driver
dmaengine: add Qualcomm Technologies HIDMA management driver
dmaengine: hidma: Add Device Tree binding
dmaengine: qcom_bam_dma: move to qcom directory
dmaengine: tegra: Move of_device_id table near to its user
dmaengine: xilinx_vdma: Remove unnecessary variable initializations
dmaengine: sirf: use __maybe_unused to hide pm functions
dmaengine: rcar-dmac: clear pertinence number of channels
dmaengine: sh: shdmac: don't open code of_device_get_match_data()
dmaengine: tegra: don't open code of_device_get_match_data()
dmaengine: qcom_bam_dma: Make driver work for BE
dmaengine: sun4i: support module autoloading
dma/mic_x100_dma: IS_ERR() vs PTR_ERR() typo
dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's
dmaengine: xilinx_vdma: Simplify spin lock handling
dmaengine: xilinx_vdma: Fix issues with non-parking mode
dmaengine: xilinx_vdma: Improve SG engine handling
dmaengine: pl330: fix to support the burst mode
dmaengine: make slave address physical
...
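For the device synchronize callbacks mentioned in the message above, the client-side pairing they enable is dmaengine_terminate_async() followed by dmaengine_synchronize(). A minimal sketch with a hypothetical helper name, not code from this merge:

#include <linux/dmaengine.h>

/*
 * Hypothetical teardown helper: dmaengine_terminate_async() stops further
 * transfers without sleeping, and dmaengine_synchronize() (which relies on
 * the controller implementing the device_synchronize callback, as omap and
 * edma now do) waits until in-flight completion callbacks have finished,
 * so client state can be freed safely afterwards.
 */
static void example_shutdown_channel(struct dma_chan *chan)
{
	dmaengine_terminate_async(chan);
	dmaengine_synchronize(chan);	/* may sleep */
}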
Diffstat (limited to 'drivers/dma/ioat')
-rw-r--r--   drivers/dma/ioat/dma.c  | 268
-rw-r--r--   drivers/dma/ioat/dma.h  |  23
-rw-r--r--   drivers/dma/ioat/hw.h   |   2
-rw-r--r--   drivers/dma/ioat/init.c |  49
-rw-r--r--   drivers/dma/ioat/prep.c |   2
5 files changed, 109 insertions, 235 deletions
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 21539d5c54c3..bd09961443b1 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -31,6 +31,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/prefetch.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -290,24 +291,30 @@ static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 }
 
 static struct ioat_ring_ent *
-ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
 {
 	struct ioat_dma_descriptor *hw;
 	struct ioat_ring_ent *desc;
 	struct ioatdma_device *ioat_dma;
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+	int chunk;
 	dma_addr_t phys;
+	u8 *pos;
+	off_t offs;
 
 	ioat_dma = to_ioatdma_device(chan->device);
-	hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
-	if (!hw)
-		return NULL;
+
+	chunk = idx / IOAT_DESCS_PER_2M;
+	idx &= (IOAT_DESCS_PER_2M - 1);
+	offs = idx * IOAT_DESC_SZ;
+	pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+	phys = ioat_chan->descs[chunk].hw + offs;
+	hw = (struct ioat_dma_descriptor *)pos;
 	memset(hw, 0, sizeof(*hw));
 
 	desc = kmem_cache_zalloc(ioat_cache, flags);
-	if (!desc) {
-		pci_pool_free(ioat_dma->dma_pool, hw, phys);
+	if (!desc)
 		return NULL;
-	}
 
 	dma_async_tx_descriptor_init(&desc->txd, chan);
 	desc->txd.tx_submit = ioat_tx_submit_unlock;
@@ -318,32 +325,63 @@ ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
 
 void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
 {
-	struct ioatdma_device *ioat_dma;
-
-	ioat_dma = to_ioatdma_device(chan->device);
-	pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
 	kmem_cache_free(ioat_cache, desc);
 }
 
 struct ioat_ring_ent **
 ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 {
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
 	struct ioat_ring_ent **ring;
-	int descs = 1 << order;
-	int i;
-
-	if (order > ioat_get_max_alloc_order())
-		return NULL;
+	int total_descs = 1 << order;
+	int i, chunks;
 
 	/* allocate the array to hold the software ring */
-	ring = kcalloc(descs, sizeof(*ring), flags);
+	ring = kcalloc(total_descs, sizeof(*ring), flags);
 	if (!ring)
 		return NULL;
-	for (i = 0; i < descs; i++) {
-		ring[i] = ioat_alloc_ring_ent(c, flags);
+
+	ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+	for (i = 0; i < chunks; i++) {
+		struct ioat_descs *descs = &ioat_chan->descs[i];
+
+		descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+						 SZ_2M, &descs->hw, flags);
+		if (!descs->virt && (i > 0)) {
+			int idx;
+
+			for (idx = 0; idx < i; idx++) {
+				dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+						  descs->virt, descs->hw);
+				descs->virt = NULL;
+				descs->hw = 0;
+			}
+
+			ioat_chan->desc_chunks = 0;
+			kfree(ring);
+			return NULL;
+		}
+	}
+
+	for (i = 0; i < total_descs; i++) {
+		ring[i] = ioat_alloc_ring_ent(c, i, flags);
 		if (!ring[i]) {
+			int idx;
+
 			while (i--)
 				ioat_free_ring_ent(ring[i], c);
+
+			for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+				dma_free_coherent(to_dev(ioat_chan),
+						  SZ_2M,
+						  ioat_chan->descs[idx].virt,
+						  ioat_chan->descs[idx].hw);
+				ioat_chan->descs[idx].virt = NULL;
+				ioat_chan->descs[idx].hw = 0;
+			}
+
+			ioat_chan->desc_chunks = 0;
 			kfree(ring);
 			return NULL;
 		}
@@ -351,7 +389,7 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 	}
 
 	/* link descs */
-	for (i = 0; i < descs-1; i++) {
+	for (i = 0; i < total_descs-1; i++) {
 		struct ioat_ring_ent *next = ring[i+1];
 		struct ioat_dma_descriptor *hw = ring[i]->hw;
 
@@ -362,114 +400,6 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 	return ring;
 }
 
-static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
-{
-	/* reshape differs from normal ring allocation in that we want
-	 * to allocate a new software ring while only
-	 * extending/truncating the hardware ring
-	 */
-	struct dma_chan *c = &ioat_chan->dma_chan;
-	const u32 curr_size = ioat_ring_size(ioat_chan);
-	const u16 active = ioat_ring_active(ioat_chan);
-	const u32 new_size = 1 << order;
-	struct ioat_ring_ent **ring;
-	u32 i;
-
-	if (order > ioat_get_max_alloc_order())
-		return false;
-
-	/* double check that we have at least 1 free descriptor */
-	if (active == curr_size)
-		return false;
-
-	/* when shrinking, verify that we can hold the current active
-	 * set in the new ring
-	 */
-	if (active >= new_size)
-		return false;
-
-	/* allocate the array to hold the software ring */
-	ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
-	if (!ring)
-		return false;
-
-	/* allocate/trim descriptors as needed */
-	if (new_size > curr_size) {
-		/* copy current descriptors to the new ring */
-		for (i = 0; i < curr_size; i++) {
-			u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-			ring[new_idx] = ioat_chan->ring[curr_idx];
-			set_desc_id(ring[new_idx], new_idx);
-		}
-
-		/* add new descriptors to the ring */
-		for (i = curr_size; i < new_size; i++) {
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-			ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
-			if (!ring[new_idx]) {
-				while (i--) {
-					u16 new_idx = (ioat_chan->tail+i) &
-						       (new_size-1);
-
-					ioat_free_ring_ent(ring[new_idx], c);
-				}
-				kfree(ring);
-				return false;
-			}
-			set_desc_id(ring[new_idx], new_idx);
-		}
-
-		/* hw link new descriptors */
-		for (i = curr_size-1; i < new_size; i++) {
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-			struct ioat_ring_ent *next =
-				ring[(new_idx+1) & (new_size-1)];
-			struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
-
-			hw->next = next->txd.phys;
-		}
-	} else {
-		struct ioat_dma_descriptor *hw;
-		struct ioat_ring_ent *next;
-
-		/* copy current descriptors to the new ring, dropping the
-		 * removed descriptors
-		 */
-		for (i = 0; i < new_size; i++) {
-			u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-			ring[new_idx] = ioat_chan->ring[curr_idx];
-			set_desc_id(ring[new_idx], new_idx);
-		}
-
-		/* free deleted descriptors */
-		for (i = new_size; i < curr_size; i++) {
-			struct ioat_ring_ent *ent;
-
-			ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
-			ioat_free_ring_ent(ent, c);
-		}
-
-		/* fix up hardware ring */
-		hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
-		next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
-		hw->next = next->txd.phys;
-	}
-
-	dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
-		__func__, new_size);
-
-	kfree(ioat_chan->ring);
-	ioat_chan->ring = ring;
-	ioat_chan->alloc_order = order;
-
-	return true;
-}
-
 /**
  * ioat_check_space_lock - verify space and grab ring producer lock
  * @ioat: ioat,3 channel (ring) to operate on
@@ -478,9 +408,6 @@ static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
 int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
 	__acquires(&ioat_chan->prep_lock)
 {
-	bool retry;
-
- retry:
 	spin_lock_bh(&ioat_chan->prep_lock);
 	/* never allow the last descriptor to be consumed, we need at
 	 * least one free at all times to allow for on-the-fly ring
@@ -493,24 +420,8 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
 		ioat_chan->produce = num_descs;
 		return 0; /* with ioat->prep_lock held */
 	}
-	retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
 	spin_unlock_bh(&ioat_chan->prep_lock);
 
-	/* is another cpu already trying to expand the ring? */
-	if (retry)
-		goto retry;
-
-	spin_lock_bh(&ioat_chan->cleanup_lock);
-	spin_lock_bh(&ioat_chan->prep_lock);
-	retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
-	clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
-	spin_unlock_bh(&ioat_chan->prep_lock);
-	spin_unlock_bh(&ioat_chan->cleanup_lock);
-
-	/* if we were able to expand the ring retry the allocation */
-	if (retry)
-		goto retry;
-
 	dev_dbg_ratelimited(to_dev(ioat_chan),
 			    "%s: ring full! num_descs: %d (%x:%x:%x)\n",
 			    __func__, num_descs, ioat_chan->head,
@@ -823,19 +734,6 @@ static void check_active(struct ioatdma_chan *ioat_chan)
 
 	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
 		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-	else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
-		/* if the ring is idle, empty, and oversized try to step
-		 * down the size
-		 */
-		reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);
-
-		/* keep shrinking until we get back to our minimum
-		 * default size
-		 */
-		if (ioat_chan->alloc_order > ioat_get_alloc_order())
-			mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-	}
-
 }
 
 void ioat_timer_event(unsigned long data)
@@ -916,40 +814,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 	return dma_cookie_status(c, cookie, txstate);
 }
 
-static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
-{
-	struct pci_dev *pdev = ioat_dma->pdev;
-	int irq = pdev->irq, i;
-
-	if (!is_bwd_ioat(pdev))
-		return 0;
-
-	switch (ioat_dma->irq_mode) {
-	case IOAT_MSIX:
-		for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
-			struct msix_entry *msix = &ioat_dma->msix_entries[i];
-			struct ioatdma_chan *ioat_chan;
-
-			ioat_chan = ioat_chan_by_index(ioat_dma, i);
-			devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
-		}
-
-		pci_disable_msix(pdev);
-		break;
-	case IOAT_MSI:
-		pci_disable_msi(pdev);
-		/* fall through */
-	case IOAT_INTX:
-		devm_free_irq(&pdev->dev, irq, ioat_dma);
-		break;
-	default:
-		return 0;
-	}
-	ioat_dma->irq_mode = IOAT_NOIRQ;
-
-	return ioat_dma_setup_interrupts(ioat_dma);
-}
-
 int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
 {
 	/* throw away whatever the channel was doing and get it
@@ -989,9 +853,21 @@ int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
 		}
 	}
 
+	if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+		ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+		ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+		ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+	}
+
+
 	err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
-	if (!err)
-		err = ioat_irq_reinit(ioat_dma);
+	if (!err) {
+		if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+			writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+			writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+			writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+		}
+	}
 
 	if (err)
 		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
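For reference, the index math behind the new contiguous descriptor allocation in ioat_alloc_ring_ent() above, as a standalone sketch. The helper name is illustrative; the constants are the ones this series adds to hw.h and dma.h below.

#include <linux/sizes.h>	/* SZ_2M */

#define IOAT_DESC_SZ		64	/* added to hw.h below */
#define IOAT_DESCS_PER_2M	32768	/* SZ_2M / IOAT_DESC_SZ, added to dma.h below */

/*
 * A ring index maps to a 2MB chunk plus a byte offset inside it.  The
 * largest ring (1 << IOAT_MAX_ORDER = 65536 descriptors * 64 bytes = 4MB)
 * therefore fits in exactly two chunks, which is why struct ioatdma_chan
 * only carries descs[2].
 */
static inline void example_desc_slot(int idx, int *chunk, size_t *offs)
{
	*chunk = idx / IOAT_DESCS_PER_2M;
	*offs = (size_t)(idx & (IOAT_DESCS_PER_2M - 1)) * IOAT_DESC_SZ;
}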
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index b8f48074789f..a9bc1a15b0d1 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -62,7 +62,6 @@ enum ioat_irq_mode {
  * struct ioatdma_device - internal representation of a IOAT device
  * @pdev: PCI-Express device
  * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
  * @completion_pool: DMA buffers for completion ops
  * @sed_hw_pool: DMA super descriptor pools
  * @dma_dev: embedded struct dma_device
@@ -76,8 +75,7 @@ enum ioat_irq_mode {
 struct ioatdma_device {
 	struct pci_dev *pdev;
 	void __iomem *reg_base;
-	struct pci_pool *dma_pool;
-	struct pci_pool *completion_pool;
+	struct dma_pool *completion_pool;
 #define MAX_SED_POOLS	5
 	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
 	struct dma_device dma_dev;
@@ -88,6 +86,16 @@ struct ioatdma_device {
 	struct dca_provider *dca;
 	enum ioat_irq_mode irq_mode;
 	u32 cap;
+
+	/* shadow version for CB3.3 chan reset errata workaround */
+	u64 msixtba0;
+	u64 msixdata0;
+	u32 msixpba;
+};
+
+struct ioat_descs {
+	void *virt;
+	dma_addr_t hw;
 };
 
 struct ioatdma_chan {
@@ -100,7 +108,6 @@ struct ioatdma_chan {
 	#define IOAT_COMPLETION_ACK 1
 	#define IOAT_RESET_PENDING 2
 	#define IOAT_KOBJ_INIT_FAIL 3
-	#define IOAT_RESHAPE_PENDING 4
 	#define IOAT_RUN 5
 	#define IOAT_CHAN_ACTIVE 6
 	struct timer_list timer;
@@ -133,6 +140,8 @@ struct ioatdma_chan {
 	u16 produce;
 	struct ioat_ring_ent **ring;
 	spinlock_t prep_lock;
+	struct ioat_descs descs[2];
+	int desc_chunks;
 };
 
 struct ioat_sysfs_entry {
@@ -302,10 +311,8 @@ static inline bool is_ioat_bug(unsigned long err)
 }
 
 #define IOAT_MAX_ORDER 16
-#define ioat_get_alloc_order() \
-	(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
-#define ioat_get_max_alloc_order() \
-	(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+#define IOAT_MAX_DESCS 65536
+#define IOAT_DESCS_PER_2M 32768
 
 static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
 {
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 690e3b4f8202..8e67895bcca3 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -73,6 +73,8 @@
 
 int system_has_dca_enabled(struct pci_dev *pdev);
 
+#define IOAT_DESC_SZ	64
+
 struct ioat_dma_descriptor {
 	uint32_t size;
 	union {
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 4ef0c5e07912..efdee1a69fc4 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -28,6 +28,7 @@
 #include <linux/prefetch.h>
 #include <linux/dca.h>
 #include <linux/aer.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -136,14 +137,6 @@ int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
 		 "high-water mark for pushing ioat descriptors (default: 4)");
-int ioat_ring_alloc_order = 8;
-module_param(ioat_ring_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_alloc_order,
-		 "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)");
-int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
-module_param(ioat_ring_max_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_max_alloc_order,
-		 "ioat+: upper limit for ring size (default: 16)");
 static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
 		    sizeof(ioat_interrupt_style), 0644);
@@ -504,23 +497,14 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
 	struct pci_dev *pdev = ioat_dma->pdev;
 	struct device *dev = &pdev->dev;
 
-	/* DMA coherent memory pool for DMA descriptor allocations */
-	ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-					     sizeof(struct ioat_dma_descriptor),
-					     64, 0);
-	if (!ioat_dma->dma_pool) {
-		err = -ENOMEM;
-		goto err_dma_pool;
-	}
-
-	ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev,
+	ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
 						    sizeof(u64),
 						    SMP_CACHE_BYTES,
 						    SMP_CACHE_BYTES);
 
 	if (!ioat_dma->completion_pool) {
 		err = -ENOMEM;
-		goto err_completion_pool;
+		goto err_out;
 	}
 
 	ioat_enumerate_channels(ioat_dma);
@@ -546,10 +530,8 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
 err_self_test:
 	ioat_disable_interrupts(ioat_dma);
 err_setup_interrupts:
-	pci_pool_destroy(ioat_dma->completion_pool);
-err_completion_pool:
-	pci_pool_destroy(ioat_dma->dma_pool);
-err_dma_pool:
+	dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
 	return err;
 }
 
@@ -559,8 +541,7 @@ static int ioat_register(struct ioatdma_device *ioat_dma)
 
 	if (err) {
 		ioat_disable_interrupts(ioat_dma);
-		pci_pool_destroy(ioat_dma->completion_pool);
-		pci_pool_destroy(ioat_dma->dma_pool);
+		dma_pool_destroy(ioat_dma->completion_pool);
 	}
 
 	return err;
@@ -576,8 +557,7 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
 
 	dma_async_device_unregister(dma);
 
-	pci_pool_destroy(ioat_dma->dma_pool);
-	pci_pool_destroy(ioat_dma->completion_pool);
+	dma_pool_destroy(ioat_dma->completion_pool);
 
 	INIT_LIST_HEAD(&dma->channels);
 }
@@ -666,10 +646,19 @@ static void ioat_free_chan_resources(struct dma_chan *c)
 		ioat_free_ring_ent(desc, c);
 	}
 
+	for (i = 0; i < ioat_chan->desc_chunks; i++) {
+		dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+				  ioat_chan->descs[i].virt,
+				  ioat_chan->descs[i].hw);
+		ioat_chan->descs[i].virt = NULL;
+		ioat_chan->descs[i].hw = 0;
+	}
+	ioat_chan->desc_chunks = 0;
+
 	kfree(ioat_chan->ring);
 	ioat_chan->ring = NULL;
 	ioat_chan->alloc_order = 0;
-	pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+	dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
 		      ioat_chan->completion_dma);
 	spin_unlock_bh(&ioat_chan->prep_lock);
 	spin_unlock_bh(&ioat_chan->cleanup_lock);
@@ -701,7 +690,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	/* allocate a completion writeback area */
 	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
 	ioat_chan->completion =
-		pci_pool_alloc(ioat_chan->ioat_dma->completion_pool,
+		dma_pool_alloc(ioat_chan->ioat_dma->completion_pool,
 			       GFP_KERNEL, &ioat_chan->completion_dma);
 	if (!ioat_chan->completion)
 		return -ENOMEM;
@@ -712,7 +701,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	writel(((u64)ioat_chan->completion_dma) >> 32,
 	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
-	order = ioat_get_alloc_order();
+	order = IOAT_MAX_ORDER;
 	ring = ioat_alloc_ring(c, order, GFP_KERNEL);
 	if (!ring)
 		return -ENOMEM;
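The pci_pool to dma_pool conversion in init.c above is a mechanical API swap. A minimal standalone sketch of the equivalent completion-pool calls follows; the function name is hypothetical and error handling is condensed, so treat it as an illustration of the API rather than code from this series.

#include <linux/cache.h>	/* SMP_CACHE_BYTES */
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/gfp.h>
#include <linux/types.h>

/*
 * dma_pool_* mirrors the old pci_pool_* API but takes a struct device *
 * rather than a struct pci_dev *, so the same code also works for
 * non-PCI DMA engines.
 */
static int example_completion_pool(struct device *dev)
{
	struct dma_pool *pool;
	dma_addr_t completion_dma;
	u64 *completion;

	pool = dma_pool_create("completion_pool", dev, sizeof(u64),
			       SMP_CACHE_BYTES, SMP_CACHE_BYTES);
	if (!pool)
		return -ENOMEM;

	completion = dma_pool_alloc(pool, GFP_KERNEL, &completion_dma);
	if (!completion) {
		dma_pool_destroy(pool);
		return -ENOMEM;
	}

	/* ... hardware writes its completion status at completion_dma ... */

	dma_pool_free(pool, completion, completion_dma);
	dma_pool_destroy(pool);
	return 0;
}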
diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c
index 6bb4a13a8fbd..243421af888f 100644
--- a/drivers/dma/ioat/prep.c
+++ b/drivers/dma/ioat/prep.c
@@ -26,7 +26,7 @@
 #include "hw.h"
 #include "dma.h"
 
-#define MAX_SCF	1024
+#define MAX_SCF	256
 
 /* provide a lookup table for setting the source address in the base or
  * extended descriptor of an xor or pq descriptor