author     Linus Torvalds <torvalds@linux-foundation.org>    2016-03-17 15:34:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2016-03-17 15:34:54 -0400
commit     b5b131c7473e17275debcdf1c226f452dc3876ed (patch)
tree       a272e947c38213d4ee989bb3f863a8091d50426b /drivers/dma/ioat
parent     c7eec380e85a427983782df744f0fb745d867170 (diff)
parent     896e041e8e8efb34520d033a693ef25391f9c9f0 (diff)
Merge tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul:
 "This is a smallish update with minor changes to the core, a new
  driver, and the usual updates. Nothing super exciting here:

   - we have made the slave address physical, to let the driver do the
     mapping

   - we now expose the max burst for slave DMA as a new capability so
     clients can know this and program accordingly

   - addition of device synchronize callbacks on omap and edma

   - pl330 updates to support DMAFLUSHP for Rockchip platforms

   - updates and improved sg handling in the Xilinx VDMA driver

   - new hidma Qualcomm dma driver, though some bits are still in
     progress"

* tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (40 commits)
  dmaengine: IOATDMA: revise channel reset workaround on CB3.3 platforms
  dmaengine: add Qualcomm Technologies HIDMA channel driver
  dmaengine: add Qualcomm Technologies HIDMA management driver
  dmaengine: hidma: Add Device Tree binding
  dmaengine: qcom_bam_dma: move to qcom directory
  dmaengine: tegra: Move of_device_id table near to its user
  dmaengine: xilinx_vdma: Remove unnecessary variable initializations
  dmaengine: sirf: use __maybe_unused to hide pm functions
  dmaengine: rcar-dmac: clear pertinence number of channels
  dmaengine: sh: shdmac: don't open code of_device_get_match_data()
  dmaengine: tegra: don't open code of_device_get_match_data()
  dmaengine: qcom_bam_dma: Make driver work for BE
  dmaengine: sun4i: support module autoloading
  dma/mic_x100_dma: IS_ERR() vs PTR_ERR() typo
  dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's
  dmaengine: xilinx_vdma: Simplify spin lock handling
  dmaengine: xilinx_vdma: Fix issues with non-parking mode
  dmaengine: xilinx_vdma: Improve SG engine handling
  dmaengine: pl330: fix to support the burst mode
  dmaengine: make slave address physical
  ...
Diffstat (limited to 'drivers/dma/ioat')
-rw-r--r--  drivers/dma/ioat/dma.c  | 268
-rw-r--r--  drivers/dma/ioat/dma.h  |  23
-rw-r--r--  drivers/dma/ioat/hw.h   |   2
-rw-r--r--  drivers/dma/ioat/init.c |  49
-rw-r--r--  drivers/dma/ioat/prep.c |   2
5 files changed, 109 insertions, 235 deletions
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 21539d5c54c3..bd09961443b1 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -31,6 +31,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/prefetch.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -290,24 +291,30 @@ static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 }
 
 static struct ioat_ring_ent *
-ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
 {
         struct ioat_dma_descriptor *hw;
         struct ioat_ring_ent *desc;
         struct ioatdma_device *ioat_dma;
+        struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+        int chunk;
         dma_addr_t phys;
+        u8 *pos;
+        off_t offs;
 
         ioat_dma = to_ioatdma_device(chan->device);
-        hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
-        if (!hw)
-                return NULL;
+
+        chunk = idx / IOAT_DESCS_PER_2M;
+        idx &= (IOAT_DESCS_PER_2M - 1);
+        offs = idx * IOAT_DESC_SZ;
+        pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+        phys = ioat_chan->descs[chunk].hw + offs;
+        hw = (struct ioat_dma_descriptor *)pos;
         memset(hw, 0, sizeof(*hw));
 
         desc = kmem_cache_zalloc(ioat_cache, flags);
-        if (!desc) {
-                pci_pool_free(ioat_dma->dma_pool, hw, phys);
+        if (!desc)
                 return NULL;
-        }
 
         dma_async_tx_descriptor_init(&desc->txd, chan);
         desc->txd.tx_submit = ioat_tx_submit_unlock;
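The new lookup in ioat_alloc_ring_ent() is pure arithmetic: the ring index selects one of the preallocated 2 MiB chunks and then a 64-byte slot inside it. Below is a standalone sketch of that mapping using the constants introduced by this series (IOAT_DESC_SZ = 64, IOAT_DESCS_PER_2M = 32768); the desc_slot() helper is illustrative only, not code from the tree:

#include <assert.h>
#include <stddef.h>

#define IOAT_DESC_SZ      64
#define IOAT_DESCS_PER_2M 32768   /* 2 MiB worth of 64-byte descriptors */

/* Mirror the chunk/offset math done in ioat_alloc_ring_ent(). */
static void desc_slot(int idx, int *chunk, size_t *offs)
{
        *chunk = idx / IOAT_DESCS_PER_2M;
        *offs  = (size_t)(idx & (IOAT_DESCS_PER_2M - 1)) * IOAT_DESC_SZ;
}

int main(void)
{
        int chunk;
        size_t offs;

        desc_slot(0, &chunk, &offs);          /* first slot of chunk 0 */
        assert(chunk == 0 && offs == 0);

        desc_slot(32768, &chunk, &offs);      /* rolls over into chunk 1 */
        assert(chunk == 1 && offs == 0);

        desc_slot(65535, &chunk, &offs);      /* last of the 64K descriptors */
        assert(chunk == 1 && offs == 2 * 1024 * 1024 - 64);
        return 0;
}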
@@ -318,32 +325,63 @@ ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
 
 void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
 {
-        struct ioatdma_device *ioat_dma;
-
-        ioat_dma = to_ioatdma_device(chan->device);
-        pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
         kmem_cache_free(ioat_cache, desc);
 }
 
 struct ioat_ring_ent **
 ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 {
+        struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
         struct ioat_ring_ent **ring;
-        int descs = 1 << order;
-        int i;
-
-        if (order > ioat_get_max_alloc_order())
-                return NULL;
+        int total_descs = 1 << order;
+        int i, chunks;
 
         /* allocate the array to hold the software ring */
-        ring = kcalloc(descs, sizeof(*ring), flags);
+        ring = kcalloc(total_descs, sizeof(*ring), flags);
         if (!ring)
                 return NULL;
-        for (i = 0; i < descs; i++) {
-                ring[i] = ioat_alloc_ring_ent(c, flags);
+
+        ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+        for (i = 0; i < chunks; i++) {
+                struct ioat_descs *descs = &ioat_chan->descs[i];
+
+                descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+                                                 SZ_2M, &descs->hw, flags);
+                if (!descs->virt && (i > 0)) {
+                        int idx;
+
+                        for (idx = 0; idx < i; idx++) {
+                                dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                                  descs->virt, descs->hw);
+                                descs->virt = NULL;
+                                descs->hw = 0;
+                        }
+
+                        ioat_chan->desc_chunks = 0;
+                        kfree(ring);
+                        return NULL;
+                }
+        }
+
+        for (i = 0; i < total_descs; i++) {
+                ring[i] = ioat_alloc_ring_ent(c, i, flags);
                 if (!ring[i]) {
+                        int idx;
+
                         while (i--)
                                 ioat_free_ring_ent(ring[i], c);
+
+                        for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+                                dma_free_coherent(to_dev(ioat_chan),
+                                                  SZ_2M,
+                                                  ioat_chan->descs[idx].virt,
+                                                  ioat_chan->descs[idx].hw);
+                                ioat_chan->descs[idx].virt = NULL;
+                                ioat_chan->descs[idx].hw = 0;
+                        }
+
+                        ioat_chan->desc_chunks = 0;
                         kfree(ring);
                         return NULL;
                 }
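One observation on the unwind path above: when dma_alloc_coherent() fails, the cleanup loop frees descs->virt/descs->hw (the chunk that just failed to allocate) on every iteration instead of walking ioat_chan->descs[idx], and a failure of the very first chunk (i == 0) is not caught at all. A minimal sketch of what the unwind presumably intends, reusing the same fields; this is an editorial reading of the hunk, not a patch from the series:

        descs->virt = dma_alloc_coherent(to_dev(ioat_chan), SZ_2M,
                                         &descs->hw, flags);
        if (!descs->virt) {
                int idx;

                /* free the chunks that did allocate, not the failed one */
                for (idx = 0; idx < i; idx++) {
                        dma_free_coherent(to_dev(ioat_chan), SZ_2M,
                                          ioat_chan->descs[idx].virt,
                                          ioat_chan->descs[idx].hw);
                        ioat_chan->descs[idx].virt = NULL;
                        ioat_chan->descs[idx].hw = 0;
                }
                ioat_chan->desc_chunks = 0;
                kfree(ring);
                return NULL;
        }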
@@ -351,7 +389,7 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
         }
 
         /* link descs */
-        for (i = 0; i < descs-1; i++) {
+        for (i = 0; i < total_descs-1; i++) {
                 struct ioat_ring_ent *next = ring[i+1];
                 struct ioat_dma_descriptor *hw = ring[i]->hw;
 
@@ -362,114 +400,6 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
         return ring;
 }
 
-static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
-{
-        /* reshape differs from normal ring allocation in that we want
-         * to allocate a new software ring while only
-         * extending/truncating the hardware ring
-         */
-        struct dma_chan *c = &ioat_chan->dma_chan;
-        const u32 curr_size = ioat_ring_size(ioat_chan);
-        const u16 active = ioat_ring_active(ioat_chan);
-        const u32 new_size = 1 << order;
-        struct ioat_ring_ent **ring;
-        u32 i;
-
-        if (order > ioat_get_max_alloc_order())
-                return false;
-
-        /* double check that we have at least 1 free descriptor */
-        if (active == curr_size)
-                return false;
-
-        /* when shrinking, verify that we can hold the current active
-         * set in the new ring
-         */
-        if (active >= new_size)
-                return false;
-
-        /* allocate the array to hold the software ring */
-        ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
-        if (!ring)
-                return false;
-
-        /* allocate/trim descriptors as needed */
-        if (new_size > curr_size) {
-                /* copy current descriptors to the new ring */
-                for (i = 0; i < curr_size; i++) {
-                        u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                        ring[new_idx] = ioat_chan->ring[curr_idx];
-                        set_desc_id(ring[new_idx], new_idx);
-                }
-
-                /* add new descriptors to the ring */
-                for (i = curr_size; i < new_size; i++) {
-                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                        ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
-                        if (!ring[new_idx]) {
-                                while (i--) {
-                                        u16 new_idx = (ioat_chan->tail+i) &
-                                                (new_size-1);
-
-                                        ioat_free_ring_ent(ring[new_idx], c);
-                                }
-                                kfree(ring);
-                                return false;
-                        }
-                        set_desc_id(ring[new_idx], new_idx);
-                }
-
-                /* hw link new descriptors */
-                for (i = curr_size-1; i < new_size; i++) {
-                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-                        struct ioat_ring_ent *next =
-                                ring[(new_idx+1) & (new_size-1)];
-                        struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
-
-                        hw->next = next->txd.phys;
-                }
-        } else {
-                struct ioat_dma_descriptor *hw;
-                struct ioat_ring_ent *next;
-
-                /* copy current descriptors to the new ring, dropping the
-                 * removed descriptors
-                 */
-                for (i = 0; i < new_size; i++) {
-                        u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-                        u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                        ring[new_idx] = ioat_chan->ring[curr_idx];
-                        set_desc_id(ring[new_idx], new_idx);
-                }
-
-                /* free deleted descriptors */
-                for (i = new_size; i < curr_size; i++) {
-                        struct ioat_ring_ent *ent;
-
-                        ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
-                        ioat_free_ring_ent(ent, c);
-                }
-
-                /* fix up hardware ring */
-                hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
-                next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
-                hw->next = next->txd.phys;
-        }
-
-        dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
-                __func__, new_size);
-
-        kfree(ioat_chan->ring);
-        ioat_chan->ring = ring;
-        ioat_chan->alloc_order = order;
-
-        return true;
-}
-
 /**
  * ioat_check_space_lock - verify space and grab ring producer lock
  * @ioat: ioat,3 channel (ring) to operate on
@@ -478,9 +408,6 @@ static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
 int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
         __acquires(&ioat_chan->prep_lock)
 {
-        bool retry;
-
- retry:
         spin_lock_bh(&ioat_chan->prep_lock);
         /* never allow the last descriptor to be consumed, we need at
          * least one free at all times to allow for on-the-fly ring
@@ -493,24 +420,8 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
                 ioat_chan->produce = num_descs;
                 return 0;  /* with ioat->prep_lock held */
         }
-        retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
         spin_unlock_bh(&ioat_chan->prep_lock);
 
-        /* is another cpu already trying to expand the ring? */
-        if (retry)
-                goto retry;
-
-        spin_lock_bh(&ioat_chan->cleanup_lock);
-        spin_lock_bh(&ioat_chan->prep_lock);
-        retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
-        clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
-        spin_unlock_bh(&ioat_chan->prep_lock);
-        spin_unlock_bh(&ioat_chan->cleanup_lock);
-
-        /* if we were able to expand the ring retry the allocation */
-        if (retry)
-                goto retry;
-
         dev_dbg_ratelimited(to_dev(ioat_chan),
                             "%s: ring full! num_descs: %d (%x:%x:%x)\n",
                             __func__, num_descs, ioat_chan->head,
@@ -823,19 +734,6 @@ static void check_active(struct ioatdma_chan *ioat_chan)
 
         if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
                 mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-        else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
-                /* if the ring is idle, empty, and oversized try to step
-                 * down the size
-                 */
-                reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);
-
-                /* keep shrinking until we get back to our minimum
-                 * default size
-                 */
-                if (ioat_chan->alloc_order > ioat_get_alloc_order())
-                        mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-        }
-
 }
 
 void ioat_timer_event(unsigned long data)
@@ -916,40 +814,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
         return dma_cookie_status(c, cookie, txstate);
 }
 
-static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
-{
-        struct pci_dev *pdev = ioat_dma->pdev;
-        int irq = pdev->irq, i;
-
-        if (!is_bwd_ioat(pdev))
-                return 0;
-
-        switch (ioat_dma->irq_mode) {
-        case IOAT_MSIX:
-                for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
-                        struct msix_entry *msix = &ioat_dma->msix_entries[i];
-                        struct ioatdma_chan *ioat_chan;
-
-                        ioat_chan = ioat_chan_by_index(ioat_dma, i);
-                        devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
-                }
-
-                pci_disable_msix(pdev);
-                break;
-        case IOAT_MSI:
-                pci_disable_msi(pdev);
-                /* fall through */
-        case IOAT_INTX:
-                devm_free_irq(&pdev->dev, irq, ioat_dma);
-                break;
-        default:
-                return 0;
-        }
-        ioat_dma->irq_mode = IOAT_NOIRQ;
-
-        return ioat_dma_setup_interrupts(ioat_dma);
-}
-
 int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
 {
         /* throw away whatever the channel was doing and get it
@@ -989,9 +853,21 @@ int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
                 }
         }
 
+        if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+                ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+                ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+                ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+        }
+
+
         err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
-        if (!err)
-                err = ioat_irq_reinit(ioat_dma);
+        if (!err) {
+                if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+                        writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+                        writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+                        writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+                }
+        }
 
         if (err)
                 dev_err(&pdev->dev, "Failed to reset: %d\n", err);
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index b8f48074789f..a9bc1a15b0d1 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -62,7 +62,6 @@ enum ioat_irq_mode {
  * struct ioatdma_device - internal representation of a IOAT device
  * @pdev: PCI-Express device
  * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
  * @completion_pool: DMA buffers for completion ops
  * @sed_hw_pool: DMA super descriptor pools
  * @dma_dev: embedded struct dma_device
@@ -76,8 +75,7 @@ enum ioat_irq_mode {
 struct ioatdma_device {
         struct pci_dev *pdev;
         void __iomem *reg_base;
-        struct pci_pool *dma_pool;
-        struct pci_pool *completion_pool;
+        struct dma_pool *completion_pool;
 #define MAX_SED_POOLS 5
         struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
         struct dma_device dma_dev;
@@ -88,6 +86,16 @@ struct ioatdma_device {
         struct dca_provider *dca;
         enum ioat_irq_mode irq_mode;
         u32 cap;
+
+        /* shadow version for CB3.3 chan reset errata workaround */
+        u64 msixtba0;
+        u64 msixdata0;
+        u32 msixpba;
+};
+
+struct ioat_descs {
+        void *virt;
+        dma_addr_t hw;
 };
 
 struct ioatdma_chan {
@@ -100,7 +108,6 @@ struct ioatdma_chan {
         #define IOAT_COMPLETION_ACK 1
         #define IOAT_RESET_PENDING 2
         #define IOAT_KOBJ_INIT_FAIL 3
-        #define IOAT_RESHAPE_PENDING 4
         #define IOAT_RUN 5
         #define IOAT_CHAN_ACTIVE 6
         struct timer_list timer;
@@ -133,6 +140,8 @@ struct ioatdma_chan {
         u16 produce;
         struct ioat_ring_ent **ring;
         spinlock_t prep_lock;
+        struct ioat_descs descs[2];
+        int desc_chunks;
 };
 
 struct ioat_sysfs_entry {
@@ -302,10 +311,8 @@ static inline bool is_ioat_bug(unsigned long err)
 }
 
 #define IOAT_MAX_ORDER 16
-#define ioat_get_alloc_order() \
-        (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
-#define ioat_get_max_alloc_order() \
-        (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+#define IOAT_MAX_DESCS 65536
+#define IOAT_DESCS_PER_2M 32768
 
 static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
 {
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 690e3b4f8202..8e67895bcca3 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -73,6 +73,8 @@
 
 int system_has_dca_enabled(struct pci_dev *pdev);
 
+#define IOAT_DESC_SZ    64
+
 struct ioat_dma_descriptor {
         uint32_t size;
         union {
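Taken together with the dma.h hunk above, the constants are self-consistent: IOAT_DESC_SZ * IOAT_DESCS_PER_2M = 2 MiB (one coherent chunk), and IOAT_MAX_DESCS / IOAT_DESCS_PER_2M = 2, which is why struct ioatdma_chan carries descs[2]. A hypothetical compile-time check of those relationships (something that could sit in an init function; it is not part of this patch set) might read:

        /* Assumed sanity checks; the relationships hold for this series. */
        BUILD_BUG_ON(IOAT_DESC_SZ * IOAT_DESCS_PER_2M != SZ_2M);
        BUILD_BUG_ON(IOAT_MAX_DESCS != (1 << IOAT_MAX_ORDER));
        BUILD_BUG_ON(IOAT_MAX_DESCS / IOAT_DESCS_PER_2M >
                     ARRAY_SIZE(((struct ioatdma_chan *)0)->descs));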
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 4ef0c5e07912..efdee1a69fc4 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -28,6 +28,7 @@
 #include <linux/prefetch.h>
 #include <linux/dca.h>
 #include <linux/aer.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -136,14 +137,6 @@ int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
                  "high-water mark for pushing ioat descriptors (default: 4)");
-int ioat_ring_alloc_order = 8;
-module_param(ioat_ring_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_alloc_order,
-                 "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)");
-int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
-module_param(ioat_ring_max_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_max_alloc_order,
-                 "ioat+: upper limit for ring size (default: 16)");
 static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
                     sizeof(ioat_interrupt_style), 0644);
@@ -504,23 +497,14 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
         struct pci_dev *pdev = ioat_dma->pdev;
         struct device *dev = &pdev->dev;
 
-        /* DMA coherent memory pool for DMA descriptor allocations */
-        ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-                                             sizeof(struct ioat_dma_descriptor),
-                                             64, 0);
-        if (!ioat_dma->dma_pool) {
-                err = -ENOMEM;
-                goto err_dma_pool;
-        }
-
-        ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev,
+        ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
                                                     sizeof(u64),
                                                     SMP_CACHE_BYTES,
                                                     SMP_CACHE_BYTES);
 
         if (!ioat_dma->completion_pool) {
                 err = -ENOMEM;
-                goto err_completion_pool;
+                goto err_out;
         }
 
         ioat_enumerate_channels(ioat_dma);
@@ -546,10 +530,8 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
 err_self_test:
         ioat_disable_interrupts(ioat_dma);
 err_setup_interrupts:
-        pci_pool_destroy(ioat_dma->completion_pool);
-err_completion_pool:
-        pci_pool_destroy(ioat_dma->dma_pool);
-err_dma_pool:
+        dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
         return err;
 }
 
@@ -559,8 +541,7 @@ static int ioat_register(struct ioatdma_device *ioat_dma)
 
         if (err) {
                 ioat_disable_interrupts(ioat_dma);
-                pci_pool_destroy(ioat_dma->completion_pool);
-                pci_pool_destroy(ioat_dma->dma_pool);
+                dma_pool_destroy(ioat_dma->completion_pool);
         }
 
         return err;
@@ -576,8 +557,7 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
 
         dma_async_device_unregister(dma);
 
-        pci_pool_destroy(ioat_dma->dma_pool);
-        pci_pool_destroy(ioat_dma->completion_pool);
+        dma_pool_destroy(ioat_dma->completion_pool);
 
         INIT_LIST_HEAD(&dma->channels);
 }
@@ -666,10 +646,19 @@ static void ioat_free_chan_resources(struct dma_chan *c)
                 ioat_free_ring_ent(desc, c);
         }
 
+        for (i = 0; i < ioat_chan->desc_chunks; i++) {
+                dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                  ioat_chan->descs[i].virt,
+                                  ioat_chan->descs[i].hw);
+                ioat_chan->descs[i].virt = NULL;
+                ioat_chan->descs[i].hw = 0;
+        }
+        ioat_chan->desc_chunks = 0;
+
         kfree(ioat_chan->ring);
         ioat_chan->ring = NULL;
         ioat_chan->alloc_order = 0;
-        pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+        dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
                       ioat_chan->completion_dma);
         spin_unlock_bh(&ioat_chan->prep_lock);
         spin_unlock_bh(&ioat_chan->cleanup_lock);
@@ -701,7 +690,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
         /* allocate a completion writeback area */
         /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
         ioat_chan->completion =
-                pci_pool_alloc(ioat_chan->ioat_dma->completion_pool,
+                dma_pool_alloc(ioat_chan->ioat_dma->completion_pool,
                                GFP_KERNEL, &ioat_chan->completion_dma);
         if (!ioat_chan->completion)
                 return -ENOMEM;
@@ -712,7 +701,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
         writel(((u64)ioat_chan->completion_dma) >> 32,
                ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
-        order = ioat_get_alloc_order();
+        order = IOAT_MAX_ORDER;
         ring = ioat_alloc_ring(c, order, GFP_KERNEL);
         if (!ring)
                 return -ENOMEM;
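The pci_pool_* calls removed throughout init.c are thin wrappers around the dma_pool API, so the conversion is mechanical: pci_pool_create(name, pdev, ...) becomes dma_pool_create(name, &pdev->dev, ...) and the alloc/free/destroy calls map one-to-one. A self-contained sketch of the pattern against a generic struct device (illustrative only, not the driver's code):

#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/cache.h>
#include <linux/gfp.h>

/* Sketch: lifetime of a small coherent pool, as used here for the
 * 8-byte per-channel completion writeback area.
 */
static int example_completion_pool(struct device *dev)
{
        struct dma_pool *pool;
        dma_addr_t dma;
        u64 *cpu;

        pool = dma_pool_create("completion_pool", dev, sizeof(u64),
                               SMP_CACHE_BYTES, SMP_CACHE_BYTES);
        if (!pool)
                return -ENOMEM;

        cpu = dma_pool_alloc(pool, GFP_KERNEL, &dma);
        if (!cpu) {
                dma_pool_destroy(pool);
                return -ENOMEM;
        }

        /* hand "dma" to the device, poll the result through "cpu" ... */

        dma_pool_free(pool, cpu, dma);
        dma_pool_destroy(pool);
        return 0;
}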
diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c
index 6bb4a13a8fbd..243421af888f 100644
--- a/drivers/dma/ioat/prep.c
+++ b/drivers/dma/ioat/prep.c
@@ -26,7 +26,7 @@
 #include "hw.h"
 #include "dma.h"
 
-#define MAX_SCF 1024
+#define MAX_SCF 256
 
 /* provide a lookup table for setting the source address in the base or
  * extended descriptor of an xor or pq descriptor