author    Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>  2014-07-18 18:05:14 -0400
committer Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>  2014-12-23 04:13:03 -0500
commit    1ed1315f9b63c45f57f607e7ad2dac066b101f16 (patch)
tree      19e4fddc2cefedec5b3711ec23651d595aabaf91
parent    ccadee9b1e90dc6d3d97a20ac96cb1a82e0d5a1d (diff)
dmaengine: rcar-dmac: Cache hardware descriptors memory
Unlike DMA transfer descriptors, which are preallocated and cached, the memory used to store hardware descriptors is allocated and freed with the DMA coherent allocation API for every transfer. Besides degrading performance, this stresses the CMA allocator and seems to cause issues. Running dmatest with the noverify option produces

[ 50.066539] alloc_contig_range test_pages_isolated(6b845, 6b846) failed
[ 50.235180] alloc_contig_range test_pages_isolated(6b848, 6b84e) failed
[ 52.964584] alloc_contig_range test_pages_isolated(6b847, 6b848) failed
[ 54.127113] alloc_contig_range test_pages_isolated(6b843, 6b844) failed
[ 56.270253] alloc_contig_range test_pages_isolated(6b84c, 6b850) failed

The root cause needs to be fixed, but in the meantime, as a workaround and a performance improvement, cache the hardware descriptors.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
-rw-r--r--  drivers/dma/sh/rcar-dmac.c  78
1 file changed, 58 insertions, 20 deletions
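Before the diff itself, here is a minimal, self-contained sketch of the caching pattern the patch introduces: keep the coherent buffer attached to the descriptor and reallocate it only when the page-aligned size actually changes. The names used here (struct hwdesc_cache, hwdesc_cache_realloc) are hypothetical illustrations; the real implementation is rcar_dmac_realloc_hwdesc() in the diff below.

/* Sketch only -- hypothetical names, modelled on the pattern in this patch. */
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>           /* PAGE_ALIGN() */

struct hwdesc_cache {
        void *mem;              /* CPU address of the coherent buffer */
        dma_addr_t dma;         /* device address of the same buffer */
        size_t size;            /* current, page-aligned allocation size */
};

/* Reallocate only when the page-aligned size changes; size == 0 frees. */
static void hwdesc_cache_realloc(struct device *dev,
                                 struct hwdesc_cache *cache, size_t size)
{
        /*
         * dma_alloc_coherent() hands out memory in page-sized chunks, so
         * round the request up to avoid a needless free/alloc cycle when
         * the rounded size would not change.
         */
        size = PAGE_ALIGN(size);
        if (cache->size == size)
                return;                 /* reuse the cached buffer as-is */

        if (cache->mem) {
                dma_free_coherent(dev, cache->size, cache->mem, cache->dma);
                cache->mem = NULL;
                cache->size = 0;
        }

        if (!size)
                return;

        cache->mem = dma_alloc_coherent(dev, size, &cache->dma, GFP_NOWAIT);
        if (cache->mem)
                cache->size = size;
}

With this pattern, a descriptor reused for transfers of the same size performs no coherent allocation at all; the buffer is only released when the channel resources are freed.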
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 6e7cdab61827..f71a3dc89048 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -64,6 +64,7 @@ struct rcar_dmac_hw_desc {
  * @chunks: list of transfer chunks for this transfer
  * @running: the transfer chunk being currently processed
  * @nchunks: number of transfer chunks for this transfer
+ * @hwdescs.use: whether the transfer descriptor uses hardware descriptors
  * @hwdescs.mem: hardware descriptors memory for the transfer
  * @hwdescs.dma: device address of the hardware descriptors memory
  * @hwdescs.size: size of the hardware descriptors in bytes
@@ -82,6 +83,7 @@ struct rcar_dmac_desc {
         unsigned int nchunks;
 
         struct {
+                bool use;
                 struct rcar_dmac_hw_desc *mem;
                 dma_addr_t dma;
                 size_t size;
@@ -322,7 +324,7 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
         if (chan->mid_rid >= 0)
                 rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
 
-        if (desc->hwdescs.mem) {
+        if (desc->hwdescs.use) {
                 dev_dbg(chan->chan.device->dev,
                         "chan%u: queue desc %p: %u@%pad\n",
                         chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
@@ -480,8 +482,8 @@ static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
  * @desc: the descriptor
  *
  * Put the descriptor and its transfer chunk descriptors back in the channel's
- * free descriptors lists, and free the hardware descriptors list memory. The
- * descriptor's chunks list will be reinitialized to an empty list as a result.
+ * free descriptors lists. The descriptor's chunks list will be reinitialized to
+ * an empty list as a result.
  *
  * The descriptor must have been removed from the channel's lists before calling
  * this function.
@@ -491,12 +493,6 @@ static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
 static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan,
                                struct rcar_dmac_desc *desc)
 {
-        if (desc->hwdescs.mem) {
-                dma_free_coherent(NULL, desc->hwdescs.size, desc->hwdescs.mem,
-                                  desc->hwdescs.dma);
-                desc->hwdescs.mem = NULL;
-        }
-
         spin_lock_irq(&chan->lock);
         list_splice_tail_init(&desc->chunks, &chan->desc.chunks_free);
         list_add_tail(&desc->node, &chan->desc.free);
@@ -651,20 +647,50 @@ rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan)
651 return chunk; 647 return chunk;
652} 648}
653 649
654static void rcar_dmac_alloc_hwdesc(struct rcar_dmac_chan *chan, 650static void rcar_dmac_realloc_hwdesc(struct rcar_dmac_chan *chan,
655 struct rcar_dmac_desc *desc) 651 struct rcar_dmac_desc *desc, size_t size)
652{
653 /*
654 * dma_alloc_coherent() allocates memory in page size increments. To
655 * avoid reallocating the hardware descriptors when the allocated size
656 * wouldn't change align the requested size to a multiple of the page
657 * size.
658 */
659 size = PAGE_ALIGN(size);
660
661 if (desc->hwdescs.size == size)
662 return;
663
664 if (desc->hwdescs.mem) {
665 dma_free_coherent(NULL, desc->hwdescs.size, desc->hwdescs.mem,
666 desc->hwdescs.dma);
667 desc->hwdescs.mem = NULL;
668 desc->hwdescs.size = 0;
669 }
670
671 if (!size)
672 return;
673
674 desc->hwdescs.mem = dma_alloc_coherent(NULL, size, &desc->hwdescs.dma,
675 GFP_NOWAIT);
676 if (!desc->hwdescs.mem)
677 return;
678
679 desc->hwdescs.size = size;
680}
681
682static void rcar_dmac_fill_hwdesc(struct rcar_dmac_chan *chan,
683 struct rcar_dmac_desc *desc)
656{ 684{
657 struct rcar_dmac_xfer_chunk *chunk; 685 struct rcar_dmac_xfer_chunk *chunk;
658 struct rcar_dmac_hw_desc *hwdesc; 686 struct rcar_dmac_hw_desc *hwdesc;
659 size_t size = desc->nchunks * sizeof(*hwdesc);
660 687
661 hwdesc = dma_alloc_coherent(NULL, size, &desc->hwdescs.dma, GFP_NOWAIT); 688 rcar_dmac_realloc_hwdesc(chan, desc, desc->nchunks * sizeof(*hwdesc));
689
690 hwdesc = desc->hwdescs.mem;
662 if (!hwdesc) 691 if (!hwdesc)
663 return; 692 return;
664 693
665 desc->hwdescs.mem = hwdesc;
666 desc->hwdescs.size = size;
667
668 list_for_each_entry(chunk, &desc->chunks, node) { 694 list_for_each_entry(chunk, &desc->chunks, node) {
669 hwdesc->sar = chunk->src_addr; 695 hwdesc->sar = chunk->src_addr;
670 hwdesc->dar = chunk->dst_addr; 696 hwdesc->dar = chunk->dst_addr;
@@ -890,8 +916,9 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
          * performance improvement would be significant enough compared to the
          * additional complexity remains to be investigated.
          */
-        if (!highmem && nchunks > 1)
-                rcar_dmac_alloc_hwdesc(chan, desc);
+        desc->hwdescs.use = !highmem && nchunks > 1;
+        if (desc->hwdescs.use)
+                rcar_dmac_fill_hwdesc(chan, desc);
 
         return &desc->async_tx;
 }
@@ -930,6 +957,8 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
         struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
         struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
         struct rcar_dmac_desc_page *page, *_page;
+        struct rcar_dmac_desc *desc;
+        LIST_HEAD(list);
 
         /* Protect against ISR */
         spin_lock_irq(&rchan->lock);
@@ -944,6 +973,15 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
                 rchan->mid_rid = -EINVAL;
         }
 
+        list_splice(&rchan->desc.free, &list);
+        list_splice(&rchan->desc.pending, &list);
+        list_splice(&rchan->desc.active, &list);
+        list_splice(&rchan->desc.done, &list);
+        list_splice(&rchan->desc.wait, &list);
+
+        list_for_each_entry(desc, &list, node)
+                rcar_dmac_realloc_hwdesc(rchan, desc, 0);
+
         list_for_each_entry_safe(page, _page, &rchan->desc.pages, node) {
                 list_del(&page->node);
                 free_page((unsigned long)page);
@@ -1114,7 +1152,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
          * descriptor pointer field in the CHCRB register. In non-descriptor
          * mode just use the running descriptor pointer.
          */
-        if (desc->hwdescs.mem) {
+        if (desc->hwdescs.use) {
                 dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
                         RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
                 WARN_ON(dptr >= desc->nchunks);
@@ -1234,7 +1272,7 @@ static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
          * descriptor mode. Only update the running chunk pointer in
          * non-descriptor mode.
          */
-        if (!desc->hwdescs.mem) {
+        if (!desc->hwdescs.use) {
                 /*
                  * If we haven't completed the last transfer chunk simply move
                  * to the next one. Only wake the IRQ thread if the transfer is