aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Ujfalusi <peter.ujfalusi@ti.com>2015-10-16 03:18:00 -0400
committerVinod Koul <vinod.koul@intel.com>2015-10-26 21:22:44 -0400
commitdf6694f80365a72700d4c68fcf61ef068f5b3c25 (patch)
tree78315b272212f729b5cfbcc5c84b1933998df1ca
parent21a31846a7736a88709fe6fe2e73857d884de89c (diff)
dmaengine: edma: Optimize memcpy operation
If the transfer is shorter than 64K we can complete it with one ACNT burst by configuring ACNT to the length of the copy; this requires one paRAM slot. Otherwise we use two paRAM slots for the copy: slot1: will copy (length / 32767) number of 32767 byte long blocks slot2: will be configured to copy the remaining data. According to tests this patch increases the throughput of memcpy from ~3MB/s to 15MB/s Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com> Signed-off-by: Vinod Koul <vinod.koul@intel.com>
-rw-r--r--drivers/dma/edma.c96
1 files changed, 75 insertions, 21 deletions
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index b36dfa5458cb..c0165e3d3396 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1107,19 +1107,16 @@ static int edma_dma_resume(struct dma_chan *chan)
1107 */ 1107 */
1108static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset, 1108static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
1109 dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, 1109 dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
1110 enum dma_slave_buswidth dev_width, 1110 unsigned int acnt, unsigned int dma_length,
1111 unsigned int dma_length,
1112 enum dma_transfer_direction direction) 1111 enum dma_transfer_direction direction)
1113{ 1112{
1114 struct edma_chan *echan = to_edma_chan(chan); 1113 struct edma_chan *echan = to_edma_chan(chan);
1115 struct device *dev = chan->device->dev; 1114 struct device *dev = chan->device->dev;
1116 struct edmacc_param *param = &epset->param; 1115 struct edmacc_param *param = &epset->param;
1117 int acnt, bcnt, ccnt, cidx; 1116 int bcnt, ccnt, cidx;
1118 int src_bidx, dst_bidx, src_cidx, dst_cidx; 1117 int src_bidx, dst_bidx, src_cidx, dst_cidx;
1119 int absync; 1118 int absync;
1120 1119
1121 acnt = dev_width;
1122
1123 /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */ 1120 /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */
1124 if (!burst) 1121 if (!burst)
1125 burst = 1; 1122 burst = 1;
@@ -1320,41 +1317,98 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
1320 struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, 1317 struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
1321 size_t len, unsigned long tx_flags) 1318 size_t len, unsigned long tx_flags)
1322{ 1319{
1323 int ret; 1320 int ret, nslots;
1324 struct edma_desc *edesc; 1321 struct edma_desc *edesc;
1325 struct device *dev = chan->device->dev; 1322 struct device *dev = chan->device->dev;
1326 struct edma_chan *echan = to_edma_chan(chan); 1323 struct edma_chan *echan = to_edma_chan(chan);
1327 unsigned int width; 1324 unsigned int width, pset_len;
1328 1325
1329 if (unlikely(!echan || !len)) 1326 if (unlikely(!echan || !len))
1330 return NULL; 1327 return NULL;
1331 1328
1332 edesc = kzalloc(sizeof(*edesc) + sizeof(edesc->pset[0]), GFP_ATOMIC); 1329 if (len < SZ_64K) {
1330 /*
1331 * Transfer size less than 64K can be handled with one paRAM
1332 * slot and with one burst.
1333 * ACNT = length
1334 */
1335 width = len;
1336 pset_len = len;
1337 nslots = 1;
1338 } else {
1339 /*
1340 * Transfer size bigger than 64K will be handled with maximum of
1341 * two paRAM slots.
1342 * slot1: (full_length / 32767) times 32767 bytes bursts.
1343 * ACNT = 32767, length1: (full_length / 32767) * 32767
1344 * slot2: the remaining amount of data after slot1.
1345 * ACNT = full_length - length1, length2 = ACNT
1346 *
1347 * When the full_length is multiple of 32767 one slot can be
1348 * used to complete the transfer.
1349 */
1350 width = SZ_32K - 1;
1351 pset_len = rounddown(len, width);
1352 /* One slot is enough for lengths multiple of (SZ_32K -1) */
1353 if (unlikely(pset_len == len))
1354 nslots = 1;
1355 else
1356 nslots = 2;
1357 }
1358
1359 edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]),
1360 GFP_ATOMIC);
1333 if (!edesc) { 1361 if (!edesc) {
1334 dev_dbg(dev, "Failed to allocate a descriptor\n"); 1362 dev_dbg(dev, "Failed to allocate a descriptor\n");
1335 return NULL; 1363 return NULL;
1336 } 1364 }
1337 1365
1338 edesc->pset_nr = 1; 1366 edesc->pset_nr = nslots;
1339 1367 edesc->residue = edesc->residue_stat = len;
1340 width = 1 << __ffs((src | dest | len)); 1368 edesc->direction = DMA_MEM_TO_MEM;
1341 if (width > DMA_SLAVE_BUSWIDTH_64_BYTES) 1369 edesc->echan = echan;
1342 width = DMA_SLAVE_BUSWIDTH_64_BYTES;
1343 1370
1344 ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1, 1371 ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1,
1345 width, len, DMA_MEM_TO_MEM); 1372 width, pset_len, DMA_MEM_TO_MEM);
1346 if (ret < 0) 1373 if (ret < 0) {
1374 kfree(edesc);
1347 return NULL; 1375 return NULL;
1376 }
1348 1377
1349 edesc->absync = ret; 1378 edesc->absync = ret;
1350 1379
1351 /*
1352 * Enable intermediate transfer chaining to re-trigger channel
1353 * on completion of every TR, and enable transfer-completion
1354 * interrupt on completion of the whole transfer.
1355 */
1356 edesc->pset[0].param.opt |= ITCCHEN; 1380 edesc->pset[0].param.opt |= ITCCHEN;
1357 edesc->pset[0].param.opt |= TCINTEN; 1381 if (nslots == 1) {
1382 /* Enable transfer complete interrupt */
1383 edesc->pset[0].param.opt |= TCINTEN;
1384 } else {
1385 /* Enable transfer complete chaining for the first slot */
1386 edesc->pset[0].param.opt |= TCCHEN;
1387
1388 if (echan->slot[1] < 0) {
1389 echan->slot[1] = edma_alloc_slot(echan->ecc,
1390 EDMA_SLOT_ANY);
1391 if (echan->slot[1] < 0) {
1392 kfree(edesc);
1393 dev_err(dev, "%s: Failed to allocate slot\n",
1394 __func__);
1395 return NULL;
1396 }
1397 }
1398 dest += pset_len;
1399 src += pset_len;
1400 pset_len = width = len % (SZ_32K - 1);
1401
1402 ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1,
1403 width, pset_len, DMA_MEM_TO_MEM);
1404 if (ret < 0) {
1405 kfree(edesc);
1406 return NULL;
1407 }
1408
1409 edesc->pset[1].param.opt |= ITCCHEN;
1410 edesc->pset[1].param.opt |= TCINTEN;
1411 }
1358 1412
1359 return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); 1413 return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
1360} 1414}