author     Linus Walleij <linus.walleij@linaro.org>   2018-01-28 18:44:53 -0500
committer  Ulf Hansson <ulf.hansson@linaro.org>       2018-01-31 05:27:23 -0500
commit     bd9b902798ab14d19ca116b10bde581ddff8f905 (patch)
tree       17a5bfd88aae34251196027c4385ebd3986c9c06
parent     3da90b159b146672f830bcd2489dd3a1f4e9e089 (diff)
mmc: sdhci: Implement an SDHCI-specific bounce buffer
The bounce buffer is gone from the MMC core, and now we have found out that there are some (crippled) i.MX boards out there that have broken ADMA (cannot do scatter-gather) and also broken PIO, so they must use SDMA. Closer examination shows a less significant slowdown also on SDMA-only capable laptop hosts.

SDMA limits the number of segments to one, so each segment gets turned into a singular request that ping-pongs to the block layer before the next request/segment is issued. Apparently it happens a lot that the block layer sends requests that include a lot of physically discontiguous segments. My guess is that this phenomenon is coming from the file system.

These devices that cannot handle scatterlists in hardware can see major benefits from a DMA-contiguous bounce buffer.

This patch accumulates those fragmented scatterlists in a physically contiguous bounce buffer so that we can issue bigger DMA data chunks to/from the card.

When tested with a PCI-integrated host (1217:8221) that only supports SDMA:

0b:00.0 SD Host controller: O2 Micro, Inc. OZ600FJ0/OZ900FJ0/OZ600FJS SD/MMC Card Reader Controller (rev 05)

this patch gave ~1 Mbyte/s improved throughput on large reads and writes when testing with iozone, compared to without the patch.

dmesg:
sdhci-pci 0000:0b:00.0: SDHCI controller found [1217:8221] (rev 5)
mmc0 bounce up to 128 segments into one, max segment size 65536 bytes
mmc0: SDHCI controller on PCI [0000:0b:00.0] using DMA

On the i.MX SDHCI controllers on the crippled i.MX 25 and i.MX 35 the patch restores the performance to what it was before we removed the bounce buffers.

Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Benoît Thébaudeau <benoit@wsystem.com>
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Cc: Benjamin Beckmeyer <beckmeyer.b@rittal.de>
Cc: stable@vger.kernel.org # v4.14+
Fixes: de3ee99b097d ("mmc: Delete bounce buffer handling")
Tested-by: Benjamin Beckmeyer <beckmeyer.b@rittal.de>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
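For orientation, a condensed sketch of the round trip the diff below implements: pack the scatterlist into the contiguous bounce buffer before handing it to SDMA, and unpack it again on completion. The helper names bounce_pre_dma()/bounce_post_dma() are illustrative only; in the patch the logic lives inline in sdhci_pre_dma_transfer() and sdhci_request_done(), together with the non-bounce fallbacks and error reporting that are trimmed here.

/*
 * Condensed sketch only: assumes sdhci.c's existing includes and the
 * bounce_buffer/bounce_addr/bounce_buffer_size fields this patch adds
 * to struct sdhci_host. Helper names are illustrative, not from the patch.
 */
static int bounce_pre_dma(struct sdhci_host *host, struct mmc_data *data)
{
        unsigned int length = data->blksz * data->blocks;

        if (length > host->bounce_buffer_size)
                return -EIO;
        /* For writes, gather the scattered pages into one DMA-contiguous chunk */
        if (mmc_get_dma_dir(data) == DMA_TO_DEVICE)
                sg_copy_to_buffer(data->sg, data->sg_len,
                                  host->bounce_buffer, length);
        /* Hand the buffer over to the device */
        dma_sync_single_for_device(host->mmc->parent, host->bounce_addr,
                                   host->bounce_buffer_size,
                                   mmc_get_dma_dir(data));
        return 1;       /* SDMA sees a single "segment": the bounce buffer */
}

static void bounce_post_dma(struct sdhci_host *host, struct mmc_data *data)
{
        /* Take the buffer back from the device */
        dma_sync_single_for_cpu(host->mmc->parent, host->bounce_addr,
                                host->bounce_buffer_size,
                                mmc_get_dma_dir(data));
        /* For reads, scatter the bounced data back into the sglist */
        if (mmc_get_dma_dir(data) == DMA_FROM_DEVICE)
                sg_copy_from_buffer(data->sg, data->sg_len,
                                    host->bounce_buffer,
                                    min(data->bytes_xfered,
                                        host->bounce_buffer_size));
}

On the allocation side, sdhci_setup_host() calls the new sdhci_allocate_bounce_buffer() only when mmc->max_segs == 1 (i.e. SDMA-only hosts) and simply stays with single segments if the allocation or DMA mapping fails.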
-rw-r--r--   drivers/mmc/host/sdhci.c   164
-rw-r--r--   drivers/mmc/host/sdhci.h     3
2 files changed, 159 insertions, 8 deletions
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 070aff9c108f..2020e57ffa7e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
+#include <linux/sizes.h>
 #include <linux/swiotlb.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
@@ -502,8 +503,35 @@ static int sdhci_pre_dma_transfer(struct sdhci_host *host,
 	if (data->host_cookie == COOKIE_PRE_MAPPED)
 		return data->sg_count;
 
-	sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			      mmc_get_dma_dir(data));
+	/* Bounce write requests to the bounce buffer */
+	if (host->bounce_buffer) {
+		unsigned int length = data->blksz * data->blocks;
+
+		if (length > host->bounce_buffer_size) {
+			pr_err("%s: asked for transfer of %u bytes exceeds bounce buffer %u bytes\n",
+			       mmc_hostname(host->mmc), length,
+			       host->bounce_buffer_size);
+			return -EIO;
+		}
+		if (mmc_get_dma_dir(data) == DMA_TO_DEVICE) {
+			/* Copy the data to the bounce buffer */
+			sg_copy_to_buffer(data->sg, data->sg_len,
+					  host->bounce_buffer,
+					  length);
+		}
+		/* Switch ownership to the DMA */
+		dma_sync_single_for_device(host->mmc->parent,
+					   host->bounce_addr,
+					   host->bounce_buffer_size,
+					   mmc_get_dma_dir(data));
+		/* Just a dummy value */
+		sg_count = 1;
+	} else {
+		/* Just access the data directly from memory */
+		sg_count = dma_map_sg(mmc_dev(host->mmc),
+				      data->sg, data->sg_len,
+				      mmc_get_dma_dir(data));
+	}
 
 	if (sg_count == 0)
 		return -ENOSPC;
@@ -673,6 +701,14 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
 	}
 }
 
+static u32 sdhci_sdma_address(struct sdhci_host *host)
+{
+	if (host->bounce_buffer)
+		return host->bounce_addr;
+	else
+		return sg_dma_address(host->data->sg);
+}
+
 static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 {
 	u8 count;
@@ -858,8 +894,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 					     SDHCI_ADMA_ADDRESS_HI);
 		} else {
 			WARN_ON(sg_cnt != 1);
-			sdhci_writel(host, sg_dma_address(data->sg),
-				SDHCI_DMA_ADDRESS);
+			sdhci_writel(host, sdhci_sdma_address(host),
+				     SDHCI_DMA_ADDRESS);
 		}
 	}
 
@@ -2255,7 +2291,12 @@ static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
 
 	mrq->data->host_cookie = COOKIE_UNMAPPED;
 
-	if (host->flags & SDHCI_REQ_USE_DMA)
+	/*
+	 * No pre-mapping in the pre hook if we're using the bounce buffer,
+	 * for that we would need two bounce buffers since one buffer is
+	 * in flight when this is getting called.
+	 */
+	if (host->flags & SDHCI_REQ_USE_DMA && !host->bounce_buffer)
 		sdhci_pre_dma_transfer(host, mrq->data, COOKIE_PRE_MAPPED);
 }
 
@@ -2359,8 +2400,45 @@ static bool sdhci_request_done(struct sdhci_host *host)
 		struct mmc_data *data = mrq->data;
 
 		if (data && data->host_cookie == COOKIE_MAPPED) {
-			dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-				     mmc_get_dma_dir(data));
+			if (host->bounce_buffer) {
+				/*
+				 * On reads, copy the bounced data into the
+				 * sglist
+				 */
+				if (mmc_get_dma_dir(data) == DMA_FROM_DEVICE) {
+					unsigned int length = data->bytes_xfered;
+
+					if (length > host->bounce_buffer_size) {
+						pr_err("%s: bounce buffer is %u bytes but DMA claims to have transferred %u bytes\n",
+						       mmc_hostname(host->mmc),
+						       host->bounce_buffer_size,
+						       data->bytes_xfered);
+						/* Cap it down and continue */
+						length = host->bounce_buffer_size;
+					}
+					dma_sync_single_for_cpu(
+						host->mmc->parent,
+						host->bounce_addr,
+						host->bounce_buffer_size,
+						DMA_FROM_DEVICE);
+					sg_copy_from_buffer(data->sg,
+							    data->sg_len,
+							    host->bounce_buffer,
+							    length);
+				} else {
+					/* No copying, just switch ownership */
+					dma_sync_single_for_cpu(
+						host->mmc->parent,
+						host->bounce_addr,
+						host->bounce_buffer_size,
+						mmc_get_dma_dir(data));
+				}
+			} else {
+				/* Unmap the raw data */
+				dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+					     data->sg_len,
+					     mmc_get_dma_dir(data));
+			}
 			data->host_cookie = COOKIE_UNMAPPED;
 		}
 	}
@@ -2643,7 +2721,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 		 */
 		if (intmask & SDHCI_INT_DMA_END) {
 			u32 dmastart, dmanow;
-			dmastart = sg_dma_address(host->data->sg);
+
+			dmastart = sdhci_sdma_address(host);
 			dmanow = dmastart + host->data->bytes_xfered;
 			/*
 			 * Force update to the next DMA block boundary.
@@ -3234,6 +3313,68 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps, u32 *caps1)
 }
 EXPORT_SYMBOL_GPL(__sdhci_read_caps);
 
+static int sdhci_allocate_bounce_buffer(struct sdhci_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+	unsigned int max_blocks;
+	unsigned int bounce_size;
+	int ret;
+
+	/*
+	 * Cap the bounce buffer at 64KB. Using a bigger bounce buffer
+	 * has diminishing returns, this is probably because SD/MMC
+	 * cards are usually optimized to handle this size of requests.
+	 */
+	bounce_size = SZ_64K;
+	/*
+	 * Adjust downwards to maximum request size if this is less
+	 * than our segment size, else hammer down the maximum
+	 * request size to the maximum buffer size.
+	 */
+	if (mmc->max_req_size < bounce_size)
+		bounce_size = mmc->max_req_size;
+	max_blocks = bounce_size / 512;
+
+	/*
+	 * When we just support one segment, we can get significant
+	 * speedups by the help of a bounce buffer to group scattered
+	 * reads/writes together.
+	 */
+	host->bounce_buffer = devm_kmalloc(mmc->parent,
+					   bounce_size,
+					   GFP_KERNEL);
+	if (!host->bounce_buffer) {
+		pr_err("%s: failed to allocate %u bytes for bounce buffer, falling back to single segments\n",
+		       mmc_hostname(mmc),
+		       bounce_size);
+		/*
+		 * Exiting with zero here makes sure we proceed with
+		 * mmc->max_segs == 1.
+		 */
+		return 0;
+	}
+
+	host->bounce_addr = dma_map_single(mmc->parent,
+					   host->bounce_buffer,
+					   bounce_size,
+					   DMA_BIDIRECTIONAL);
+	ret = dma_mapping_error(mmc->parent, host->bounce_addr);
+	if (ret)
+		/* Again fall back to max_segs == 1 */
+		return 0;
+	host->bounce_buffer_size = bounce_size;
+
+	/* Lie about this since we're bouncing */
+	mmc->max_segs = max_blocks;
+	mmc->max_seg_size = bounce_size;
+	mmc->max_req_size = bounce_size;
+
+	pr_info("%s bounce up to %u segments into one, max segment size %u bytes\n",
+		mmc_hostname(mmc), max_blocks, bounce_size);
+
+	return 0;
+}
+
 int sdhci_setup_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc;
@@ -3730,6 +3871,13 @@ int sdhci_setup_host(struct sdhci_host *host)
 	 */
 	mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535;
 
+	if (mmc->max_segs == 1) {
+		/* This may alter mmc->*_blk_* parameters */
+		ret = sdhci_allocate_bounce_buffer(host);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 
 unreg:
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index afab26fd70e6..c95b0a4a7594 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -440,6 +440,9 @@ struct sdhci_host {
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
+	char *bounce_buffer;	/* For packing SDMA reads/writes */
+	dma_addr_t bounce_addr;
+	unsigned int bounce_buffer_size;
 
 	const struct sdhci_ops *ops;	/* Low level hw interface */
 