aboutsummaryrefslogtreecommitdiffstats
path: root/lib/dma-debug.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/dma-debug.c')
-rw-r--r--lib/dma-debug.c131
1 files changed, 85 insertions, 46 deletions
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 2defd1308b04..98f2d7e91a91 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -424,111 +424,134 @@ void debug_dma_dump_mappings(struct device *dev)
424EXPORT_SYMBOL(debug_dma_dump_mappings); 424EXPORT_SYMBOL(debug_dma_dump_mappings);
425 425
426/* 426/*
427 * For each page mapped (initial page in the case of 427 * For each mapping (initial cacheline in the case of
428 * dma_alloc_coherent/dma_map_{single|page}, or each page in a 428 * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
429 * scatterlist) insert into this tree using the pfn as the key. At 429 * scatterlist, or the cacheline specified in dma_map_single) insert
430 * into this tree using the cacheline as the key. At
430 * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If 431 * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
431 * the pfn already exists at insertion time add a tag as a reference 432 * the entry already exists at insertion time add a tag as a reference
432 * count for the overlapping mappings. For now, the overlap tracking 433 * count for the overlapping mappings. For now, the overlap tracking
433 * just ensures that 'unmaps' balance 'maps' before marking the pfn 434 * just ensures that 'unmaps' balance 'maps' before marking the
434 * idle, but we should also be flagging overlaps as an API violation. 435 * cacheline idle, but we should also be flagging overlaps as an API
436 * violation.
435 * 437 *
436 * Memory usage is mostly constrained by the maximum number of available 438 * Memory usage is mostly constrained by the maximum number of available
437 * dma-debug entries in that we need a free dma_debug_entry before 439 * dma-debug entries in that we need a free dma_debug_entry before
438 * inserting into the tree. In the case of dma_map_{single|page} and 440 * inserting into the tree. In the case of dma_map_page and
439 * dma_alloc_coherent there is only one dma_debug_entry and one pfn to 441 * dma_alloc_coherent there is only one dma_debug_entry and one
440 * track per event. dma_map_sg(), on the other hand, 442 * dma_active_cacheline entry to track per event. dma_map_sg(), on the
441 * consumes a single dma_debug_entry, but inserts 'nents' entries into 443 * other hand, consumes a single dma_debug_entry, but inserts 'nents'
442 * the tree. 444 * entries into the tree.
443 * 445 *
444 * At any time debug_dma_assert_idle() can be called to trigger a 446 * At any time debug_dma_assert_idle() can be called to trigger a
445 * warning if the given page is in the active set. 447 * warning if any cachelines in the given page are in the active set.
446 */ 448 */
447static RADIX_TREE(dma_active_pfn, GFP_NOWAIT); 449static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
448static DEFINE_SPINLOCK(radix_lock); 450static DEFINE_SPINLOCK(radix_lock);
449#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) 451#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
452#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
453#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
450 454
451static int active_pfn_read_overlap(unsigned long pfn) 455static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
456{
457 return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
458 (entry->offset >> L1_CACHE_SHIFT);
459}
460
461static int active_cacheline_read_overlap(phys_addr_t cln)
452{ 462{
453 int overlap = 0, i; 463 int overlap = 0, i;
454 464
455 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) 465 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
456 if (radix_tree_tag_get(&dma_active_pfn, pfn, i)) 466 if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
457 overlap |= 1 << i; 467 overlap |= 1 << i;
458 return overlap; 468 return overlap;
459} 469}
460 470
461static int active_pfn_set_overlap(unsigned long pfn, int overlap) 471static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
462{ 472{
463 int i; 473 int i;
464 474
465 if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0) 475 if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
466 return overlap; 476 return overlap;
467 477
468 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) 478 for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
469 if (overlap & 1 << i) 479 if (overlap & 1 << i)
470 radix_tree_tag_set(&dma_active_pfn, pfn, i); 480 radix_tree_tag_set(&dma_active_cacheline, cln, i);
471 else 481 else
472 radix_tree_tag_clear(&dma_active_pfn, pfn, i); 482 radix_tree_tag_clear(&dma_active_cacheline, cln, i);
473 483
474 return overlap; 484 return overlap;
475} 485}
476 486
477static void active_pfn_inc_overlap(unsigned long pfn) 487static void active_cacheline_inc_overlap(phys_addr_t cln)
478{ 488{
479 int overlap = active_pfn_read_overlap(pfn); 489 int overlap = active_cacheline_read_overlap(cln);
480 490
481 overlap = active_pfn_set_overlap(pfn, ++overlap); 491 overlap = active_cacheline_set_overlap(cln, ++overlap);
482 492
483 /* If we overflowed the overlap counter then we're potentially 493 /* If we overflowed the overlap counter then we're potentially
484 * leaking dma-mappings. Otherwise, if maps and unmaps are 494 * leaking dma-mappings. Otherwise, if maps and unmaps are
485 * balanced then this overflow may cause false negatives in 495 * balanced then this overflow may cause false negatives in
486 * debug_dma_assert_idle() as the pfn may be marked idle 496 * debug_dma_assert_idle() as the cacheline may be marked idle
487 * prematurely. 497 * prematurely.
488 */ 498 */
489 WARN_ONCE(overlap > ACTIVE_PFN_MAX_OVERLAP, 499 WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
490 "DMA-API: exceeded %d overlapping mappings of pfn %lx\n", 500 "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
491 ACTIVE_PFN_MAX_OVERLAP, pfn); 501 ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
492} 502}
493 503
494static int active_pfn_dec_overlap(unsigned long pfn) 504static int active_cacheline_dec_overlap(phys_addr_t cln)
495{ 505{
496 int overlap = active_pfn_read_overlap(pfn); 506 int overlap = active_cacheline_read_overlap(cln);
497 507
498 return active_pfn_set_overlap(pfn, --overlap); 508 return active_cacheline_set_overlap(cln, --overlap);
499} 509}
500 510
501static int active_pfn_insert(struct dma_debug_entry *entry) 511static int active_cacheline_insert(struct dma_debug_entry *entry)
502{ 512{
513 phys_addr_t cln = to_cacheline_number(entry);
503 unsigned long flags; 514 unsigned long flags;
504 int rc; 515 int rc;
505 516
517 /* If the device is not writing memory then we don't have any
518 * concerns about the cpu consuming stale data. This mitigates
519 * legitimate usages of overlapping mappings.
520 */
521 if (entry->direction == DMA_TO_DEVICE)
522 return 0;
523
506 spin_lock_irqsave(&radix_lock, flags); 524 spin_lock_irqsave(&radix_lock, flags);
507 rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry); 525 rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
508 if (rc == -EEXIST) 526 if (rc == -EEXIST)
509 active_pfn_inc_overlap(entry->pfn); 527 active_cacheline_inc_overlap(cln);
510 spin_unlock_irqrestore(&radix_lock, flags); 528 spin_unlock_irqrestore(&radix_lock, flags);
511 529
512 return rc; 530 return rc;
513} 531}
514 532
515static void active_pfn_remove(struct dma_debug_entry *entry) 533static void active_cacheline_remove(struct dma_debug_entry *entry)
516{ 534{
535 phys_addr_t cln = to_cacheline_number(entry);
517 unsigned long flags; 536 unsigned long flags;
518 537
538 /* ...mirror the insert case */
539 if (entry->direction == DMA_TO_DEVICE)
540 return;
541
519 spin_lock_irqsave(&radix_lock, flags); 542 spin_lock_irqsave(&radix_lock, flags);
520 /* since we are counting overlaps the final put of the 543 /* since we are counting overlaps the final put of the
521 * entry->pfn will occur when the overlap count is 0. 544 * cacheline will occur when the overlap count is 0.
522 * active_pfn_dec_overlap() returns -1 in that case 545 * active_cacheline_dec_overlap() returns -1 in that case
523 */ 546 */
524 if (active_pfn_dec_overlap(entry->pfn) < 0) 547 if (active_cacheline_dec_overlap(cln) < 0)
525 radix_tree_delete(&dma_active_pfn, entry->pfn); 548 radix_tree_delete(&dma_active_cacheline, cln);
526 spin_unlock_irqrestore(&radix_lock, flags); 549 spin_unlock_irqrestore(&radix_lock, flags);
527} 550}
528 551
529/** 552/**
530 * debug_dma_assert_idle() - assert that a page is not undergoing dma 553 * debug_dma_assert_idle() - assert that a page is not undergoing dma
531 * @page: page to lookup in the dma_active_pfn tree 554 * @page: page to lookup in the dma_active_cacheline tree
532 * 555 *
533 * Place a call to this routine in cases where the cpu touching the page 556 * Place a call to this routine in cases where the cpu touching the page
534 * before the dma completes (page is dma_unmapped) will lead to data 557 * before the dma completes (page is dma_unmapped) will lead to data
@@ -536,22 +559,38 @@ static void active_pfn_remove(struct dma_debug_entry *entry)
536 */ 559 */
537void debug_dma_assert_idle(struct page *page) 560void debug_dma_assert_idle(struct page *page)
538{ 561{
562 static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
563 struct dma_debug_entry *entry = NULL;
564 void **results = (void **) &ents;
565 unsigned int nents, i;
539 unsigned long flags; 566 unsigned long flags;
540 struct dma_debug_entry *entry; 567 phys_addr_t cln;
541 568
542 if (!page) 569 if (!page)
543 return; 570 return;
544 571
572 cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
545 spin_lock_irqsave(&radix_lock, flags); 573 spin_lock_irqsave(&radix_lock, flags);
546 entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page)); 574 nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
575 CACHELINES_PER_PAGE);
576 for (i = 0; i < nents; i++) {
577 phys_addr_t ent_cln = to_cacheline_number(ents[i]);
578
579 if (ent_cln == cln) {
580 entry = ents[i];
581 break;
582 } else if (ent_cln >= cln + CACHELINES_PER_PAGE)
583 break;
584 }
547 spin_unlock_irqrestore(&radix_lock, flags); 585 spin_unlock_irqrestore(&radix_lock, flags);
548 586
549 if (!entry) 587 if (!entry)
550 return; 588 return;
551 589
590 cln = to_cacheline_number(entry);
552 err_printk(entry->dev, entry, 591 err_printk(entry->dev, entry,
553 "DMA-API: cpu touching an active dma mapped page " 592 "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
554 "[pfn=0x%lx]\n", entry->pfn); 593 &cln);
555} 594}
556 595
557/* 596/*
@@ -568,9 +607,9 @@ static void add_dma_entry(struct dma_debug_entry *entry)
568 hash_bucket_add(bucket, entry); 607 hash_bucket_add(bucket, entry);
569 put_hash_bucket(bucket, &flags); 608 put_hash_bucket(bucket, &flags);
570 609
571 rc = active_pfn_insert(entry); 610 rc = active_cacheline_insert(entry);
572 if (rc == -ENOMEM) { 611 if (rc == -ENOMEM) {
573 pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n"); 612 pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
574 global_disable = true; 613 global_disable = true;
575 } 614 }
576 615
@@ -631,7 +670,7 @@ static void dma_entry_free(struct dma_debug_entry *entry)
631{ 670{
632 unsigned long flags; 671 unsigned long flags;
633 672
634 active_pfn_remove(entry); 673 active_cacheline_remove(entry);
635 674
636 /* 675 /*
637 * add to beginning of the list - this way the entries are 676 * add to beginning of the list - this way the entries are