Diffstat (limited to 'lib/dma-debug.c')
 lib/dma-debug.c | 131 ++++++++++++++++++++++++++++++--------------
 1 file changed, 85 insertions(+), 46 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 2defd1308b04..98f2d7e91a91 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -424,111 +424,134 @@ void debug_dma_dump_mappings(struct device *dev)
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
 /*
- * For each page mapped (initial page in the case of
- * dma_alloc_coherent/dma_map_{single|page}, or each page in a
- * scatterlist) insert into this tree using the pfn as the key. At
+ * For each mapping (initial cacheline in the case of
+ * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
+ * scatterlist, or the cacheline specified in dma_map_single) insert
+ * into this tree using the cacheline as the key. At
  * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
- * the pfn already exists at insertion time add a tag as a reference
+ * the entry already exists at insertion time add a tag as a reference
  * count for the overlapping mappings. For now, the overlap tracking
- * just ensures that 'unmaps' balance 'maps' before marking the pfn
- * idle, but we should also be flagging overlaps as an API violation.
+ * just ensures that 'unmaps' balance 'maps' before marking the
+ * cacheline idle, but we should also be flagging overlaps as an API
+ * violation.
  *
  * Memory usage is mostly constrained by the maximum number of available
  * dma-debug entries in that we need a free dma_debug_entry before
- * inserting into the tree. In the case of dma_map_{single|page} and
- * dma_alloc_coherent there is only one dma_debug_entry and one pfn to
- * track per event. dma_map_sg(), on the other hand,
- * consumes a single dma_debug_entry, but inserts 'nents' entries into
- * the tree.
+ * inserting into the tree. In the case of dma_map_page and
+ * dma_alloc_coherent there is only one dma_debug_entry and one
+ * dma_active_cacheline entry to track per event. dma_map_sg(), on the
+ * other hand, consumes a single dma_debug_entry, but inserts 'nents'
+ * entries into the tree.
  *
  * At any time debug_dma_assert_idle() can be called to trigger a
- * warning if the given page is in the active set.
+ * warning if any cachelines in the given page are in the active set.
  */
-static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
+static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
 static DEFINE_SPINLOCK(radix_lock);
-#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
+#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
 
-static int active_pfn_read_overlap(unsigned long pfn)
+static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
+{
+	return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
+		(entry->offset >> L1_CACHE_SHIFT);
+}
+
+static int active_cacheline_read_overlap(phys_addr_t cln)
 {
 	int overlap = 0, i;
 
 	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-		if (radix_tree_tag_get(&dma_active_pfn, pfn, i))
+		if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
 			overlap |= 1 << i;
 	return overlap;
 }
 
-static int active_pfn_set_overlap(unsigned long pfn, int overlap)
+static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
 {
 	int i;
 
-	if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0)
+	if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
 		return overlap;
 
 	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
 		if (overlap & 1 << i)
-			radix_tree_tag_set(&dma_active_pfn, pfn, i);
+			radix_tree_tag_set(&dma_active_cacheline, cln, i);
 		else
-			radix_tree_tag_clear(&dma_active_pfn, pfn, i);
+			radix_tree_tag_clear(&dma_active_cacheline, cln, i);
 
 	return overlap;
 }
 
-static void active_pfn_inc_overlap(unsigned long pfn)
+static void active_cacheline_inc_overlap(phys_addr_t cln)
 {
-	int overlap = active_pfn_read_overlap(pfn);
+	int overlap = active_cacheline_read_overlap(cln);
 
-	overlap = active_pfn_set_overlap(pfn, ++overlap);
+	overlap = active_cacheline_set_overlap(cln, ++overlap);
 
 	/* If we overflowed the overlap counter then we're potentially
 	 * leaking dma-mappings. Otherwise, if maps and unmaps are
 	 * balanced then this overflow may cause false negatives in
-	 * debug_dma_assert_idle() as the pfn may be marked idle
+	 * debug_dma_assert_idle() as the cacheline may be marked idle
 	 * prematurely.
 	 */
-	WARN_ONCE(overlap > ACTIVE_PFN_MAX_OVERLAP,
-		  "DMA-API: exceeded %d overlapping mappings of pfn %lx\n",
-		  ACTIVE_PFN_MAX_OVERLAP, pfn);
+	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+		  "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
+		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
 }
 
-static int active_pfn_dec_overlap(unsigned long pfn)
+static int active_cacheline_dec_overlap(phys_addr_t cln)
 {
-	int overlap = active_pfn_read_overlap(pfn);
+	int overlap = active_cacheline_read_overlap(cln);
 
-	return active_pfn_set_overlap(pfn, --overlap);
+	return active_cacheline_set_overlap(cln, --overlap);
 }
 
-static int active_pfn_insert(struct dma_debug_entry *entry)
+static int active_cacheline_insert(struct dma_debug_entry *entry)
 {
+	phys_addr_t cln = to_cacheline_number(entry);
 	unsigned long flags;
 	int rc;
 
+	/* If the device is not writing memory then we don't have any
+	 * concerns about the cpu consuming stale data. This mitigates
+	 * legitimate usages of overlapping mappings.
+	 */
+	if (entry->direction == DMA_TO_DEVICE)
+		return 0;
+
 	spin_lock_irqsave(&radix_lock, flags);
-	rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
+	rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
 	if (rc == -EEXIST)
-		active_pfn_inc_overlap(entry->pfn);
+		active_cacheline_inc_overlap(cln);
 	spin_unlock_irqrestore(&radix_lock, flags);
 
 	return rc;
 }
 
-static void active_pfn_remove(struct dma_debug_entry *entry)
+static void active_cacheline_remove(struct dma_debug_entry *entry)
 {
+	phys_addr_t cln = to_cacheline_number(entry);
 	unsigned long flags;
 
+	/* ...mirror the insert case */
+	if (entry->direction == DMA_TO_DEVICE)
+		return;
+
 	spin_lock_irqsave(&radix_lock, flags);
 	/* since we are counting overlaps the final put of the
-	 * entry->pfn will occur when the overlap count is 0.
-	 * active_pfn_dec_overlap() returns -1 in that case
+	 * cacheline will occur when the overlap count is 0.
+	 * active_cacheline_dec_overlap() returns -1 in that case
 	 */
-	if (active_pfn_dec_overlap(entry->pfn) < 0)
-		radix_tree_delete(&dma_active_pfn, entry->pfn);
+	if (active_cacheline_dec_overlap(cln) < 0)
+		radix_tree_delete(&dma_active_cacheline, cln);
 	spin_unlock_irqrestore(&radix_lock, flags);
 }
 
 /**
  * debug_dma_assert_idle() - assert that a page is not undergoing dma
- * @page: page to lookup in the dma_active_pfn tree
+ * @page: page to lookup in the dma_active_cacheline tree
  *
  * Place a call to this routine in cases where the cpu touching the page
  * before the dma completes (page is dma_unmapped) will lead to data
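The new to_cacheline_number() helper concatenates the page frame number with the cacheline index inside the page, producing the single integer key used for the radix tree. A minimal userspace model of that arithmetic, assuming 4K pages (PAGE_SHIFT = 12) and 64-byte cachelines (L1_CACHE_SHIFT = 6); both values are architecture-dependent in the kernel:

#include <stdio.h>

#define PAGE_SHIFT		12
#define L1_CACHE_SHIFT		6
#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
#define CACHELINES_PER_PAGE	(1 << CACHELINE_PER_PAGE_SHIFT)

static unsigned long long to_cacheline_number(unsigned long pfn,
					      unsigned long offset)
{
	/* pfn selects the page, offset selects the cacheline inside it */
	return ((unsigned long long)pfn << CACHELINE_PER_PAGE_SHIFT) +
		(offset >> L1_CACHE_SHIFT);
}

int main(void)
{
	/* two mappings in the same page, one cacheline apart */
	printf("%llu\n", to_cacheline_number(0x1000, 0x00)); /* 262144 */
	printf("%llu\n", to_cacheline_number(0x1000, 0x40)); /* 262145 */
	return 0;
}

Two mappings 64 bytes apart in the same page thus get distinct keys, while two mappings of the same cacheline collide on insert and fall into the overlap-tagging path.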
@@ -536,22 +559,38 @@ static void active_pfn_remove(struct dma_debug_entry *entry)
  */
 void debug_dma_assert_idle(struct page *page)
 {
+	static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
+	struct dma_debug_entry *entry = NULL;
+	void **results = (void **) &ents;
+	unsigned int nents, i;
 	unsigned long flags;
-	struct dma_debug_entry *entry;
+	phys_addr_t cln;
 
 	if (!page)
 		return;
 
+	cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
 	spin_lock_irqsave(&radix_lock, flags);
-	entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
+	nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
+				       CACHELINES_PER_PAGE);
+	for (i = 0; i < nents; i++) {
+		phys_addr_t ent_cln = to_cacheline_number(ents[i]);
+
+		if (ent_cln == cln) {
+			entry = ents[i];
+			break;
+		} else if (ent_cln >= cln + CACHELINES_PER_PAGE)
+			break;
+	}
 	spin_unlock_irqrestore(&radix_lock, flags);
 
 	if (!entry)
 		return;
 
+	cln = to_cacheline_number(entry);
 	err_printk(entry->dev, entry,
-		   "DMA-API: cpu touching an active dma mapped page "
-		   "[pfn=0x%lx]\n", entry->pfn);
+		   "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
+		   &cln);
 }
 
 /*
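The reworked debug_dma_assert_idle() can no longer do a single-key lookup, since any of a page's cachelines may be active; it gang-looks-up starting at the page's first cacheline and relies on the radix tree returning entries in ascending key order to stop early. A standalone sketch of that page-membership scan, with a sorted key array standing in for the tree (this models the intent of the check, not the kernel API):

#include <stdio.h>

#define CACHELINES_PER_PAGE 64

/* keys[] plays the role of the gang lookup results: active cacheline
 * numbers in ascending order, starting at or after page_first_cln.
 */
static int page_has_active_cacheline(const unsigned long long *keys,
				     unsigned int nents,
				     unsigned long long page_first_cln)
{
	unsigned int i;

	for (i = 0; i < nents; i++) {
		if (keys[i] >= page_first_cln + CACHELINES_PER_PAGE)
			break;		/* past this page: stop early */
		if (keys[i] >= page_first_cln)
			return 1;	/* active mapping inside the page */
	}
	return 0;
}

int main(void)
{
	unsigned long long keys[] = { 262145, 262200, 300000 };

	printf("%d\n", page_has_active_cacheline(keys, 3, 262144)); /* 1 */
	printf("%d\n", page_has_active_cacheline(keys, 3, 262208)); /* 0 */
	return 0;
}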
@@ -568,9 +607,9 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 	hash_bucket_add(bucket, entry);
 	put_hash_bucket(bucket, &flags);
 
-	rc = active_pfn_insert(entry);
+	rc = active_cacheline_insert(entry);
 	if (rc == -ENOMEM) {
-		pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n");
+		pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
 		global_disable = true;
 	}
 
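In add_dma_entry(), -EEXIST from the insert is an expected overlap, absorbed by bumping the tag-encoded reference count, while -ENOMEM permanently disables dma-debug. Because the count lives in RADIX_TREE_MAX_TAGS tag bits per entry, it saturates instead of wrapping; a userspace model of that saturating counter, assuming the three tag bits mainline had at the time (hence a ceiling of 7):

#include <stdio.h>

#define RADIX_TREE_MAX_TAGS	3
#define MAX_OVERLAP		((1 << RADIX_TREE_MAX_TAGS) - 1)

/* tags[] stands in for the per-entry radix tree tag bits */
static int set_overlap(int *tags, int overlap)
{
	int i;

	if (overlap > MAX_OVERLAP || overlap < 0)
		return overlap;	/* out of range: leave tags untouched */

	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
		tags[i] = !!(overlap & (1 << i));
	return overlap;
}

static int read_overlap(const int *tags)
{
	int overlap = 0, i;

	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
		if (tags[i])
			overlap |= 1 << i;
	return overlap;
}

int main(void)
{
	int tags[RADIX_TREE_MAX_TAGS] = { 0 };
	int i;

	for (i = 0; i < 9; i++)	/* 9 increments against a ceiling of 7 */
		set_overlap(tags, read_overlap(tags) + 1);
	printf("count = %d\n", read_overlap(tags)); /* saturates at 7 */
	return 0;
}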
@@ -631,7 +670,7 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
 	unsigned long flags;
 
-	active_pfn_remove(entry);
+	active_cacheline_remove(entry);
 
 	/*
 	 * add to beginning of the list - this way the entries are
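Finally, dma_entry_free() mirrors add_dma_entry() by dropping the entry's cacheline from the active set. For context on how the assertion is consumed, callers place debug_dma_assert_idle() where the cpu is about to write a page that could still be under DMA; the companion mm change put one in the copy-on-write path. A hypothetical call site, with an illustrative function name:

/* Hypothetical caller (prepare_cpu_write() is illustrative, not from
 * the patch).  With CONFIG_DMA_API_DEBUG=n the call compiles to a
 * no-op stub from <linux/dma-debug.h>.
 */
#include <linux/dma-debug.h>
#include <linux/mm_types.h>

static void prepare_cpu_write(struct page *page)
{
	debug_dma_assert_idle(page); /* warns if any cacheline in the page is active */
	/* ...cpu may now safely modify the page... */
}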