| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2014-06-08 02:24:07 -0400 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2014-06-08 02:24:07 -0400 |
| commit | a292241cccb7e20e8b997a9a44177e7c98141859 (patch) | |
| tree | a0b0bb95e7dce3233a2d8b203f9e326cdec7a00e /lib/dma-debug.c | |
| parent | d49cb7aeebb974713f9f7ab2991352d3050b095b (diff) | |
| parent | 68807a0c2015cb40df4869e16651f0ce5cc14d52 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 3.16.
Diffstat (limited to 'lib/dma-debug.c')
| -rw-r--r-- | lib/dma-debug.c | 131 |
1 file changed, 85 insertions, 46 deletions
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 2defd1308b04..98f2d7e91a91 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
| @@ -424,111 +424,134 @@ void debug_dma_dump_mappings(struct device *dev) | |||
| 424 | EXPORT_SYMBOL(debug_dma_dump_mappings); | 424 | EXPORT_SYMBOL(debug_dma_dump_mappings); |
| 425 | 425 | ||
| 426 | /* | 426 | /* |
| 427 | * For each page mapped (initial page in the case of | 427 | * For each mapping (initial cacheline in the case of |
| 428 | * dma_alloc_coherent/dma_map_{single|page}, or each page in a | 428 | * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a |
| 429 | * scatterlist) insert into this tree using the pfn as the key. At | 429 | * scatterlist, or the cacheline specified in dma_map_single) insert |
| 430 | * into this tree using the cacheline as the key. At | ||
| 430 | * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If | 431 | * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If |
| 431 | * the pfn already exists at insertion time add a tag as a reference | 432 | * the entry already exists at insertion time add a tag as a reference |
| 432 | * count for the overlapping mappings. For now, the overlap tracking | 433 | * count for the overlapping mappings. For now, the overlap tracking |
| 433 | * just ensures that 'unmaps' balance 'maps' before marking the pfn | 434 | * just ensures that 'unmaps' balance 'maps' before marking the |
| 434 | * idle, but we should also be flagging overlaps as an API violation. | 435 | * cacheline idle, but we should also be flagging overlaps as an API |
| 436 | * violation. | ||
| 435 | * | 437 | * |
| 436 | * Memory usage is mostly constrained by the maximum number of available | 438 | * Memory usage is mostly constrained by the maximum number of available |
| 437 | * dma-debug entries in that we need a free dma_debug_entry before | 439 | * dma-debug entries in that we need a free dma_debug_entry before |
| 438 | * inserting into the tree. In the case of dma_map_{single|page} and | 440 | * inserting into the tree. In the case of dma_map_page and |
| 439 | * dma_alloc_coherent there is only one dma_debug_entry and one pfn to | 441 | * dma_alloc_coherent there is only one dma_debug_entry and one |
| 440 | * track per event. dma_map_sg(), on the other hand, | 442 | * dma_active_cacheline entry to track per event. dma_map_sg(), on the |
| 441 | * consumes a single dma_debug_entry, but inserts 'nents' entries into | 443 | * other hand, consumes a single dma_debug_entry, but inserts 'nents' |
| 442 | * the tree. | 444 | * entries into the tree. |
| 443 | * | 445 | * |
| 444 | * At any time debug_dma_assert_idle() can be called to trigger a | 446 | * At any time debug_dma_assert_idle() can be called to trigger a |
| 445 | * warning if the given page is in the active set. | 447 | * warning if any cachelines in the given page are in the active set. |
| 446 | */ | 448 | */ |
| 447 | static RADIX_TREE(dma_active_pfn, GFP_NOWAIT); | 449 | static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); |
| 448 | static DEFINE_SPINLOCK(radix_lock); | 450 | static DEFINE_SPINLOCK(radix_lock); |
| 449 | #define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) | 451 | #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) |
| 452 | #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) | ||
| 453 | #define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) | ||
| 450 | 454 | ||
| 451 | static int active_pfn_read_overlap(unsigned long pfn) | 455 | static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) |
| 456 | { | ||
| 457 | return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + | ||
| 458 | (entry->offset >> L1_CACHE_SHIFT); | ||
| 459 | } | ||
| 460 | |||
| 461 | static int active_cacheline_read_overlap(phys_addr_t cln) | ||
| 452 | { | 462 | { |
| 453 | int overlap = 0, i; | 463 | int overlap = 0, i; |
| 454 | 464 | ||
| 455 | for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) | 465 | for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) |
| 456 | if (radix_tree_tag_get(&dma_active_pfn, pfn, i)) | 466 | if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) |
| 457 | overlap |= 1 << i; | 467 | overlap |= 1 << i; |
| 458 | return overlap; | 468 | return overlap; |
| 459 | } | 469 | } |
| 460 | 470 | ||
| 461 | static int active_pfn_set_overlap(unsigned long pfn, int overlap) | 471 | static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) |
| 462 | { | 472 | { |
| 463 | int i; | 473 | int i; |
| 464 | 474 | ||
| 465 | if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0) | 475 | if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) |
| 466 | return overlap; | 476 | return overlap; |
| 467 | 477 | ||
| 468 | for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) | 478 | for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) |
| 469 | if (overlap & 1 << i) | 479 | if (overlap & 1 << i) |
| 470 | radix_tree_tag_set(&dma_active_pfn, pfn, i); | 480 | radix_tree_tag_set(&dma_active_cacheline, cln, i); |
| 471 | else | 481 | else |
| 472 | radix_tree_tag_clear(&dma_active_pfn, pfn, i); | 482 | radix_tree_tag_clear(&dma_active_cacheline, cln, i); |
| 473 | 483 | ||
| 474 | return overlap; | 484 | return overlap; |
| 475 | } | 485 | } |
| 476 | 486 | ||
| 477 | static void active_pfn_inc_overlap(unsigned long pfn) | 487 | static void active_cacheline_inc_overlap(phys_addr_t cln) |
| 478 | { | 488 | { |
| 479 | int overlap = active_pfn_read_overlap(pfn); | 489 | int overlap = active_cacheline_read_overlap(cln); |
| 480 | 490 | ||
| 481 | overlap = active_pfn_set_overlap(pfn, ++overlap); | 491 | overlap = active_cacheline_set_overlap(cln, ++overlap); |
| 482 | 492 | ||
| 483 | /* If we overflowed the overlap counter then we're potentially | 493 | /* If we overflowed the overlap counter then we're potentially |
| 484 | * leaking dma-mappings. Otherwise, if maps and unmaps are | 494 | * leaking dma-mappings. Otherwise, if maps and unmaps are |
| 485 | * balanced then this overflow may cause false negatives in | 495 | * balanced then this overflow may cause false negatives in |
| 486 | * debug_dma_assert_idle() as the pfn may be marked idle | 496 | * debug_dma_assert_idle() as the cacheline may be marked idle |
| 487 | * prematurely. | 497 | * prematurely. |
| 488 | */ | 498 | */ |
| 489 | WARN_ONCE(overlap > ACTIVE_PFN_MAX_OVERLAP, | 499 | WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, |
| 490 | "DMA-API: exceeded %d overlapping mappings of pfn %lx\n", | 500 | "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", |
| 491 | ACTIVE_PFN_MAX_OVERLAP, pfn); | 501 | ACTIVE_CACHELINE_MAX_OVERLAP, &cln); |
| 492 | } | 502 | } |
| 493 | 503 | ||
| 494 | static int active_pfn_dec_overlap(unsigned long pfn) | 504 | static int active_cacheline_dec_overlap(phys_addr_t cln) |
| 495 | { | 505 | { |
| 496 | int overlap = active_pfn_read_overlap(pfn); | 506 | int overlap = active_cacheline_read_overlap(cln); |
| 497 | 507 | ||
| 498 | return active_pfn_set_overlap(pfn, --overlap); | 508 | return active_cacheline_set_overlap(cln, --overlap); |
| 499 | } | 509 | } |
| 500 | 510 | ||
| 501 | static int active_pfn_insert(struct dma_debug_entry *entry) | 511 | static int active_cacheline_insert(struct dma_debug_entry *entry) |
| 502 | { | 512 | { |
| 513 | phys_addr_t cln = to_cacheline_number(entry); | ||
| 503 | unsigned long flags; | 514 | unsigned long flags; |
| 504 | int rc; | 515 | int rc; |
| 505 | 516 | ||
| 517 | /* If the device is not writing memory then we don't have any | ||
| 518 | * concerns about the cpu consuming stale data. This mitigates | ||
| 519 | * legitimate usages of overlapping mappings. | ||
| 520 | */ | ||
| 521 | if (entry->direction == DMA_TO_DEVICE) | ||
| 522 | return 0; | ||
| 523 | |||
| 506 | spin_lock_irqsave(&radix_lock, flags); | 524 | spin_lock_irqsave(&radix_lock, flags); |
| 507 | rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry); | 525 | rc = radix_tree_insert(&dma_active_cacheline, cln, entry); |
| 508 | if (rc == -EEXIST) | 526 | if (rc == -EEXIST) |
| 509 | active_pfn_inc_overlap(entry->pfn); | 527 | active_cacheline_inc_overlap(cln); |
| 510 | spin_unlock_irqrestore(&radix_lock, flags); | 528 | spin_unlock_irqrestore(&radix_lock, flags); |
| 511 | 529 | ||
| 512 | return rc; | 530 | return rc; |
| 513 | } | 531 | } |
| 514 | 532 | ||
| 515 | static void active_pfn_remove(struct dma_debug_entry *entry) | 533 | static void active_cacheline_remove(struct dma_debug_entry *entry) |
| 516 | { | 534 | { |
| 535 | phys_addr_t cln = to_cacheline_number(entry); | ||
| 517 | unsigned long flags; | 536 | unsigned long flags; |
| 518 | 537 | ||
| 538 | /* ...mirror the insert case */ | ||
| 539 | if (entry->direction == DMA_TO_DEVICE) | ||
| 540 | return; | ||
| 541 | |||
| 519 | spin_lock_irqsave(&radix_lock, flags); | 542 | spin_lock_irqsave(&radix_lock, flags); |
| 520 | /* since we are counting overlaps the final put of the | 543 | /* since we are counting overlaps the final put of the |
| 521 | * entry->pfn will occur when the overlap count is 0. | 544 | * cacheline will occur when the overlap count is 0. |
| 522 | * active_pfn_dec_overlap() returns -1 in that case | 545 | * active_cacheline_dec_overlap() returns -1 in that case |
| 523 | */ | 546 | */ |
| 524 | if (active_pfn_dec_overlap(entry->pfn) < 0) | 547 | if (active_cacheline_dec_overlap(cln) < 0) |
| 525 | radix_tree_delete(&dma_active_pfn, entry->pfn); | 548 | radix_tree_delete(&dma_active_cacheline, cln); |
| 526 | spin_unlock_irqrestore(&radix_lock, flags); | 549 | spin_unlock_irqrestore(&radix_lock, flags); |
| 527 | } | 550 | } |
| 528 | 551 | ||
| 529 | /** | 552 | /** |
| 530 | * debug_dma_assert_idle() - assert that a page is not undergoing dma | 553 | * debug_dma_assert_idle() - assert that a page is not undergoing dma |
| 531 | * @page: page to lookup in the dma_active_pfn tree | 554 | * @page: page to lookup in the dma_active_cacheline tree |
| 532 | * | 555 | * |
| 533 | * Place a call to this routine in cases where the cpu touching the page | 556 | * Place a call to this routine in cases where the cpu touching the page |
| 534 | * before the dma completes (page is dma_unmapped) will lead to data | 557 | * before the dma completes (page is dma_unmapped) will lead to data |
| @@ -536,22 +559,38 @@ static void active_pfn_remove(struct dma_debug_entry *entry) | |||
| 536 | */ | 559 | */ |
| 537 | void debug_dma_assert_idle(struct page *page) | 560 | void debug_dma_assert_idle(struct page *page) |
| 538 | { | 561 | { |
| 562 | static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; | ||
| 563 | struct dma_debug_entry *entry = NULL; | ||
| 564 | void **results = (void **) &ents; | ||
| 565 | unsigned int nents, i; | ||
| 539 | unsigned long flags; | 566 | unsigned long flags; |
| 540 | struct dma_debug_entry *entry; | 567 | phys_addr_t cln; |
| 541 | 568 | ||
| 542 | if (!page) | 569 | if (!page) |
| 543 | return; | 570 | return; |
| 544 | 571 | ||
| 572 | cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; | ||
| 545 | spin_lock_irqsave(&radix_lock, flags); | 573 | spin_lock_irqsave(&radix_lock, flags); |
| 546 | entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page)); | 574 | nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, |
| 575 | CACHELINES_PER_PAGE); | ||
| 576 | for (i = 0; i < nents; i++) { | ||
| 577 | phys_addr_t ent_cln = to_cacheline_number(ents[i]); | ||
| 578 | |||
| 579 | if (ent_cln == cln) { | ||
| 580 | entry = ents[i]; | ||
| 581 | break; | ||
| 582 | } else if (ent_cln >= cln + CACHELINES_PER_PAGE) | ||
| 583 | break; | ||
| 584 | } | ||
| 547 | spin_unlock_irqrestore(&radix_lock, flags); | 585 | spin_unlock_irqrestore(&radix_lock, flags); |
| 548 | 586 | ||
| 549 | if (!entry) | 587 | if (!entry) |
| 550 | return; | 588 | return; |
| 551 | 589 | ||
| 590 | cln = to_cacheline_number(entry); | ||
| 552 | err_printk(entry->dev, entry, | 591 | err_printk(entry->dev, entry, |
| 553 | "DMA-API: cpu touching an active dma mapped page " | 592 | "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", |
| 554 | "[pfn=0x%lx]\n", entry->pfn); | 593 | &cln); |
| 555 | } | 594 | } |
| 556 | 595 | ||
| 557 | /* | 596 | /* |
| @@ -568,9 +607,9 @@ static void add_dma_entry(struct dma_debug_entry *entry) | |||
| 568 | hash_bucket_add(bucket, entry); | 607 | hash_bucket_add(bucket, entry); |
| 569 | put_hash_bucket(bucket, &flags); | 608 | put_hash_bucket(bucket, &flags); |
| 570 | 609 | ||
| 571 | rc = active_pfn_insert(entry); | 610 | rc = active_cacheline_insert(entry); |
| 572 | if (rc == -ENOMEM) { | 611 | if (rc == -ENOMEM) { |
| 573 | pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n"); | 612 | pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); |
| 574 | global_disable = true; | 613 | global_disable = true; |
| 575 | } | 614 | } |
| 576 | 615 | ||
| @@ -631,7 +670,7 @@ static void dma_entry_free(struct dma_debug_entry *entry) | |||
| 631 | { | 670 | { |
| 632 | unsigned long flags; | 671 | unsigned long flags; |
| 633 | 672 | ||
| 634 | active_pfn_remove(entry); | 673 | active_cacheline_remove(entry); |
| 635 | 674 | ||
| 636 | /* | 675 | /* |
| 637 | * add to beginning of the list - this way the entries are | 676 | * add to beginning of the list - this way the entries are |
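
The heart of the change is the new key computation: instead of indexing the radix tree by pfn, each entry is indexed by its global cacheline number, so distinct cachelines within one page no longer collide. Below is a minimal user-space sketch of that arithmetic, assuming the common x86-64 geometry of 4 KiB pages and 64-byte L1 cachelines (the kernel derives both from PAGE_SHIFT and L1_CACHE_SHIFT, so the constants here are illustrative, not authoritative):

```c
/* Sketch of the cacheline keying introduced by this commit, with
 * assumed x86-64 constants (4 KiB pages, 64-byte cachelines). */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT		12	/* assumed: 4 KiB pages */
#define L1_CACHE_SHIFT		6	/* assumed: 64-byte cachelines */
#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
#define CACHELINES_PER_PAGE	(1 << CACHELINE_PER_PAGE_SHIFT)

/* Mirrors to_cacheline_number(): the radix-tree key is the global
 * cacheline index, so two mappings that share a page but touch
 * different cachelines get distinct keys. */
static uint64_t to_cacheline_number(uint64_t pfn, uint64_t offset)
{
	return (pfn << CACHELINE_PER_PAGE_SHIFT) + (offset >> L1_CACHE_SHIFT);
}

int main(void)
{
	/* Two mappings in the same page (pfn 0x1234) at different
	 * offsets: under the old pfn scheme both used key 0x1234 and
	 * registered as an overlap; now their keys differ by 9. */
	printf("cln(a) = %llu\n",
	       (unsigned long long)to_cacheline_number(0x1234, 0x000));
	printf("cln(b) = %llu\n",
	       (unsigned long long)to_cacheline_number(0x1234, 0x240));
	printf("cachelines per page = %d\n", CACHELINES_PER_PAGE);
	return 0;
}
```

debug_dma_assert_idle() uses the same arithmetic in reverse: `page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT` yields the page's first cacheline number, and the radix_tree_gang_lookup() over the next CACHELINES_PER_PAGE keys finds any active entry belonging to that page.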

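
Overlapping mappings of the same cacheline are reference-counted in the radix tree's per-entry tag bits rather than in extra allocations. Here is a stand-alone sketch of that encoding, assuming RADIX_TREE_MAX_TAGS == 3 (its value in kernels of this vintage), with a plain array standing in for one tree slot's tag bits:

```c
/* Sketch of the overlap count packed into radix-tree tag bits.
 * Assumes RADIX_TREE_MAX_TAGS == 3, so the stored count saturates at
 * ACTIVE_CACHELINE_MAX_OVERLAP == 7. */
#include <stdio.h>

#define RADIX_TREE_MAX_TAGS 3
#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)

static int tags[RADIX_TREE_MAX_TAGS];	/* stand-in for one slot's tags */

/* Mirrors active_cacheline_read_overlap(): each tag bit contributes
 * one binary digit of the count. */
static int read_overlap(void)
{
	int overlap = 0, i;

	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
		if (tags[i])
			overlap |= 1 << i;
	return overlap;
}

/* Mirrors active_cacheline_set_overlap(): out-of-range values are
 * returned unchanged without touching the tags, so the caller can
 * detect saturation (or underflow) from the return value. */
static int set_overlap(int overlap)
{
	int i;

	if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
		return overlap;

	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
		tags[i] = !!(overlap & (1 << i));

	return overlap;
}

int main(void)
{
	int i;

	/* Seven increments store counts 1..7; the eighth and ninth
	 * return 8 (> MAX) while the stored count stays at 7 -- the
	 * out-of-range return is what fires the WARN_ONCE in
	 * active_cacheline_inc_overlap(). */
	for (i = 0; i < 9; i++)
		printf("increment %d -> overlap %d\n",
		       i + 1, set_overlap(read_overlap() + 1));
	return 0;
}
```

Note the asymmetry in the real code: the first mapping of a cacheline is the radix-tree entry itself, and only the second and later mappings (which make radix_tree_insert() return -EEXIST) bump this tag-encoded count, so active_cacheline_dec_overlap() returning -1 means the last mapping is gone and the entry can be deleted.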