Diffstat (limited to 'lib/dma-debug.c')
-rw-r--r-- | lib/dma-debug.c | 236
1 file changed, 223 insertions(+), 13 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d87a17a819d0..98f2d7e91a91 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -53,11 +53,26 @@ enum map_err_types {
 
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
 struct dma_debug_entry {
 	struct list_head list;
 	struct device *dev;
 	int type;
-	phys_addr_t paddr;
+	unsigned long pfn;
+	size_t offset;
 	u64 dev_addr;
 	u64 size;
 	int direction;
@@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
 	list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
-					 (unsigned long long)entry->paddr,
+					 phys_addr(entry), entry->pfn,
 					 entry->dev_addr, entry->size,
 					 dir2name[entry->direction],
 					 maperr2str[entry->map_err_type]);
@@ -404,6 +424,176 @@ void debug_dma_dump_mappings(struct device *dev)
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
 /*
+ * For each mapping (initial cacheline in the case of
+ * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
+ * scatterlist, or the cacheline specified in dma_map_single) insert
+ * into this tree using the cacheline as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
+ * the entry already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings. For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the
+ * cacheline idle, but we should also be flagging overlaps as an API
+ * violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree. In the case of dma_map_page and
+ * dma_alloc_coherent there is only one dma_debug_entry and one
+ * dma_active_cacheline entry to track per event. dma_map_sg(), on the
+ * other hand, consumes a single dma_debug_entry, but inserts 'nents'
+ * entries into the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if any cachelines in the given page are in the active set.
+ */
+static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
+#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
+
+static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
+{
+	return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
+		(entry->offset >> L1_CACHE_SHIFT);
+}
+
+static int active_cacheline_read_overlap(phys_addr_t cln)
+{
+	int overlap = 0, i;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
+			overlap |= 1 << i;
+	return overlap;
+}
+
+static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
+{
+	int i;
+
+	if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
+		return overlap;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (overlap & 1 << i)
+			radix_tree_tag_set(&dma_active_cacheline, cln, i);
+		else
+			radix_tree_tag_clear(&dma_active_cacheline, cln, i);
+
+	return overlap;
+}
+
+static void active_cacheline_inc_overlap(phys_addr_t cln)
+{
+	int overlap = active_cacheline_read_overlap(cln);
+
+	overlap = active_cacheline_set_overlap(cln, ++overlap);
+
+	/* If we overflowed the overlap counter then we're potentially
+	 * leaking dma-mappings. Otherwise, if maps and unmaps are
+	 * balanced then this overflow may cause false negatives in
+	 * debug_dma_assert_idle() as the cacheline may be marked idle
+	 * prematurely.
+	 */
+	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+		  "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
+		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
+}
+
+static int active_cacheline_dec_overlap(phys_addr_t cln)
+{
+	int overlap = active_cacheline_read_overlap(cln);
+
+	return active_cacheline_set_overlap(cln, --overlap);
+}
+
+static int active_cacheline_insert(struct dma_debug_entry *entry)
+{
+	phys_addr_t cln = to_cacheline_number(entry);
+	unsigned long flags;
+	int rc;
+
+	/* If the device is not writing memory then we don't have any
+	 * concerns about the cpu consuming stale data. This mitigates
+	 * legitimate usages of overlapping mappings.
+	 */
+	if (entry->direction == DMA_TO_DEVICE)
+		return 0;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
+	if (rc == -EEXIST)
+		active_cacheline_inc_overlap(cln);
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	return rc;
+}
+
+static void active_cacheline_remove(struct dma_debug_entry *entry)
+{
+	phys_addr_t cln = to_cacheline_number(entry);
+	unsigned long flags;
+
+	/* ...mirror the insert case */
+	if (entry->direction == DMA_TO_DEVICE)
+		return;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	/* since we are counting overlaps the final put of the
+	 * cacheline will occur when the overlap count is 0.
+	 * active_cacheline_dec_overlap() returns -1 in that case
+	 */
+	if (active_cacheline_dec_overlap(cln) < 0)
+		radix_tree_delete(&dma_active_cacheline, cln);
+	spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_cacheline tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+	static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
+	struct dma_debug_entry *entry = NULL;
+	void **results = (void **) &ents;
+	unsigned int nents, i;
+	unsigned long flags;
+	phys_addr_t cln;
+
+	if (!page)
+		return;
+
+	cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
+	spin_lock_irqsave(&radix_lock, flags);
+	nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
+				       CACHELINES_PER_PAGE);
+	for (i = 0; i < nents; i++) {
+		phys_addr_t ent_cln = to_cacheline_number(ents[i]);
+
+		if (ent_cln == cln) {
+			entry = ents[i];
+			break;
+		} else if (ent_cln >= cln + CACHELINES_PER_PAGE)
+			break;
+	}
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	if (!entry)
+		return;
+
+	cln = to_cacheline_number(entry);
+	err_printk(entry->dev, entry,
+		   "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
+		   &cln);
+}
+
+/*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
  */
@@ -411,10 +601,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
 	struct hash_bucket *bucket;
 	unsigned long flags;
+	int rc;
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);
 	put_hash_bucket(bucket, &flags);
+
+	rc = active_cacheline_insert(entry);
+	if (rc == -ENOMEM) {
+		pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
+		global_disable = true;
+	}
+
+	/* TODO: report -EEXIST errors here as overlapping mappings are
+	 * not supported by the DMA API
+	 */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +670,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
 	unsigned long flags;
 
+	active_cacheline_remove(entry);
+
 	/*
 	 * add to beginning of the list - this way the entries are
 	 * more likely cache hot when they are reallocated.
@@ -895,15 +1098,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   ref->dev_addr, ref->size,
 			   type2name[entry->type], type2name[ref->type]);
 	} else if ((entry->type == dma_debug_coherent) &&
-		   (ref->paddr != entry->paddr)) {
+		   (phys_addr(ref) != phys_addr(entry))) {
 		err_printk(ref->dev, entry, "DMA-API: device driver frees "
 			   "DMA memory with different CPU address "
 			   "[device address=0x%016llx] [size=%llu bytes] "
 			   "[cpu alloc address=0x%016llx] "
 			   "[cpu free address=0x%016llx]",
 			   ref->dev_addr, ref->size,
-			   (unsigned long long)entry->paddr,
-			   (unsigned long long)ref->paddr);
+			   phys_addr(entry),
+			   phys_addr(ref));
 	}
 
 	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -1052,7 +1255,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 
 	entry->dev = dev;
 	entry->type = dma_debug_page;
-	entry->paddr = page_to_phys(page) + offset;
+	entry->pfn = page_to_pfn(page);
+	entry->offset = offset,
 	entry->dev_addr = dma_addr;
 	entry->size = size;
 	entry->direction = direction;
@@ -1148,7 +1352,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 		entry->type = dma_debug_sg;
 		entry->dev = dev;
-		entry->paddr = sg_phys(s);
+		entry->pfn = page_to_pfn(sg_page(s));
+		entry->offset = s->offset,
 		entry->size = sg_dma_len(s);
 		entry->dev_addr = sg_dma_address(s);
 		entry->direction = direction;
@@ -1198,7 +1403,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		struct dma_debug_entry ref = {
 			.type = dma_debug_sg,
 			.dev = dev,
-			.paddr = sg_phys(s),
+			.pfn = page_to_pfn(sg_page(s)),
+			.offset = s->offset,
 			.dev_addr = sg_dma_address(s),
 			.size = sg_dma_len(s),
 			.direction = dir,
@@ -1233,7 +1439,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 
 	entry->type = dma_debug_coherent;
 	entry->dev = dev;
-	entry->paddr = virt_to_phys(virt);
+	entry->pfn = page_to_pfn(virt_to_page(virt));
+	entry->offset = (size_t) virt & PAGE_MASK;
 	entry->size = size;
 	entry->dev_addr = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1455,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 	struct dma_debug_entry ref = {
 		.type = dma_debug_coherent,
 		.dev = dev,
-		.paddr = virt_to_phys(virt),
+		.pfn = page_to_pfn(virt_to_page(virt)),
+		.offset = (size_t) virt & PAGE_MASK,
 		.dev_addr = addr,
 		.size = size,
 		.direction = DMA_BIDIRECTIONAL,
@@ -1356,7 +1564,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type = dma_debug_sg,
 			.dev = dev,
-			.paddr = sg_phys(s),
+			.pfn = page_to_pfn(sg_page(s)),
+			.offset = s->offset,
 			.dev_addr = sg_dma_address(s),
 			.size = sg_dma_len(s),
 			.direction = direction,
@@ -1388,7 +1597,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type = dma_debug_sg,
 			.dev = dev,
-			.paddr = sg_phys(s),
+			.pfn = page_to_pfn(sg_page(s)),
+			.offset = s->offset,
 			.dev_addr = sg_dma_address(s),
 			.size = sg_dma_len(s),
 			.direction = direction,
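
For context: this diff only adds the tracking machinery and the new debug_dma_assert_idle() assertion; no caller is shown here. The snippet below is a hypothetical sketch, not part of this commit, of how a CPU write path might use the new assertion before touching a page. Everything except debug_dma_assert_idle() is invented for the illustration, and it assumes the declaration (and a no-op stub for !CONFIG_DMA_API_DEBUG builds) that this patch series adds to include/linux/dma-debug.h.

/* Hypothetical example, not from this commit: warn via dma-debug if any
 * cacheline of 'page' is still actively DMA-mapped before the CPU
 * scribbles on it.
 */
#include <linux/dma-debug.h>
#include <linux/highmem.h>
#include <linux/string.h>

static void example_cpu_write(struct page *page, unsigned int offset,
			      const void *src, size_t len)
{
	void *dst;

	debug_dma_assert_idle(page);	/* assertion added by this patch */

	dst = kmap_atomic(page);
	memcpy(dst + offset, src, len);
	kunmap_atomic(dst);
}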