author	Dan Williams <dan.j.williams@intel.com>	2014-01-21 18:48:12 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-21 19:19:41 -0500
commit	0abdd7a81b7e3fd781d7fabcca49501852bba17e (patch)
tree	20ab3aa4b476ceaf6d3a824c842cd581445ded8b
parent	03d11a0e458d7008192585124e4c3313c2829046 (diff)
dma-debug: introduce debug_dma_assert_idle()
Record actively mapped pages and provide an api for asserting a given
page is dma inactive before execution proceeds.  Placing
debug_dma_assert_idle() in cow_user_page() flagged the violation of the
dma-api in the NET_DMA implementation (see commit 77873803363c
"net_dma: mark broken").

The implementation includes the capability to count, in a limited way,
repeat mappings of the same page that occur without an intervening
unmap.  This 'overlap' counter is limited to the few bits of tag space
in a radix tree.  This mechanism is added to mitigate false negative
cases where, for example, a page is dma mapped twice and
debug_dma_assert_idle() is called after the page is un-mapped once.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: James Bottomley <JBottomley@Parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
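A minimal sketch of how the new assertion behaves, assuming
CONFIG_DMA_API_DEBUG=y and a built-in caller with a valid struct device
(the patch does not export debug_dma_assert_idle() to modules;
demo_violation and its device are illustrative, not part of this
commit):

	/* Hypothetical built-in self-test, not part of this patch: map a
	 * page, then assert idleness before and after the unmap.
	 */
	#include <linux/dma-mapping.h>
	#include <linux/dma-debug.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>

	static int demo_violation(struct device *dev)
	{
		struct page *page = alloc_page(GFP_KERNEL);
		dma_addr_t dma;

		if (!page)
			return -ENOMEM;

		/* inserts the page's pfn into the dma-debug active set */
		dma = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (dma_mapping_error(dev, dma)) {
			__free_page(page);
			return -EIO;
		}

		/* page is still mapped: this triggers the new DMA-API warning */
		debug_dma_assert_idle(page);

		dma_unmap_page(dev, dma, PAGE_SIZE, DMA_FROM_DEVICE);

		/* pfn has been removed from the active set: no warning */
		debug_dma_assert_idle(page);

		__free_page(page);
		return 0;
	}

The cow_user_page() hook added at the end of this patch is the in-tree
caller; the sketch just makes the map/assert/unmap ordering explicit.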
-rw-r--r--	include/linux/dma-debug.h	6
-rw-r--r--	lib/Kconfig.debug	12
-rw-r--r--	lib/dma-debug.c	193
-rw-r--r--	mm/memory.c	3
4 files changed, 199 insertions(+), 15 deletions(-)
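The 'overlap' counter in this patch lives in radix tree tag bits rather
than a dedicated field. A user-space sketch of that encoding follows;
MAX_TAGS = 3 is an assumption standing in for RADIX_TREE_MAX_TAGS, and
the array of flags stands in for one pfn's tags in the tree:

	#include <stdio.h>

	#define MAX_TAGS	3	/* assumption: stands in for RADIX_TREE_MAX_TAGS */
	#define MAX_OVERLAP	((1 << MAX_TAGS) - 1)

	static int tags[MAX_TAGS];	/* stand-in for one pfn's radix tree tags */

	static int read_overlap(void)
	{
		int overlap = 0, i;

		for (i = MAX_TAGS - 1; i >= 0; i--)
			if (tags[i])
				overlap |= 1 << i;
		return overlap;
	}

	/* returns 0 without touching the tags when the value does not fit,
	 * mirroring active_pfn_set_overlap() below
	 */
	static int set_overlap(int overlap)
	{
		int i;

		if (overlap > MAX_OVERLAP || overlap < 0)
			return 0;
		for (i = MAX_TAGS - 1; i >= 0; i--)
			tags[i] = !!(overlap & (1 << i));
		return overlap;
	}

	int main(void)
	{
		int i;

		/* the first map inserts the pfn; each repeat map bumps the tags */
		for (i = 1; i <= MAX_OVERLAP + 1; i++)
			if (set_overlap(read_overlap() + 1) == 0)
				printf("map %d: exceeded %d overlapping mappings\n",
				       i + 1, MAX_OVERLAP);
		return 0;
	}

With three tag bits the counter saturates at seven outstanding repeat
mappings, after which debug_dma_assert_idle() can report false
negatives — exactly the limitation the commit message notes.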
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index fc0e34ce038f..fe8cb610deac 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 
 extern void debug_dma_dump_mappings(struct device *dev);
 
+extern void debug_dma_assert_idle(struct page *page);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 {
 }
 
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6982094a7e74..900b63c1e899 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1584,8 +1584,16 @@ config DMA_API_DEBUG
 	  With this option you will be able to detect common bugs in device
 	  drivers like double-freeing of DMA mappings or freeing mappings that
 	  were never allocated.
-	  This option causes a performance degredation.  Use only if you want
-	  to debug device drivers. If unsure, say N.
+
+	  This also attempts to catch cases where a page owned by DMA is
+	  accessed by the cpu in a way that could cause data corruption.  For
+	  example, this enables cow_user_page() to check that the source page is
+	  not undergoing DMA.
+
+	  This option causes a performance degradation.  Use only if you want to
+	  debug device drivers and dma interactions.
+
+	  If unsure, say N.
 
 source "samples/Kconfig"
 
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d87a17a819d0..c38083871f11 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -53,11 +53,26 @@ enum map_err_types {
 
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
 struct dma_debug_entry {
 	struct list_head list;
 	struct device *dev;
 	int type;
-	phys_addr_t paddr;
+	unsigned long pfn;
+	size_t offset;
 	u64 dev_addr;
 	u64 size;
 	int direction;
@@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
 	list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
-					 (unsigned long long)entry->paddr,
+					 phys_addr(entry), entry->pfn,
 					 entry->dev_addr, entry->size,
 					 dir2name[entry->direction],
 					 maperr2str[entry->map_err_type]);
@@ -404,6 +424,133 @@ void debug_dma_dump_mappings(struct device *dev)
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
 /*
+ * For each page mapped (initial page in the case of
+ * dma_alloc_coherent/dma_map_{single|page}, or each page in a
+ * scatterlist) insert into this tree using the pfn as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
+ * the pfn already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings. For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the pfn
+ * idle, but we should also be flagging overlaps as an API violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree. In the case of dma_map_{single|page} and
+ * dma_alloc_coherent there is only one dma_debug_entry and one pfn to
+ * track per event. dma_map_sg(), on the other hand,
+ * consumes a single dma_debug_entry, but inserts 'nents' entries into
+ * the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if the given page is in the active set.
+ */
+static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+
+static int active_pfn_read_overlap(unsigned long pfn)
+{
+	int overlap = 0, i;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (radix_tree_tag_get(&dma_active_pfn, pfn, i))
+			overlap |= 1 << i;
+	return overlap;
+}
+
+static int active_pfn_set_overlap(unsigned long pfn, int overlap)
+{
+	int i;
+
+	if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0)
+		return 0;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (overlap & 1 << i)
+			radix_tree_tag_set(&dma_active_pfn, pfn, i);
+		else
+			radix_tree_tag_clear(&dma_active_pfn, pfn, i);
+
+	return overlap;
+}
+
+static void active_pfn_inc_overlap(unsigned long pfn)
+{
+	int overlap = active_pfn_read_overlap(pfn);
+
+	overlap = active_pfn_set_overlap(pfn, ++overlap);
+
+	/* If we overflowed the overlap counter then we're potentially
+	 * leaking dma-mappings.  Otherwise, if maps and unmaps are
+	 * balanced then this overflow may cause false negatives in
+	 * debug_dma_assert_idle() as the pfn may be marked idle
+	 * prematurely.
+	 */
+	WARN_ONCE(overlap == 0,
+		  "DMA-API: exceeded %d overlapping mappings of pfn %lx\n",
+		  ACTIVE_PFN_MAX_OVERLAP, pfn);
+}
+
+static int active_pfn_dec_overlap(unsigned long pfn)
+{
+	int overlap = active_pfn_read_overlap(pfn);
+
+	return active_pfn_set_overlap(pfn, --overlap);
+}
+
+static int active_pfn_insert(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
+	if (rc == -EEXIST)
+		active_pfn_inc_overlap(entry->pfn);
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	return rc;
+}
+
+static void active_pfn_remove(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	if (active_pfn_dec_overlap(entry->pfn) == 0)
+		radix_tree_delete(&dma_active_pfn, entry->pfn);
+	spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_pfn tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+	unsigned long flags;
+	struct dma_debug_entry *entry;
+
+	if (!page)
+		return;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	if (!entry)
+		return;
+
+	err_printk(entry->dev, entry,
+		   "DMA-API: cpu touching an active dma mapped page "
+		   "[pfn=0x%lx]\n", entry->pfn);
+}
+
+/*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
  */
@@ -411,10 +558,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
 	struct hash_bucket *bucket;
 	unsigned long flags;
+	int rc;
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);
 	put_hash_bucket(bucket, &flags);
+
+	rc = active_pfn_insert(entry);
+	if (rc == -ENOMEM) {
+		pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n");
+		global_disable = true;
+	}
+
+	/* TODO: report -EEXIST errors here as overlapping mappings are
+	 * not supported by the DMA API
+	 */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
419 577
420static struct dma_debug_entry *__dma_entry_alloc(void) 578static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +627,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
 	unsigned long flags;
 
+	active_pfn_remove(entry);
+
 	/*
 	 * add to beginning of the list - this way the entries are
 	 * more likely cache hot when they are reallocated.
@@ -895,15 +1055,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   ref->dev_addr, ref->size,
 			   type2name[entry->type], type2name[ref->type]);
 	} else if ((entry->type == dma_debug_coherent) &&
-		   (ref->paddr != entry->paddr)) {
+		   (phys_addr(ref) != phys_addr(entry))) {
 		err_printk(ref->dev, entry, "DMA-API: device driver frees "
 			   "DMA memory with different CPU address "
 			   "[device address=0x%016llx] [size=%llu bytes] "
 			   "[cpu alloc address=0x%016llx] "
 			   "[cpu free address=0x%016llx]",
 			   ref->dev_addr, ref->size,
-			   (unsigned long long)entry->paddr,
-			   (unsigned long long)ref->paddr);
+			   phys_addr(entry),
+			   phys_addr(ref));
 	}
 
 	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -1052,7 +1212,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 
 	entry->dev       = dev;
 	entry->type      = dma_debug_page;
-	entry->paddr     = page_to_phys(page) + offset;
+	entry->pfn	 = page_to_pfn(page);
+	entry->offset	 = offset,
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
@@ -1148,7 +1309,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 		entry->type           = dma_debug_sg;
 		entry->dev            = dev;
-		entry->paddr          = sg_phys(s);
+		entry->pfn	      = page_to_pfn(sg_page(s));
+		entry->offset	      = s->offset,
 		entry->size           = sg_dma_len(s);
 		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
@@ -1198,7 +1360,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = dir,
@@ -1233,7 +1396,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 
 	entry->type      = dma_debug_coherent;
 	entry->dev       = dev;
-	entry->paddr     = virt_to_phys(virt);
+	entry->pfn	 = page_to_pfn(virt_to_page(virt));
+	entry->offset	 = (size_t) virt & PAGE_MASK;
 	entry->size      = size;
 	entry->dev_addr  = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1412,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 	struct dma_debug_entry ref = {
 		.type           = dma_debug_coherent,
 		.dev            = dev,
-		.paddr          = virt_to_phys(virt),
+		.pfn		= page_to_pfn(virt_to_page(virt)),
+		.offset		= (size_t) virt & PAGE_MASK,
 		.dev_addr       = addr,
 		.size           = size,
 		.direction      = DMA_BIDIRECTIONAL,
@@ -1356,7 +1521,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
@@ -1388,7 +1554,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
diff --git a/mm/memory.c b/mm/memory.c
index 6768ce9e57d2..e9c550484ba6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -59,6 +59,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/dma-debug.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -2559,6 +2560,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 
 static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
 {
+	debug_dma_assert_idle(src);
+
 	/*
 	 * If the source page was a PFN mapping, we don't have
 	 * a "struct page" for it. We do a best-effort copy by