aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-06-10 19:19:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-06-10 19:19:14 -0400
commit3f6280ddf25fa656d0e17960588e52bee48a7547 (patch)
tree006854e51246e400c248a9722418bc7a7cce2dbf
parent75063600fd7b27fe447112c27997f100b9e2f99b (diff)
parent92db1e6af747faa129e236d68386af26a0efc12b (diff)
Merge branch 'iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (61 commits) amd-iommu: remove unnecessary "AMD IOMMU: " prefix amd-iommu: detach device explicitly before attaching it to a new domain amd-iommu: remove BUS_NOTIFY_BOUND_DRIVER handling dma-debug: simplify logic in driver_filter() dma-debug: disable/enable irqs only once in device_dma_allocations dma-debug: use pr_* instead of printk(KERN_* ...) dma-debug: code style fixes dma-debug: comment style fixes dma-debug: change hash_bucket_find from first-fit to best-fit x86: enable GART-IOMMU only after setting up protection methods amd_iommu: fix lock imbalance dma-debug: add documentation for the driver filter dma-debug: add dma_debug_driver kernel command line dma-debug: add debugfs file for driver filter dma-debug: add variables and checks for driver filter dma-debug: fix debug_dma_sync_sg_for_cpu and debug_dma_sync_sg_for_device dma-debug: use sg_dma_len accessor dma-debug: use sg_dma_address accessor instead of using dma_address directly amd-iommu: don't free dma adresses below 512MB with CONFIG_IOMMU_STRESS amd-iommu: don't preallocate page tables with CONFIG_IOMMU_STRESS ...
-rw-r--r--Documentation/DMA-API.txt12
-rw-r--r--Documentation/kernel-parameters.txt12
-rw-r--r--arch/x86/Kconfig.debug11
-rw-r--r--arch/x86/include/asm/amd_iommu.h2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h55
-rw-r--r--arch/x86/kernel/amd_iommu.c500
-rw-r--r--arch/x86/kernel/amd_iommu_init.c273
-rw-r--r--arch/x86/kernel/pci-calgary_64.c54
-rw-r--r--arch/x86/kernel/pci-gart_64.c55
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--include/linux/dma-debug.h7
-rw-r--r--include/linux/swiotlb.h3
-rw-r--r--lib/dma-debug.c432
-rw-r--r--lib/swiotlb.c119
14 files changed, 1099 insertions, 438 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d9aa43d78bcc..25fb8bcf32a2 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -704,12 +704,24 @@ this directory the following files can currently be found:
704 The current number of free dma_debug_entries 704 The current number of free dma_debug_entries
705 in the allocator. 705 in the allocator.
706 706
707 dma-api/driver-filter
708 You can write a name of a driver into this file
709 to limit the debug output to requests from that
710 particular driver. Write an empty string to
711 that file to disable the filter and see
712 all errors again.
713
707If you have this code compiled into your kernel it will be enabled by default. 714If you have this code compiled into your kernel it will be enabled by default.
708If you want to boot without the bookkeeping anyway you can provide 715If you want to boot without the bookkeeping anyway you can provide
709'dma_debug=off' as a boot parameter. This will disable DMA-API debugging. 716'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
710Notice that you can not enable it again at runtime. You have to reboot to do 717Notice that you can not enable it again at runtime. You have to reboot to do
711so. 718so.
712 719
720If you want to see debug messages only for a special device driver you can
721specify the dma_debug_driver=<drivername> parameter. This will enable the
722driver filter at boot time. The debug code will only print errors for that
723driver afterwards. This filter can be disabled or changed later using debugfs.
724
713When the code disables itself at runtime this is most likely because it ran 725When the code disables itself at runtime this is most likely because it ran
714out of dma_debug_entries. These entries are preallocated at boot. The number 726out of dma_debug_entries. These entries are preallocated at boot. The number
715of preallocated entries is defined per architecture. If it is too low for you 727of preallocated entries is defined per architecture. If it is too low for you
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 11648c13a729..af43f45e8358 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -329,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file
329 flushed before they will be reused, which 329 flushed before they will be reused, which
330 is a lot of faster 330 is a lot of faster
331 331
332 amd_iommu_size= [HW,X86-64]
333 Define the size of the aperture for the AMD IOMMU
334 driver. Possible values are:
335 '32M', '64M' (default), '128M', '256M', '512M', '1G'
336
337 amijoy.map= [HW,JOY] Amiga joystick support 332 amijoy.map= [HW,JOY] Amiga joystick support
338 Map of devices attached to JOY0DAT and JOY1DAT 333 Map of devices attached to JOY0DAT and JOY1DAT
339 Format: <a>,<b> 334 Format: <a>,<b>
@@ -646,6 +641,13 @@ and is between 256 and 4096 characters. It is defined in the file
646 DMA-API debugging code disables itself because the 641 DMA-API debugging code disables itself because the
647 architectural default is too low. 642 architectural default is too low.
648 643
644 dma_debug_driver=<driver_name>
645 With this option the DMA-API debugging driver
646 filter feature can be enabled at boot time. Just
647 pass the driver to filter for as the parameter.
648 The filter can be disabled or changed to another
649 driver later using sysfs.
650
649 dscc4.setup= [NET] 651 dscc4.setup= [NET]
650 652
651 dtc3181e= [HW,SCSI] 653 dtc3181e= [HW,SCSI]
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e73317f..33fac6bbe1c2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,10 +159,17 @@ config IOMMU_DEBUG
159 options. See Documentation/x86_64/boot-options.txt for more 159 options. See Documentation/x86_64/boot-options.txt for more
160 details. 160 details.
161 161
162config IOMMU_STRESS
163 bool "Enable IOMMU stress-test mode"
164 ---help---
165 This option disables various optimizations in IOMMU related
166 code to do real stress testing of the IOMMU code. This option
167 will cause a performance drop and should only be enabled for
168 testing.
169
162config IOMMU_LEAK 170config IOMMU_LEAK
163 bool "IOMMU leak tracing" 171 bool "IOMMU leak tracing"
164 depends on DEBUG_KERNEL 172 depends on IOMMU_DEBUG && DMA_API_DEBUG
165 depends on IOMMU_DEBUG
166 ---help--- 173 ---help---
167 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
168 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344329bc..262e02820049 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
27extern int amd_iommu_init_dma_ops(void); 27extern int amd_iommu_init_dma_ops(void);
28extern void amd_iommu_detect(void); 28extern void amd_iommu_detect(void);
29extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 29extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
30extern void amd_iommu_flush_all_domains(void);
31extern void amd_iommu_flush_all_devices(void);
30#else 32#else
31static inline int amd_iommu_init(void) { return -ENODEV; } 33static inline int amd_iommu_init(void) { return -ENODEV; }
32static inline void amd_iommu_detect(void) { } 34static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9d22b5..0c878caaa0a2 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,27 @@
194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ 194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops 195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
196 domain for an IOMMU */ 196 domain for an IOMMU */
197extern bool amd_iommu_dump;
198#define DUMP_printk(format, arg...) \
199 do { \
200 if (amd_iommu_dump) \
201 printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
202 } while(0);
203
204/*
205 * Make iterating over all IOMMUs easier
206 */
207#define for_each_iommu(iommu) \
208 list_for_each_entry((iommu), &amd_iommu_list, list)
209#define for_each_iommu_safe(iommu, next) \
210 list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
211
212#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
213#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
214#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
215#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
216#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
217#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
197 218
198/* 219/*
199 * This structure contains generic data for IOMMU protection domains 220 * This structure contains generic data for IOMMU protection domains
@@ -210,6 +231,26 @@ struct protection_domain {
210}; 231};
211 232
212/* 233/*
234 * For dynamic growth the aperture size is split into ranges of 128MB of
235 * DMA address space each. This struct represents one such range.
236 */
237struct aperture_range {
238
239 /* address allocation bitmap */
240 unsigned long *bitmap;
241
242 /*
243 * Array of PTE pages for the aperture. In this array we save all the
244 * leaf pages of the domain page table used for the aperture. This way
245 * we don't need to walk the page table to find a specific PTE. We can
246 * just calculate its address in constant time.
247 */
248 u64 *pte_pages[64];
249
250 unsigned long offset;
251};
252
253/*
213 * Data container for a dma_ops specific protection domain 254 * Data container for a dma_ops specific protection domain
214 */ 255 */
215struct dma_ops_domain { 256struct dma_ops_domain {
@@ -222,18 +263,10 @@ struct dma_ops_domain {
222 unsigned long aperture_size; 263 unsigned long aperture_size;
223 264
224 /* address we start to search for free addresses */ 265 /* address we start to search for free addresses */
225 unsigned long next_bit; 266 unsigned long next_address;
226
227 /* address allocation bitmap */
228 unsigned long *bitmap;
229 267
230 /* 268 /* address space relevant data */
231 * Array of PTE pages for the aperture. In this array we save all the 269 struct aperture_range *aperture[APERTURE_MAX_RANGES];
232 * leaf pages of the domain page table used for the aperture. This way
233 * we don't need to walk the page table to find a specific PTE. We can
234 * just calculate its address in constant time.
235 */
236 u64 **pte_pages;
237 270
238 /* This will be set to true when TLB needs to be flushed */ 271 /* This will be set to true when TLB needs to be flushed */
239 bool need_flush; 272 bool need_flush;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99dad52..1c60554537c3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,16 @@ struct iommu_cmd {
55static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 55static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
56 struct unity_map_entry *e); 56 struct unity_map_entry *e);
57static struct dma_ops_domain *find_protection_domain(u16 devid); 57static struct dma_ops_domain *find_protection_domain(u16 devid);
58static u64* alloc_pte(struct protection_domain *dom,
59 unsigned long address, u64
60 **pte_page, gfp_t gfp);
61static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
62 unsigned long start_page,
63 unsigned int pages);
58 64
65#ifndef BUS_NOTIFY_UNBOUND_DRIVER
66#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
67#endif
59 68
60#ifdef CONFIG_AMD_IOMMU_STATS 69#ifdef CONFIG_AMD_IOMMU_STATS
61 70
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
213{ 222{
214 struct amd_iommu *iommu; 223 struct amd_iommu *iommu;
215 224
216 list_for_each_entry(iommu, &amd_iommu_list, list) 225 for_each_iommu(iommu)
217 iommu_poll_events(iommu); 226 iommu_poll_events(iommu);
218 227
219 return IRQ_HANDLED; 228 return IRQ_HANDLED;
@@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
440 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 449 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
441 domid, 1, 1); 450 domid, 1, 1);
442 451
443 list_for_each_entry(iommu, &amd_iommu_list, list) { 452 for_each_iommu(iommu) {
444 spin_lock_irqsave(&iommu->lock, flags); 453 spin_lock_irqsave(&iommu->lock, flags);
445 __iommu_queue_command(iommu, &cmd); 454 __iommu_queue_command(iommu, &cmd);
446 __iommu_completion_wait(iommu); 455 __iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
449 } 458 }
450} 459}
451 460
461void amd_iommu_flush_all_domains(void)
462{
463 int i;
464
465 for (i = 1; i < MAX_DOMAIN_ID; ++i) {
466 if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
467 continue;
468 iommu_flush_domain(i);
469 }
470}
471
472void amd_iommu_flush_all_devices(void)
473{
474 struct amd_iommu *iommu;
475 int i;
476
477 for (i = 0; i <= amd_iommu_last_bdf; ++i) {
478 if (amd_iommu_pd_table[i] == NULL)
479 continue;
480
481 iommu = amd_iommu_rlookup_table[i];
482 if (!iommu)
483 continue;
484
485 iommu_queue_inv_dev_entry(iommu, i);
486 iommu_completion_wait(iommu);
487 }
488}
489
452/**************************************************************************** 490/****************************************************************************
453 * 491 *
454 * The functions below are used the create the page table mappings for 492 * The functions below are used the create the page table mappings for
@@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
468 unsigned long phys_addr, 506 unsigned long phys_addr,
469 int prot) 507 int prot)
470{ 508{
471 u64 __pte, *pte, *page; 509 u64 __pte, *pte;
472 510
473 bus_addr = PAGE_ALIGN(bus_addr); 511 bus_addr = PAGE_ALIGN(bus_addr);
474 phys_addr = PAGE_ALIGN(phys_addr); 512 phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
477 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) 515 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
478 return -EINVAL; 516 return -EINVAL;
479 517
480 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; 518 pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
481
482 if (!IOMMU_PTE_PRESENT(*pte)) {
483 page = (u64 *)get_zeroed_page(GFP_KERNEL);
484 if (!page)
485 return -ENOMEM;
486 *pte = IOMMU_L2_PDE(virt_to_phys(page));
487 }
488
489 pte = IOMMU_PTE_PAGE(*pte);
490 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
491
492 if (!IOMMU_PTE_PRESENT(*pte)) {
493 page = (u64 *)get_zeroed_page(GFP_KERNEL);
494 if (!page)
495 return -ENOMEM;
496 *pte = IOMMU_L1_PDE(virt_to_phys(page));
497 }
498
499 pte = IOMMU_PTE_PAGE(*pte);
500 pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
501 519
502 if (IOMMU_PTE_PRESENT(*pte)) 520 if (IOMMU_PTE_PRESENT(*pte))
503 return -EBUSY; 521 return -EBUSY;
@@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
595 * as allocated in the aperture 613 * as allocated in the aperture
596 */ 614 */
597 if (addr < dma_dom->aperture_size) 615 if (addr < dma_dom->aperture_size)
598 __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); 616 __set_bit(addr >> PAGE_SHIFT,
617 dma_dom->aperture[0]->bitmap);
599 } 618 }
600 619
601 return 0; 620 return 0;
@@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
632 ****************************************************************************/ 651 ****************************************************************************/
633 652
634/* 653/*
635 * The address allocator core function. 654 * The address allocator core functions.
636 * 655 *
637 * called with domain->lock held 656 * called with domain->lock held
638 */ 657 */
658
659/*
660 * This function checks if there is a PTE for a given dma address. If
661 * there is one, it returns the pointer to it.
662 */
663static u64* fetch_pte(struct protection_domain *domain,
664 unsigned long address)
665{
666 u64 *pte;
667
668 pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
669
670 if (!IOMMU_PTE_PRESENT(*pte))
671 return NULL;
672
673 pte = IOMMU_PTE_PAGE(*pte);
674 pte = &pte[IOMMU_PTE_L1_INDEX(address)];
675
676 if (!IOMMU_PTE_PRESENT(*pte))
677 return NULL;
678
679 pte = IOMMU_PTE_PAGE(*pte);
680 pte = &pte[IOMMU_PTE_L0_INDEX(address)];
681
682 return pte;
683}
684
685/*
686 * This function is used to add a new aperture range to an existing
687 * aperture in case of dma_ops domain allocation or address allocation
688 * failure.
689 */
690static int alloc_new_range(struct amd_iommu *iommu,
691 struct dma_ops_domain *dma_dom,
692 bool populate, gfp_t gfp)
693{
694 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
695 int i;
696
697#ifdef CONFIG_IOMMU_STRESS
698 populate = false;
699#endif
700
701 if (index >= APERTURE_MAX_RANGES)
702 return -ENOMEM;
703
704 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
705 if (!dma_dom->aperture[index])
706 return -ENOMEM;
707
708 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
709 if (!dma_dom->aperture[index]->bitmap)
710 goto out_free;
711
712 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
713
714 if (populate) {
715 unsigned long address = dma_dom->aperture_size;
716 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
717 u64 *pte, *pte_page;
718
719 for (i = 0; i < num_ptes; ++i) {
720 pte = alloc_pte(&dma_dom->domain, address,
721 &pte_page, gfp);
722 if (!pte)
723 goto out_free;
724
725 dma_dom->aperture[index]->pte_pages[i] = pte_page;
726
727 address += APERTURE_RANGE_SIZE / 64;
728 }
729 }
730
731 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
732
733 /* Intialize the exclusion range if necessary */
734 if (iommu->exclusion_start &&
735 iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
736 iommu->exclusion_start < dma_dom->aperture_size) {
737 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
738 int pages = iommu_num_pages(iommu->exclusion_start,
739 iommu->exclusion_length,
740 PAGE_SIZE);
741 dma_ops_reserve_addresses(dma_dom, startpage, pages);
742 }
743
744 /*
745 * Check for areas already mapped as present in the new aperture
746 * range and mark those pages as reserved in the allocator. Such
747 * mappings may already exist as a result of requested unity
748 * mappings for devices.
749 */
750 for (i = dma_dom->aperture[index]->offset;
751 i < dma_dom->aperture_size;
752 i += PAGE_SIZE) {
753 u64 *pte = fetch_pte(&dma_dom->domain, i);
754 if (!pte || !IOMMU_PTE_PRESENT(*pte))
755 continue;
756
757 dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
758 }
759
760 return 0;
761
762out_free:
763 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
764
765 kfree(dma_dom->aperture[index]);
766 dma_dom->aperture[index] = NULL;
767
768 return -ENOMEM;
769}
770
771static unsigned long dma_ops_area_alloc(struct device *dev,
772 struct dma_ops_domain *dom,
773 unsigned int pages,
774 unsigned long align_mask,
775 u64 dma_mask,
776 unsigned long start)
777{
778 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
779 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
780 int i = start >> APERTURE_RANGE_SHIFT;
781 unsigned long boundary_size;
782 unsigned long address = -1;
783 unsigned long limit;
784
785 next_bit >>= PAGE_SHIFT;
786
787 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
788 PAGE_SIZE) >> PAGE_SHIFT;
789
790 for (;i < max_index; ++i) {
791 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
792
793 if (dom->aperture[i]->offset >= dma_mask)
794 break;
795
796 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
797 dma_mask >> PAGE_SHIFT);
798
799 address = iommu_area_alloc(dom->aperture[i]->bitmap,
800 limit, next_bit, pages, 0,
801 boundary_size, align_mask);
802 if (address != -1) {
803 address = dom->aperture[i]->offset +
804 (address << PAGE_SHIFT);
805 dom->next_address = address + (pages << PAGE_SHIFT);
806 break;
807 }
808
809 next_bit = 0;
810 }
811
812 return address;
813}
814
639static unsigned long dma_ops_alloc_addresses(struct device *dev, 815static unsigned long dma_ops_alloc_addresses(struct device *dev,
640 struct dma_ops_domain *dom, 816 struct dma_ops_domain *dom,
641 unsigned int pages, 817 unsigned int pages,
642 unsigned long align_mask, 818 unsigned long align_mask,
643 u64 dma_mask) 819 u64 dma_mask)
644{ 820{
645 unsigned long limit;
646 unsigned long address; 821 unsigned long address;
647 unsigned long boundary_size;
648 822
649 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 823#ifdef CONFIG_IOMMU_STRESS
650 PAGE_SIZE) >> PAGE_SHIFT; 824 dom->next_address = 0;
651 limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, 825 dom->need_flush = true;
652 dma_mask >> PAGE_SHIFT); 826#endif
653 827
654 if (dom->next_bit >= limit) { 828 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
655 dom->next_bit = 0; 829 dma_mask, dom->next_address);
656 dom->need_flush = true;
657 }
658 830
659 address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
660 0 , boundary_size, align_mask);
661 if (address == -1) { 831 if (address == -1) {
662 address = iommu_area_alloc(dom->bitmap, limit, 0, pages, 832 dom->next_address = 0;
663 0, boundary_size, align_mask); 833 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
834 dma_mask, 0);
664 dom->need_flush = true; 835 dom->need_flush = true;
665 } 836 }
666 837
667 if (likely(address != -1)) { 838 if (unlikely(address == -1))
668 dom->next_bit = address + pages;
669 address <<= PAGE_SHIFT;
670 } else
671 address = bad_dma_address; 839 address = bad_dma_address;
672 840
673 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); 841 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
684 unsigned long address, 852 unsigned long address,
685 unsigned int pages) 853 unsigned int pages)
686{ 854{
687 address >>= PAGE_SHIFT; 855 unsigned i = address >> APERTURE_RANGE_SHIFT;
688 iommu_area_free(dom->bitmap, address, pages); 856 struct aperture_range *range = dom->aperture[i];
689 857
690 if (address >= dom->next_bit) 858 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
859
860#ifdef CONFIG_IOMMU_STRESS
861 if (i < 4)
862 return;
863#endif
864
865 if (address >= dom->next_address)
691 dom->need_flush = true; 866 dom->need_flush = true;
867
868 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
869
870 iommu_area_free(range->bitmap, address, pages);
871
692} 872}
693 873
694/**************************************************************************** 874/****************************************************************************
@@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
736 unsigned long start_page, 916 unsigned long start_page,
737 unsigned int pages) 917 unsigned int pages)
738{ 918{
739 unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; 919 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
740 920
741 if (start_page + pages > last_page) 921 if (start_page + pages > last_page)
742 pages = last_page - start_page; 922 pages = last_page - start_page;
743 923
744 iommu_area_reserve(dom->bitmap, start_page, pages); 924 for (i = start_page; i < start_page + pages; ++i) {
925 int index = i / APERTURE_RANGE_PAGES;
926 int page = i % APERTURE_RANGE_PAGES;
927 __set_bit(page, dom->aperture[index]->bitmap);
928 }
745} 929}
746 930
747static void free_pagetable(struct protection_domain *domain) 931static void free_pagetable(struct protection_domain *domain)
@@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
780 */ 964 */
781static void dma_ops_domain_free(struct dma_ops_domain *dom) 965static void dma_ops_domain_free(struct dma_ops_domain *dom)
782{ 966{
967 int i;
968
783 if (!dom) 969 if (!dom)
784 return; 970 return;
785 971
786 free_pagetable(&dom->domain); 972 free_pagetable(&dom->domain);
787 973
788 kfree(dom->pte_pages); 974 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
789 975 if (!dom->aperture[i])
790 kfree(dom->bitmap); 976 continue;
977 free_page((unsigned long)dom->aperture[i]->bitmap);
978 kfree(dom->aperture[i]);
979 }
791 980
792 kfree(dom); 981 kfree(dom);
793} 982}
@@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
797 * It also intializes the page table and the address allocator data 986 * It also intializes the page table and the address allocator data
798 * structures required for the dma_ops interface 987 * structures required for the dma_ops interface
799 */ 988 */
800static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, 989static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
801 unsigned order)
802{ 990{
803 struct dma_ops_domain *dma_dom; 991 struct dma_ops_domain *dma_dom;
804 unsigned i, num_pte_pages;
805 u64 *l2_pde;
806 u64 address;
807
808 /*
809 * Currently the DMA aperture must be between 32 MB and 1GB in size
810 */
811 if ((order < 25) || (order > 30))
812 return NULL;
813 992
814 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); 993 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
815 if (!dma_dom) 994 if (!dma_dom)
@@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
826 dma_dom->domain.priv = dma_dom; 1005 dma_dom->domain.priv = dma_dom;
827 if (!dma_dom->domain.pt_root) 1006 if (!dma_dom->domain.pt_root)
828 goto free_dma_dom; 1007 goto free_dma_dom;
829 dma_dom->aperture_size = (1ULL << order);
830 dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
831 GFP_KERNEL);
832 if (!dma_dom->bitmap)
833 goto free_dma_dom;
834 /*
835 * mark the first page as allocated so we never return 0 as
836 * a valid dma-address. So we can use 0 as error value
837 */
838 dma_dom->bitmap[0] = 1;
839 dma_dom->next_bit = 0;
840 1008
841 dma_dom->need_flush = false; 1009 dma_dom->need_flush = false;
842 dma_dom->target_dev = 0xffff; 1010 dma_dom->target_dev = 0xffff;
843 1011
844 /* Intialize the exclusion range if necessary */ 1012 if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
845 if (iommu->exclusion_start && 1013 goto free_dma_dom;
846 iommu->exclusion_start < dma_dom->aperture_size) {
847 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
848 int pages = iommu_num_pages(iommu->exclusion_start,
849 iommu->exclusion_length,
850 PAGE_SIZE);
851 dma_ops_reserve_addresses(dma_dom, startpage, pages);
852 }
853 1014
854 /* 1015 /*
855 * At the last step, build the page tables so we don't need to 1016 * mark the first page as allocated so we never return 0 as
856 * allocate page table pages in the dma_ops mapping/unmapping 1017 * a valid dma-address. So we can use 0 as error value
857 * path.
858 */ 1018 */
859 num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); 1019 dma_dom->aperture[0]->bitmap[0] = 1;
860 dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), 1020 dma_dom->next_address = 0;
861 GFP_KERNEL);
862 if (!dma_dom->pte_pages)
863 goto free_dma_dom;
864
865 l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
866 if (l2_pde == NULL)
867 goto free_dma_dom;
868 1021
869 dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
870
871 for (i = 0; i < num_pte_pages; ++i) {
872 dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
873 if (!dma_dom->pte_pages[i])
874 goto free_dma_dom;
875 address = virt_to_phys(dma_dom->pte_pages[i]);
876 l2_pde[i] = IOMMU_L1_PDE(address);
877 }
878 1022
879 return dma_dom; 1023 return dma_dom;
880 1024
@@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
983 struct protection_domain *domain; 1127 struct protection_domain *domain;
984 struct dma_ops_domain *dma_domain; 1128 struct dma_ops_domain *dma_domain;
985 struct amd_iommu *iommu; 1129 struct amd_iommu *iommu;
986 int order = amd_iommu_aperture_order;
987 unsigned long flags; 1130 unsigned long flags;
988 1131
989 if (devid > amd_iommu_last_bdf) 1132 if (devid > amd_iommu_last_bdf)
@@ -1002,17 +1145,7 @@ static int device_change_notifier(struct notifier_block *nb,
1002 "to a non-dma-ops domain\n", dev_name(dev)); 1145 "to a non-dma-ops domain\n", dev_name(dev));
1003 1146
1004 switch (action) { 1147 switch (action) {
1005 case BUS_NOTIFY_BOUND_DRIVER: 1148 case BUS_NOTIFY_UNBOUND_DRIVER:
1006 if (domain)
1007 goto out;
1008 dma_domain = find_protection_domain(devid);
1009 if (!dma_domain)
1010 dma_domain = iommu->default_dom;
1011 attach_device(iommu, &dma_domain->domain, devid);
1012 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
1013 "device %s\n", dma_domain->domain.id, dev_name(dev));
1014 break;
1015 case BUS_NOTIFY_UNBIND_DRIVER:
1016 if (!domain) 1149 if (!domain)
1017 goto out; 1150 goto out;
1018 detach_device(domain, devid); 1151 detach_device(domain, devid);
@@ -1022,7 +1155,7 @@ static int device_change_notifier(struct notifier_block *nb,
1022 dma_domain = find_protection_domain(devid); 1155 dma_domain = find_protection_domain(devid);
1023 if (dma_domain) 1156 if (dma_domain)
1024 goto out; 1157 goto out;
1025 dma_domain = dma_ops_domain_alloc(iommu, order); 1158 dma_domain = dma_ops_domain_alloc(iommu);
1026 if (!dma_domain) 1159 if (!dma_domain)
1027 goto out; 1160 goto out;
1028 dma_domain->target_dev = devid; 1161 dma_domain->target_dev = devid;
@@ -1133,8 +1266,8 @@ static int get_device_resources(struct device *dev,
1133 dma_dom = (*iommu)->default_dom; 1266 dma_dom = (*iommu)->default_dom;
1134 *domain = &dma_dom->domain; 1267 *domain = &dma_dom->domain;
1135 attach_device(*iommu, *domain, *bdf); 1268 attach_device(*iommu, *domain, *bdf);
1136 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1269 DUMP_printk("Using protection domain %d for device %s\n",
1137 "device %s\n", (*domain)->id, dev_name(dev)); 1270 (*domain)->id, dev_name(dev));
1138 } 1271 }
1139 1272
1140 if (domain_for_device(_bdf) == NULL) 1273 if (domain_for_device(_bdf) == NULL)
@@ -1144,6 +1277,66 @@ static int get_device_resources(struct device *dev,
1144} 1277}
1145 1278
1146/* 1279/*
1280 * If the pte_page is not yet allocated this function is called
1281 */
1282static u64* alloc_pte(struct protection_domain *dom,
1283 unsigned long address, u64 **pte_page, gfp_t gfp)
1284{
1285 u64 *pte, *page;
1286
1287 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
1288
1289 if (!IOMMU_PTE_PRESENT(*pte)) {
1290 page = (u64 *)get_zeroed_page(gfp);
1291 if (!page)
1292 return NULL;
1293 *pte = IOMMU_L2_PDE(virt_to_phys(page));
1294 }
1295
1296 pte = IOMMU_PTE_PAGE(*pte);
1297 pte = &pte[IOMMU_PTE_L1_INDEX(address)];
1298
1299 if (!IOMMU_PTE_PRESENT(*pte)) {
1300 page = (u64 *)get_zeroed_page(gfp);
1301 if (!page)
1302 return NULL;
1303 *pte = IOMMU_L1_PDE(virt_to_phys(page));
1304 }
1305
1306 pte = IOMMU_PTE_PAGE(*pte);
1307
1308 if (pte_page)
1309 *pte_page = pte;
1310
1311 pte = &pte[IOMMU_PTE_L0_INDEX(address)];
1312
1313 return pte;
1314}
1315
1316/*
1317 * This function fetches the PTE for a given address in the aperture
1318 */
1319static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1320 unsigned long address)
1321{
1322 struct aperture_range *aperture;
1323 u64 *pte, *pte_page;
1324
1325 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1326 if (!aperture)
1327 return NULL;
1328
1329 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1330 if (!pte) {
1331 pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
1332 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1333 } else
1334 pte += IOMMU_PTE_L0_INDEX(address);
1335
1336 return pte;
1337}
1338
1339/*
1147 * This is the generic map function. It maps one 4kb page at paddr to 1340 * This is the generic map function. It maps one 4kb page at paddr to
1148 * the given address in the DMA address space for the domain. 1341 * the given address in the DMA address space for the domain.
1149 */ 1342 */
@@ -1159,8 +1352,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1159 1352
1160 paddr &= PAGE_MASK; 1353 paddr &= PAGE_MASK;
1161 1354
1162 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; 1355 pte = dma_ops_get_pte(dom, address);
1163 pte += IOMMU_PTE_L0_INDEX(address); 1356 if (!pte)
1357 return bad_dma_address;
1164 1358
1165 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; 1359 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1166 1360
@@ -1185,14 +1379,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
1185 struct dma_ops_domain *dom, 1379 struct dma_ops_domain *dom,
1186 unsigned long address) 1380 unsigned long address)
1187{ 1381{
1382 struct aperture_range *aperture;
1188 u64 *pte; 1383 u64 *pte;
1189 1384
1190 if (address >= dom->aperture_size) 1385 if (address >= dom->aperture_size)
1191 return; 1386 return;
1192 1387
1193 WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); 1388 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1389 if (!aperture)
1390 return;
1391
1392 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1393 if (!pte)
1394 return;
1194 1395
1195 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
1196 pte += IOMMU_PTE_L0_INDEX(address); 1396 pte += IOMMU_PTE_L0_INDEX(address);
1197 1397
1198 WARN_ON(!*pte); 1398 WARN_ON(!*pte);
@@ -1216,7 +1416,7 @@ static dma_addr_t __map_single(struct device *dev,
1216 u64 dma_mask) 1416 u64 dma_mask)
1217{ 1417{
1218 dma_addr_t offset = paddr & ~PAGE_MASK; 1418 dma_addr_t offset = paddr & ~PAGE_MASK;
1219 dma_addr_t address, start; 1419 dma_addr_t address, start, ret;
1220 unsigned int pages; 1420 unsigned int pages;
1221 unsigned long align_mask = 0; 1421 unsigned long align_mask = 0;
1222 int i; 1422 int i;
@@ -1232,14 +1432,33 @@ static dma_addr_t __map_single(struct device *dev,
1232 if (align) 1432 if (align)
1233 align_mask = (1UL << get_order(size)) - 1; 1433 align_mask = (1UL << get_order(size)) - 1;
1234 1434
1435retry:
1235 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 1436 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1236 dma_mask); 1437 dma_mask);
1237 if (unlikely(address == bad_dma_address)) 1438 if (unlikely(address == bad_dma_address)) {
1238 goto out; 1439 /*
1440 * setting next_address here will let the address
1441 * allocator only scan the new allocated range in the
1442 * first run. This is a small optimization.
1443 */
1444 dma_dom->next_address = dma_dom->aperture_size;
1445
1446 if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
1447 goto out;
1448
1449 /*
1450 * aperture was sucessfully enlarged by 128 MB, try
1451 * allocation again
1452 */
1453 goto retry;
1454 }
1239 1455
1240 start = address; 1456 start = address;
1241 for (i = 0; i < pages; ++i) { 1457 for (i = 0; i < pages; ++i) {
1242 dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); 1458 ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
1459 if (ret == bad_dma_address)
1460 goto out_unmap;
1461
1243 paddr += PAGE_SIZE; 1462 paddr += PAGE_SIZE;
1244 start += PAGE_SIZE; 1463 start += PAGE_SIZE;
1245 } 1464 }
@@ -1255,6 +1474,17 @@ static dma_addr_t __map_single(struct device *dev,
1255 1474
1256out: 1475out:
1257 return address; 1476 return address;
1477
1478out_unmap:
1479
1480 for (--i; i >= 0; --i) {
1481 start -= PAGE_SIZE;
1482 dma_ops_domain_unmap(iommu, dma_dom, start);
1483 }
1484
1485 dma_ops_free_addresses(dma_dom, address, pages);
1486
1487 return bad_dma_address;
1258} 1488}
1259 1489
1260/* 1490/*
@@ -1537,8 +1767,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
1537 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1767 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1538 size, DMA_BIDIRECTIONAL, true, dma_mask); 1768 size, DMA_BIDIRECTIONAL, true, dma_mask);
1539 1769
1540 if (*dma_addr == bad_dma_address) 1770 if (*dma_addr == bad_dma_address) {
1771 spin_unlock_irqrestore(&domain->lock, flags);
1541 goto out_free; 1772 goto out_free;
1773 }
1542 1774
1543 iommu_completion_wait(iommu); 1775 iommu_completion_wait(iommu);
1544 1776
@@ -1625,7 +1857,6 @@ static void prealloc_protection_domains(void)
1625 struct pci_dev *dev = NULL; 1857 struct pci_dev *dev = NULL;
1626 struct dma_ops_domain *dma_dom; 1858 struct dma_ops_domain *dma_dom;
1627 struct amd_iommu *iommu; 1859 struct amd_iommu *iommu;
1628 int order = amd_iommu_aperture_order;
1629 u16 devid; 1860 u16 devid;
1630 1861
1631 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1862 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1869,7 @@ static void prealloc_protection_domains(void)
1638 iommu = amd_iommu_rlookup_table[devid]; 1869 iommu = amd_iommu_rlookup_table[devid];
1639 if (!iommu) 1870 if (!iommu)
1640 continue; 1871 continue;
1641 dma_dom = dma_ops_domain_alloc(iommu, order); 1872 dma_dom = dma_ops_domain_alloc(iommu);
1642 if (!dma_dom) 1873 if (!dma_dom)
1643 continue; 1874 continue;
1644 init_unity_mappings_for_device(dma_dom, devid); 1875 init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1895,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
1664int __init amd_iommu_init_dma_ops(void) 1895int __init amd_iommu_init_dma_ops(void)
1665{ 1896{
1666 struct amd_iommu *iommu; 1897 struct amd_iommu *iommu;
1667 int order = amd_iommu_aperture_order;
1668 int ret; 1898 int ret;
1669 1899
1670 /* 1900 /*
@@ -1672,8 +1902,8 @@ int __init amd_iommu_init_dma_ops(void)
1672 * found in the system. Devices not assigned to any other 1902 * found in the system. Devices not assigned to any other
1673 * protection domain will be assigned to the default one. 1903 * protection domain will be assigned to the default one.
1674 */ 1904 */
1675 list_for_each_entry(iommu, &amd_iommu_list, list) { 1905 for_each_iommu(iommu) {
1676 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1906 iommu->default_dom = dma_ops_domain_alloc(iommu);
1677 if (iommu->default_dom == NULL) 1907 if (iommu->default_dom == NULL)
1678 return -ENOMEM; 1908 return -ENOMEM;
1679 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 1909 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1940,7 @@ int __init amd_iommu_init_dma_ops(void)
1710 1940
1711free_domains: 1941free_domains:
1712 1942
1713 list_for_each_entry(iommu, &amd_iommu_list, list) { 1943 for_each_iommu(iommu) {
1714 if (iommu->default_dom) 1944 if (iommu->default_dom)
1715 dma_ops_domain_free(iommu->default_dom); 1945 dma_ops_domain_free(iommu->default_dom);
1716 } 1946 }
@@ -1842,7 +2072,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
1842 2072
1843 old_domain = domain_for_device(devid); 2073 old_domain = domain_for_device(devid);
1844 if (old_domain) 2074 if (old_domain)
1845 return -EBUSY; 2075 detach_device(old_domain, devid);
1846 2076
1847 attach_device(iommu, domain, devid); 2077 attach_device(iommu, domain, devid);
1848 2078
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be0902dac..238989ec077d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,15 +115,21 @@ struct ivmd_header {
115 u64 range_length; 115 u64 range_length;
116} __attribute__((packed)); 116} __attribute__((packed));
117 117
118bool amd_iommu_dump;
119
118static int __initdata amd_iommu_detected; 120static int __initdata amd_iommu_detected;
119 121
120u16 amd_iommu_last_bdf; /* largest PCI device id we have 122u16 amd_iommu_last_bdf; /* largest PCI device id we have
121 to handle */ 123 to handle */
122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 124LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 we find in ACPI */ 125 we find in ACPI */
124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 126#ifdef CONFIG_IOMMU_STRESS
127bool amd_iommu_isolate = false;
128#else
125bool amd_iommu_isolate = true; /* if true, device isolation is 129bool amd_iommu_isolate = true; /* if true, device isolation is
126 enabled */ 130 enabled */
131#endif
132
127bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 133bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
128 134
129LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 135LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid)
175static inline unsigned long tbl_size(int entry_size) 181static inline unsigned long tbl_size(int entry_size)
176{ 182{
177 unsigned shift = PAGE_SHIFT + 183 unsigned shift = PAGE_SHIFT +
178 get_order(amd_iommu_last_bdf * entry_size); 184 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
179 185
180 return 1UL << shift; 186 return 1UL << shift;
181} 187}
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
193 * This function set the exclusion range in the IOMMU. DMA accesses to the 199 * This function set the exclusion range in the IOMMU. DMA accesses to the
194 * exclusion range are passed through untranslated 200 * exclusion range are passed through untranslated
195 */ 201 */
196static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) 202static void iommu_set_exclusion_range(struct amd_iommu *iommu)
197{ 203{
198 u64 start = iommu->exclusion_start & PAGE_MASK; 204 u64 start = iommu->exclusion_start & PAGE_MASK;
199 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; 205 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
225} 231}
226 232
227/* Generic functions to enable/disable certain features of the IOMMU. */ 233/* Generic functions to enable/disable certain features of the IOMMU. */
228static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 234static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
229{ 235{
230 u32 ctrl; 236 u32 ctrl;
231 237
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
244} 250}
245 251
246/* Function to enable the hardware */ 252/* Function to enable the hardware */
247static void __init iommu_enable(struct amd_iommu *iommu) 253static void iommu_enable(struct amd_iommu *iommu)
248{ 254{
249 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", 255 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
250 dev_name(&iommu->dev->dev), iommu->cap_ptr); 256 dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 258 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
253} 259}
254 260
255/* Function to enable IOMMU event logging and event interrupts */ 261static void iommu_disable(struct amd_iommu *iommu)
256static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
257{ 262{
258 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 263 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
259 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
260} 264}
261 265
262/* 266/*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
413{ 417{
414 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 418 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
415 get_order(CMD_BUFFER_SIZE)); 419 get_order(CMD_BUFFER_SIZE));
416 u64 entry;
417 420
418 if (cmd_buf == NULL) 421 if (cmd_buf == NULL)
419 return NULL; 422 return NULL;
420 423
421 iommu->cmd_buf_size = CMD_BUFFER_SIZE; 424 iommu->cmd_buf_size = CMD_BUFFER_SIZE;
422 425
423 entry = (u64)virt_to_phys(cmd_buf); 426 return cmd_buf;
427}
428
429/*
430 * This function writes the command buffer address to the hardware and
431 * enables it.
432 */
433static void iommu_enable_command_buffer(struct amd_iommu *iommu)
434{
435 u64 entry;
436
437 BUG_ON(iommu->cmd_buf == NULL);
438
439 entry = (u64)virt_to_phys(iommu->cmd_buf);
424 entry |= MMIO_CMD_SIZE_512; 440 entry |= MMIO_CMD_SIZE_512;
441
425 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 442 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
426 &entry, sizeof(entry)); 443 &entry, sizeof(entry));
427 444
428 /* set head and tail to zero manually */ 445 /* set head and tail to zero manually */
429 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 446 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
430 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 447 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
431 448
432 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 449 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
433
434 return cmd_buf;
435} 450}
436 451
437static void __init free_command_buffer(struct amd_iommu *iommu) 452static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
443/* allocates the memory where the IOMMU will log its events to */ 458/* allocates the memory where the IOMMU will log its events to */
444static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) 459static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
445{ 460{
446 u64 entry;
447 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 461 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
448 get_order(EVT_BUFFER_SIZE)); 462 get_order(EVT_BUFFER_SIZE));
449 463
450 if (iommu->evt_buf == NULL) 464 if (iommu->evt_buf == NULL)
451 return NULL; 465 return NULL;
452 466
467 return iommu->evt_buf;
468}
469
470static void iommu_enable_event_buffer(struct amd_iommu *iommu)
471{
472 u64 entry;
473
474 BUG_ON(iommu->evt_buf == NULL);
475
453 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 476 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
477
454 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 478 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
455 &entry, sizeof(entry)); 479 &entry, sizeof(entry));
456 480
457 iommu->evt_buf_size = EVT_BUFFER_SIZE; 481 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
458
459 return iommu->evt_buf;
460} 482}
461 483
462static void __init free_event_buffer(struct amd_iommu *iommu) 484static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
596 p += sizeof(struct ivhd_header); 618 p += sizeof(struct ivhd_header);
597 end += h->length; 619 end += h->length;
598 620
621
599 while (p < end) { 622 while (p < end) {
600 e = (struct ivhd_entry *)p; 623 e = (struct ivhd_entry *)p;
601 switch (e->type) { 624 switch (e->type) {
602 case IVHD_DEV_ALL: 625 case IVHD_DEV_ALL:
626
627 DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
628 " last device %02x:%02x.%x flags: %02x\n",
629 PCI_BUS(iommu->first_device),
630 PCI_SLOT(iommu->first_device),
631 PCI_FUNC(iommu->first_device),
632 PCI_BUS(iommu->last_device),
633 PCI_SLOT(iommu->last_device),
634 PCI_FUNC(iommu->last_device),
635 e->flags);
636
603 for (dev_i = iommu->first_device; 637 for (dev_i = iommu->first_device;
604 dev_i <= iommu->last_device; ++dev_i) 638 dev_i <= iommu->last_device; ++dev_i)
605 set_dev_entry_from_acpi(iommu, dev_i, 639 set_dev_entry_from_acpi(iommu, dev_i,
606 e->flags, 0); 640 e->flags, 0);
607 break; 641 break;
608 case IVHD_DEV_SELECT: 642 case IVHD_DEV_SELECT:
643
644 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
645 "flags: %02x\n",
646 PCI_BUS(e->devid),
647 PCI_SLOT(e->devid),
648 PCI_FUNC(e->devid),
649 e->flags);
650
609 devid = e->devid; 651 devid = e->devid;
610 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 652 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
611 break; 653 break;
612 case IVHD_DEV_SELECT_RANGE_START: 654 case IVHD_DEV_SELECT_RANGE_START:
655
656 DUMP_printk(" DEV_SELECT_RANGE_START\t "
657 "devid: %02x:%02x.%x flags: %02x\n",
658 PCI_BUS(e->devid),
659 PCI_SLOT(e->devid),
660 PCI_FUNC(e->devid),
661 e->flags);
662
613 devid_start = e->devid; 663 devid_start = e->devid;
614 flags = e->flags; 664 flags = e->flags;
615 ext_flags = 0; 665 ext_flags = 0;
616 alias = false; 666 alias = false;
617 break; 667 break;
618 case IVHD_DEV_ALIAS: 668 case IVHD_DEV_ALIAS:
669
670 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
671 "flags: %02x devid_to: %02x:%02x.%x\n",
672 PCI_BUS(e->devid),
673 PCI_SLOT(e->devid),
674 PCI_FUNC(e->devid),
675 e->flags,
676 PCI_BUS(e->ext >> 8),
677 PCI_SLOT(e->ext >> 8),
678 PCI_FUNC(e->ext >> 8));
679
619 devid = e->devid; 680 devid = e->devid;
620 devid_to = e->ext >> 8; 681 devid_to = e->ext >> 8;
621 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 682 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
622 amd_iommu_alias_table[devid] = devid_to; 683 amd_iommu_alias_table[devid] = devid_to;
623 break; 684 break;
624 case IVHD_DEV_ALIAS_RANGE: 685 case IVHD_DEV_ALIAS_RANGE:
686
687 DUMP_printk(" DEV_ALIAS_RANGE\t\t "
688 "devid: %02x:%02x.%x flags: %02x "
689 "devid_to: %02x:%02x.%x\n",
690 PCI_BUS(e->devid),
691 PCI_SLOT(e->devid),
692 PCI_FUNC(e->devid),
693 e->flags,
694 PCI_BUS(e->ext >> 8),
695 PCI_SLOT(e->ext >> 8),
696 PCI_FUNC(e->ext >> 8));
697
625 devid_start = e->devid; 698 devid_start = e->devid;
626 flags = e->flags; 699 flags = e->flags;
627 devid_to = e->ext >> 8; 700 devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
629 alias = true; 702 alias = true;
630 break; 703 break;
631 case IVHD_DEV_EXT_SELECT: 704 case IVHD_DEV_EXT_SELECT:
705
706 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
707 "flags: %02x ext: %08x\n",
708 PCI_BUS(e->devid),
709 PCI_SLOT(e->devid),
710 PCI_FUNC(e->devid),
711 e->flags, e->ext);
712
632 devid = e->devid; 713 devid = e->devid;
633 set_dev_entry_from_acpi(iommu, devid, e->flags, 714 set_dev_entry_from_acpi(iommu, devid, e->flags,
634 e->ext); 715 e->ext);
635 break; 716 break;
636 case IVHD_DEV_EXT_SELECT_RANGE: 717 case IVHD_DEV_EXT_SELECT_RANGE:
718
719 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
720 "%02x:%02x.%x flags: %02x ext: %08x\n",
721 PCI_BUS(e->devid),
722 PCI_SLOT(e->devid),
723 PCI_FUNC(e->devid),
724 e->flags, e->ext);
725
637 devid_start = e->devid; 726 devid_start = e->devid;
638 flags = e->flags; 727 flags = e->flags;
639 ext_flags = e->ext; 728 ext_flags = e->ext;
640 alias = false; 729 alias = false;
641 break; 730 break;
642 case IVHD_DEV_RANGE_END: 731 case IVHD_DEV_RANGE_END:
732
733 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
734 PCI_BUS(e->devid),
735 PCI_SLOT(e->devid),
736 PCI_FUNC(e->devid));
737
643 devid = e->devid; 738 devid = e->devid;
644 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 739 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
645 if (alias) 740 if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
679{ 774{
680 struct amd_iommu *iommu, *next; 775 struct amd_iommu *iommu, *next;
681 776
682 list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { 777 for_each_iommu_safe(iommu, next) {
683 list_del(&iommu->list); 778 list_del(&iommu->list);
684 free_iommu_one(iommu); 779 free_iommu_one(iommu);
685 kfree(iommu); 780 kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
710 if (!iommu->mmio_base) 805 if (!iommu->mmio_base)
711 return -ENOMEM; 806 return -ENOMEM;
712 807
713 iommu_set_device_table(iommu);
714 iommu->cmd_buf = alloc_command_buffer(iommu); 808 iommu->cmd_buf = alloc_command_buffer(iommu);
715 if (!iommu->cmd_buf) 809 if (!iommu->cmd_buf)
716 return -ENOMEM; 810 return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
746 h = (struct ivhd_header *)p; 840 h = (struct ivhd_header *)p;
747 switch (*p) { 841 switch (*p) {
748 case ACPI_IVHD_TYPE: 842 case ACPI_IVHD_TYPE:
843
844 DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
845 "seg: %d flags: %01x info %04x\n",
846 PCI_BUS(h->devid), PCI_SLOT(h->devid),
847 PCI_FUNC(h->devid), h->cap_ptr,
848 h->pci_seg, h->flags, h->info);
849 DUMP_printk(" mmio-addr: %016llx\n",
850 h->mmio_phys);
851
749 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 852 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
750 if (iommu == NULL) 853 if (iommu == NULL)
751 return -ENOMEM; 854 return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
773 * 876 *
774 ****************************************************************************/ 877 ****************************************************************************/
775 878
776static int __init iommu_setup_msix(struct amd_iommu *iommu)
777{
778 struct amd_iommu *curr;
779 struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
780 int nvec = 0, i;
781
782 list_for_each_entry(curr, &amd_iommu_list, list) {
783 if (curr->dev == iommu->dev) {
784 entries[nvec].entry = curr->evt_msi_num;
785 entries[nvec].vector = 0;
786 curr->int_enabled = true;
787 nvec++;
788 }
789 }
790
791 if (pci_enable_msix(iommu->dev, entries, nvec)) {
792 pci_disable_msix(iommu->dev);
793 return 1;
794 }
795
796 for (i = 0; i < nvec; ++i) {
797 int r = request_irq(entries->vector, amd_iommu_int_handler,
798 IRQF_SAMPLE_RANDOM,
799 "AMD IOMMU",
800 NULL);
801 if (r)
802 goto out_free;
803 }
804
805 return 0;
806
807out_free:
808 for (i -= 1; i >= 0; --i)
809 free_irq(entries->vector, NULL);
810
811 pci_disable_msix(iommu->dev);
812
813 return 1;
814}
815
816static int __init iommu_setup_msi(struct amd_iommu *iommu) 879static int __init iommu_setup_msi(struct amd_iommu *iommu)
817{ 880{
818 int r; 881 int r;
819 struct amd_iommu *curr;
820
821 list_for_each_entry(curr, &amd_iommu_list, list) {
822 if (curr->dev == iommu->dev)
823 curr->int_enabled = true;
824 }
825
826 882
827 if (pci_enable_msi(iommu->dev)) 883 if (pci_enable_msi(iommu->dev))
828 return 1; 884 return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
837 return 1; 893 return 1;
838 } 894 }
839 895
896 iommu->int_enabled = true;
897 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
898
840 return 0; 899 return 0;
841} 900}
842 901
843static int __init iommu_init_msi(struct amd_iommu *iommu) 902static int iommu_init_msi(struct amd_iommu *iommu)
844{ 903{
845 if (iommu->int_enabled) 904 if (iommu->int_enabled)
846 return 0; 905 return 0;
847 906
848 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) 907 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
849 return iommu_setup_msix(iommu);
850 else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
851 return iommu_setup_msi(iommu); 908 return iommu_setup_msi(iommu);
852 909
853 return 1; 910 return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
899static int __init init_unity_map_range(struct ivmd_header *m) 956static int __init init_unity_map_range(struct ivmd_header *m)
900{ 957{
901 struct unity_map_entry *e = 0; 958 struct unity_map_entry *e = 0;
959 char *s;
902 960
903 e = kzalloc(sizeof(*e), GFP_KERNEL); 961 e = kzalloc(sizeof(*e), GFP_KERNEL);
904 if (e == NULL) 962 if (e == NULL)
@@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m)
906 964
907 switch (m->type) { 965 switch (m->type) {
908 default: 966 default:
967 kfree(e);
968 return 0;
909 case ACPI_IVMD_TYPE: 969 case ACPI_IVMD_TYPE:
970 s = "IVMD_TYPEi\t\t\t";
910 e->devid_start = e->devid_end = m->devid; 971 e->devid_start = e->devid_end = m->devid;
911 break; 972 break;
912 case ACPI_IVMD_TYPE_ALL: 973 case ACPI_IVMD_TYPE_ALL:
974 s = "IVMD_TYPE_ALL\t\t";
913 e->devid_start = 0; 975 e->devid_start = 0;
914 e->devid_end = amd_iommu_last_bdf; 976 e->devid_end = amd_iommu_last_bdf;
915 break; 977 break;
916 case ACPI_IVMD_TYPE_RANGE: 978 case ACPI_IVMD_TYPE_RANGE:
979 s = "IVMD_TYPE_RANGE\t\t";
917 e->devid_start = m->devid; 980 e->devid_start = m->devid;
918 e->devid_end = m->aux; 981 e->devid_end = m->aux;
919 break; 982 break;
@@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
922 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 985 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
923 e->prot = m->flags >> 1; 986 e->prot = m->flags >> 1;
924 987
988 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
989 " range_start: %016llx range_end: %016llx flags: %x\n", s,
990 PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
991 PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
992 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
993 e->address_start, e->address_end, m->flags);
994
925 list_add_tail(&e->list, &amd_iommu_unity_map); 995 list_add_tail(&e->list, &amd_iommu_unity_map);
926 996
927 return 0; 997 return 0;
@@ -967,18 +1037,28 @@ static void init_device_table(void)
967 * This function finally enables all IOMMUs found in the system after 1037 * This function finally enables all IOMMUs found in the system after
968 * they have been initialized 1038 * they have been initialized
969 */ 1039 */
970static void __init enable_iommus(void) 1040static void enable_iommus(void)
971{ 1041{
972 struct amd_iommu *iommu; 1042 struct amd_iommu *iommu;
973 1043
974 list_for_each_entry(iommu, &amd_iommu_list, list) { 1044 for_each_iommu(iommu) {
1045 iommu_set_device_table(iommu);
1046 iommu_enable_command_buffer(iommu);
1047 iommu_enable_event_buffer(iommu);
975 iommu_set_exclusion_range(iommu); 1048 iommu_set_exclusion_range(iommu);
976 iommu_init_msi(iommu); 1049 iommu_init_msi(iommu);
977 iommu_enable_event_logging(iommu);
978 iommu_enable(iommu); 1050 iommu_enable(iommu);
979 } 1051 }
980} 1052}
981 1053
1054static void disable_iommus(void)
1055{
1056 struct amd_iommu *iommu;
1057
1058 for_each_iommu(iommu)
1059 iommu_disable(iommu);
1060}
1061
982/* 1062/*
983 * Suspend/Resume support 1063 * Suspend/Resume support
984 * disable suspend until real resume implemented 1064 * disable suspend until real resume implemented
@@ -986,12 +1066,31 @@ static void __init enable_iommus(void)
986 1066
987static int amd_iommu_resume(struct sys_device *dev) 1067static int amd_iommu_resume(struct sys_device *dev)
988{ 1068{
1069 /*
1070 * Disable IOMMUs before reprogramming the hardware registers.
1071 * IOMMU is still enabled from the resume kernel.
1072 */
1073 disable_iommus();
1074
1075 /* re-load the hardware */
1076 enable_iommus();
1077
1078 /*
1079 * we have to flush after the IOMMUs are enabled because a
1080 * disabled IOMMU will never execute the commands we send
1081 */
1082 amd_iommu_flush_all_domains();
1083 amd_iommu_flush_all_devices();
1084
989 return 0; 1085 return 0;
990} 1086}
991 1087
992static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) 1088static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
993{ 1089{
994 return -EINVAL; 1090 /* disable IOMMUs to go out of the way for BIOS */
1091 disable_iommus();
1092
1093 return 0;
995} 1094}
996 1095
997static struct sysdev_class amd_iommu_sysdev_class = { 1096static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void)
1137 1236
1138 enable_iommus(); 1237 enable_iommus();
1139 1238
1140 printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
1141 (1 << (amd_iommu_aperture_order-20)));
1142
1143 printk(KERN_INFO "AMD IOMMU: device isolation "); 1239 printk(KERN_INFO "AMD IOMMU: device isolation ");
1144 if (amd_iommu_isolate) 1240 if (amd_iommu_isolate)
1145 printk("enabled\n"); 1241 printk("enabled\n");
@@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void)
1211 * 1307 *
1212 ****************************************************************************/ 1308 ****************************************************************************/
1213 1309
1310static int __init parse_amd_iommu_dump(char *str)
1311{
1312 amd_iommu_dump = true;
1313
1314 return 1;
1315}
1316
1214static int __init parse_amd_iommu_options(char *str) 1317static int __init parse_amd_iommu_options(char *str)
1215{ 1318{
1216 for (; *str; ++str) { 1319 for (; *str; ++str) {
@@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
1225 return 1; 1328 return 1;
1226} 1329}
1227 1330
1228static int __init parse_amd_iommu_size_options(char *str) 1331__setup("amd_iommu_dump", parse_amd_iommu_dump);
1229{
1230 unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
1231
1232 if ((order > 24) && (order < 31))
1233 amd_iommu_aperture_order = order;
1234
1235 return 1;
1236}
1237
1238__setup("amd_iommu=", parse_amd_iommu_options); 1332__setup("amd_iommu=", parse_amd_iommu_options);
1239__setup("amd_iommu_size=", parse_amd_iommu_size_options);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 755c21e906f3..971a3bec47a8 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
186 186
187static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; 187static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
188 188
189/* enable this to stress test the chip's TCE cache */
190#ifdef CONFIG_IOMMU_DEBUG
191static int debugging = 1;
192
193static inline unsigned long verify_bit_range(unsigned long* bitmap,
194 int expected, unsigned long start, unsigned long end)
195{
196 unsigned long idx = start;
197
198 BUG_ON(start >= end);
199
200 while (idx < end) {
201 if (!!test_bit(idx, bitmap) != expected)
202 return idx;
203 ++idx;
204 }
205
206 /* all bits have the expected value */
207 return ~0UL;
208}
209#else /* debugging is disabled */
210static int debugging;
211
212static inline unsigned long verify_bit_range(unsigned long* bitmap,
213 int expected, unsigned long start, unsigned long end)
214{
215 return ~0UL;
216}
217
218#endif /* CONFIG_IOMMU_DEBUG */
219
220static inline int translation_enabled(struct iommu_table *tbl) 189static inline int translation_enabled(struct iommu_table *tbl)
221{ 190{
222 /* only PHBs with translation enabled have an IOMMU table */ 191 /* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
228{ 197{
229 unsigned long index; 198 unsigned long index;
230 unsigned long end; 199 unsigned long end;
231 unsigned long badbit;
232 unsigned long flags; 200 unsigned long flags;
233 201
234 index = start_addr >> PAGE_SHIFT; 202 index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
243 211
244 spin_lock_irqsave(&tbl->it_lock, flags); 212 spin_lock_irqsave(&tbl->it_lock, flags);
245 213
246 badbit = verify_bit_range(tbl->it_map, 0, index, end);
247 if (badbit != ~0UL) {
248 if (printk_ratelimit())
249 printk(KERN_ERR "Calgary: entry already allocated at "
250 "0x%lx tbl %p dma 0x%lx npages %u\n",
251 badbit, tbl, start_addr, npages);
252 }
253
254 iommu_area_reserve(tbl->it_map, index, npages); 214 iommu_area_reserve(tbl->it_map, index, npages);
255 215
256 spin_unlock_irqrestore(&tbl->it_lock, flags); 216 spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
326 unsigned int npages) 286 unsigned int npages)
327{ 287{
328 unsigned long entry; 288 unsigned long entry;
329 unsigned long badbit;
330 unsigned long badend; 289 unsigned long badend;
331 unsigned long flags; 290 unsigned long flags;
332 291
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
346 305
347 spin_lock_irqsave(&tbl->it_lock, flags); 306 spin_lock_irqsave(&tbl->it_lock, flags);
348 307
349 badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
350 if (badbit != ~0UL) {
351 if (printk_ratelimit())
352 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
353 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
354 badbit, tbl, dma_addr, entry, npages);
355 }
356
357 iommu_area_free(tbl->it_map, entry, npages); 308 iommu_area_free(tbl->it_map, entry, npages);
358 309
359 spin_unlock_irqrestore(&tbl->it_lock, flags); 310 spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
1488 iommu_detected = 1; 1439 iommu_detected = 1;
1489 calgary_detected = 1; 1440 calgary_detected = 1;
1490 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); 1441 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
1491 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " 1442 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
1492 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, 1443 specified_table_size);
1493 debugging ? "enabled" : "disabled");
1494 1444
1495 /* swiotlb for devices that aren't behind the Calgary. */ 1445 /* swiotlb for devices that aren't behind the Calgary. */
1496 if (max_pfn > MAX_DMA32_PFN) 1446 if (max_pfn > MAX_DMA32_PFN)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58c035c..cfd9f9063896 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@ static void flush_gart(void)
144} 144}
145 145
146#ifdef CONFIG_IOMMU_LEAK 146#ifdef CONFIG_IOMMU_LEAK
147
148#define SET_LEAK(x) \
149 do { \
150 if (iommu_leak_tab) \
151 iommu_leak_tab[x] = __builtin_return_address(0);\
152 } while (0)
153
154#define CLEAR_LEAK(x) \
155 do { \
156 if (iommu_leak_tab) \
157 iommu_leak_tab[x] = NULL; \
158 } while (0)
159
160/* Debugging aid for drivers that don't free their IOMMU tables */ 147/* Debugging aid for drivers that don't free their IOMMU tables */
161static void **iommu_leak_tab;
162static int leak_trace; 148static int leak_trace;
163static int iommu_leak_pages = 20; 149static int iommu_leak_pages = 20;
164 150
165static void dump_leak(void) 151static void dump_leak(void)
166{ 152{
167 int i;
168 static int dump; 153 static int dump;
169 154
170 if (dump || !iommu_leak_tab) 155 if (dump)
171 return; 156 return;
172 dump = 1; 157 dump = 1;
173 show_stack(NULL, NULL);
174 158
175 /* Very crude. dump some from the end of the table too */ 159 show_stack(NULL, NULL);
176 printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", 160 debug_dma_dump_mappings(NULL);
177 iommu_leak_pages);
178 for (i = 0; i < iommu_leak_pages; i += 2) {
179 printk(KERN_DEBUG "%lu: ", iommu_pages-i);
180 printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
181 0);
182 printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
183 }
184 printk(KERN_DEBUG "\n");
185} 161}
186#else
187# define SET_LEAK(x)
188# define CLEAR_LEAK(x)
189#endif 162#endif
190 163
191static void iommu_full(struct device *dev, size_t size, int dir) 164static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
248 221
249 for (i = 0; i < npages; i++) { 222 for (i = 0; i < npages; i++) {
250 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); 223 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
251 SET_LEAK(iommu_page + i);
252 phys_mem += PAGE_SIZE; 224 phys_mem += PAGE_SIZE;
253 } 225 }
254 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); 226 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
294 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 266 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
295 for (i = 0; i < npages; i++) { 267 for (i = 0; i < npages; i++) {
296 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 268 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
297 CLEAR_LEAK(iommu_page + i);
298 } 269 }
299 free_iommu(iommu_page, npages); 270 free_iommu(iommu_page, npages);
300} 271}
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
377 pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); 348 pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
378 while (pages--) { 349 while (pages--) {
379 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); 350 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
380 SET_LEAK(iommu_page);
381 addr += PAGE_SIZE; 351 addr += PAGE_SIZE;
382 iommu_page++; 352 iommu_page++;
383 } 353 }
@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
688 658
689 agp_gatt_table = gatt; 659 agp_gatt_table = gatt;
690 660
691 enable_gart_translations();
692
693 error = sysdev_class_register(&gart_sysdev_class); 661 error = sysdev_class_register(&gart_sysdev_class);
694 if (!error) 662 if (!error)
695 error = sysdev_register(&device_gart); 663 error = sysdev_register(&device_gart);
@@ -801,11 +769,12 @@ void __init gart_iommu_init(void)
801 769
802#ifdef CONFIG_IOMMU_LEAK 770#ifdef CONFIG_IOMMU_LEAK
803 if (leak_trace) { 771 if (leak_trace) {
804 iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 772 int ret;
805 get_order(iommu_pages*sizeof(void *))); 773
806 if (!iommu_leak_tab) 774 ret = dma_debug_resize_entries(iommu_pages);
775 if (ret)
807 printk(KERN_DEBUG 776 printk(KERN_DEBUG
808 "PCI-DMA: Cannot allocate leak trace area\n"); 777 "PCI-DMA: Cannot trace all the entries\n");
809 } 778 }
810#endif 779#endif
811 780
@@ -845,6 +814,14 @@ void __init gart_iommu_init(void)
845 * the pages as Not-Present: 814 * the pages as Not-Present:
846 */ 815 */
847 wbinvd(); 816 wbinvd();
817
818 /*
819 * Now all caches are flushed and we can safely enable
820 * GART hardware. Doing it early leaves the possibility
821 * of stale cache entries that can lead to GART PTE
822 * errors.
823 */
824 enable_gart_translations();
848 825
849 /* 826 /*
850 * Try to workaround a bug (thanks to BenH): 827 * Try to workaround a bug (thanks to BenH):
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 221a3853e268..a1712f2b50f1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
28 return paddr; 28 return paddr;
29} 29}
30 30
31phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) 31phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
32{ 32{
33 return baddr; 33 return baddr;
34} 34}
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb7b5a2..171ad8aedc83 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
32 32
33extern void dma_debug_init(u32 num_entries); 33extern void dma_debug_init(u32 num_entries);
34 34
35extern int dma_debug_resize_entries(u32 num_entries);
36
35extern void debug_dma_map_page(struct device *dev, struct page *page, 37extern void debug_dma_map_page(struct device *dev, struct page *page,
36 size_t offset, size_t size, 38 size_t offset, size_t size,
37 int direction, dma_addr_t dma_addr, 39 int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
91{ 93{
92} 94}
93 95
96static inline int dma_debug_resize_entries(u32 num_entries)
97{
98 return 0;
99}
100
94static inline void debug_dma_map_page(struct device *dev, struct page *page, 101static inline void debug_dma_map_page(struct device *dev, struct page *page,
95 size_t offset, size_t size, 102 size_t offset, size_t size,
96 int direction, dma_addr_t dma_addr, 103 int direction, dma_addr_t dma_addr,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ac9ff54f7cb3..cb1a6631b8f4 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
29 29
30extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, 30extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
31 phys_addr_t address); 31 phys_addr_t address);
32extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); 32extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
33 dma_addr_t address);
33 34
34extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); 35extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
35 36
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 69da09a085a1..ad65fc0317d9 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -23,9 +23,11 @@
23#include <linux/dma-debug.h> 23#include <linux/dma-debug.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/uaccess.h>
26#include <linux/device.h> 27#include <linux/device.h>
27#include <linux/types.h> 28#include <linux/types.h>
28#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/ctype.h>
29#include <linux/list.h> 31#include <linux/list.h>
30#include <linux/slab.h> 32#include <linux/slab.h>
31 33
@@ -85,6 +87,7 @@ static u32 show_num_errors = 1;
85 87
86static u32 num_free_entries; 88static u32 num_free_entries;
87static u32 min_free_entries; 89static u32 min_free_entries;
90static u32 nr_total_entries;
88 91
89/* number of preallocated entries requested by kernel cmdline */ 92/* number of preallocated entries requested by kernel cmdline */
90static u32 req_entries; 93static u32 req_entries;
@@ -97,6 +100,16 @@ static struct dentry *show_all_errors_dent __read_mostly;
97static struct dentry *show_num_errors_dent __read_mostly; 100static struct dentry *show_num_errors_dent __read_mostly;
98static struct dentry *num_free_entries_dent __read_mostly; 101static struct dentry *num_free_entries_dent __read_mostly;
99static struct dentry *min_free_entries_dent __read_mostly; 102static struct dentry *min_free_entries_dent __read_mostly;
103static struct dentry *filter_dent __read_mostly;
104
105/* per-driver filter related state */
106
107#define NAME_MAX_LEN 64
108
109static char current_driver_name[NAME_MAX_LEN] __read_mostly;
110static struct device_driver *current_driver __read_mostly;
111
112static DEFINE_RWLOCK(driver_name_lock);
100 113
101static const char *type2name[4] = { "single", "page", 114static const char *type2name[4] = { "single", "page",
102 "scather-gather", "coherent" }; 115 "scather-gather", "coherent" };
@@ -104,6 +117,11 @@ static const char *type2name[4] = { "single", "page",
104static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", 117static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
105 "DMA_FROM_DEVICE", "DMA_NONE" }; 118 "DMA_FROM_DEVICE", "DMA_NONE" };
106 119
120/* little merge helper - remove it after the merge window */
121#ifndef BUS_NOTIFY_UNBOUND_DRIVER
122#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
123#endif
124
107/* 125/*
108 * The access to some variables in this macro is racy. We can't use atomic_t 126 * The access to some variables in this macro is racy. We can't use atomic_t
109 * here because all these variables are exported to debugfs. Some of them even 127 * here because all these variables are exported to debugfs. Some of them even
@@ -121,15 +139,54 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry)
121{ 139{
122#ifdef CONFIG_STACKTRACE 140#ifdef CONFIG_STACKTRACE
123 if (entry) { 141 if (entry) {
124 printk(KERN_WARNING "Mapped at:\n"); 142 pr_warning("Mapped at:\n");
125 print_stack_trace(&entry->stacktrace, 0); 143 print_stack_trace(&entry->stacktrace, 0);
126 } 144 }
127#endif 145#endif
128} 146}
129 147
148static bool driver_filter(struct device *dev)
149{
150 struct device_driver *drv;
151 unsigned long flags;
152 bool ret;
153
154 /* driver filter off */
155 if (likely(!current_driver_name[0]))
156 return true;
157
158 /* driver filter on and initialized */
159 if (current_driver && dev->driver == current_driver)
160 return true;
161
162 if (current_driver || !current_driver_name[0])
163 return false;
164
165 /* driver filter on but not yet initialized */
166 drv = get_driver(dev->driver);
167 if (!drv)
168 return false;
169
170 /* lock to protect against change of current_driver_name */
171 read_lock_irqsave(&driver_name_lock, flags);
172
173 ret = false;
174 if (drv->name &&
175 strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
176 current_driver = drv;
177 ret = true;
178 }
179
180 read_unlock_irqrestore(&driver_name_lock, flags);
181 put_driver(drv);
182
183 return ret;
184}
185
130#define err_printk(dev, entry, format, arg...) do { \ 186#define err_printk(dev, entry, format, arg...) do { \
131 error_count += 1; \ 187 error_count += 1; \
132 if (show_all_errors || show_num_errors > 0) { \ 188 if (driver_filter(dev) && \
189 (show_all_errors || show_num_errors > 0)) { \
133 WARN(1, "%s %s: " format, \ 190 WARN(1, "%s %s: " format, \
134 dev_driver_string(dev), \ 191 dev_driver_string(dev), \
135 dev_name(dev) , ## arg); \ 192 dev_name(dev) , ## arg); \
@@ -185,15 +242,50 @@ static void put_hash_bucket(struct hash_bucket *bucket,
185static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, 242static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
186 struct dma_debug_entry *ref) 243 struct dma_debug_entry *ref)
187{ 244{
188 struct dma_debug_entry *entry; 245 struct dma_debug_entry *entry, *ret = NULL;
246 int matches = 0, match_lvl, last_lvl = 0;
189 247
190 list_for_each_entry(entry, &bucket->list, list) { 248 list_for_each_entry(entry, &bucket->list, list) {
191 if ((entry->dev_addr == ref->dev_addr) && 249 if ((entry->dev_addr != ref->dev_addr) ||
192 (entry->dev == ref->dev)) 250 (entry->dev != ref->dev))
251 continue;
252
253 /*
254 * Some drivers map the same physical address multiple
255 * times. Without a hardware IOMMU this results in the
256 * same device addresses being put into the dma-debug
257 * hash multiple times too. This can result in false
258 * positives being reported. Therfore we implement a
259 * best-fit algorithm here which returns the entry from
260 * the hash which fits best to the reference value
261 * instead of the first-fit.
262 */
263 matches += 1;
264 match_lvl = 0;
265 entry->size == ref->size ? ++match_lvl : match_lvl;
266 entry->type == ref->type ? ++match_lvl : match_lvl;
267 entry->direction == ref->direction ? ++match_lvl : match_lvl;
268
269 if (match_lvl == 3) {
270 /* perfect-fit - return the result */
193 return entry; 271 return entry;
272 } else if (match_lvl > last_lvl) {
273 /*
274 * We found an entry that fits better then the
275 * previous one
276 */
277 last_lvl = match_lvl;
278 ret = entry;
279 }
194 } 280 }
195 281
196 return NULL; 282 /*
283 * If we have multiple matches but no perfect-fit, just return
284 * NULL.
285 */
286 ret = (matches == 1) ? ret : NULL;
287
288 return ret;
197} 289}
198 290
199/* 291/*
@@ -257,6 +349,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
257 put_hash_bucket(bucket, &flags); 349 put_hash_bucket(bucket, &flags);
258} 350}
259 351
352static struct dma_debug_entry *__dma_entry_alloc(void)
353{
354 struct dma_debug_entry *entry;
355
356 entry = list_entry(free_entries.next, struct dma_debug_entry, list);
357 list_del(&entry->list);
358 memset(entry, 0, sizeof(*entry));
359
360 num_free_entries -= 1;
361 if (num_free_entries < min_free_entries)
362 min_free_entries = num_free_entries;
363
364 return entry;
365}
366
260/* struct dma_entry allocator 367/* struct dma_entry allocator
261 * 368 *
262 * The next two functions implement the allocator for 369 * The next two functions implement the allocator for
@@ -270,15 +377,12 @@ static struct dma_debug_entry *dma_entry_alloc(void)
270 spin_lock_irqsave(&free_entries_lock, flags); 377 spin_lock_irqsave(&free_entries_lock, flags);
271 378
272 if (list_empty(&free_entries)) { 379 if (list_empty(&free_entries)) {
273 printk(KERN_ERR "DMA-API: debugging out of memory " 380 pr_err("DMA-API: debugging out of memory - disabling\n");
274 "- disabling\n");
275 global_disable = true; 381 global_disable = true;
276 goto out; 382 goto out;
277 } 383 }
278 384
279 entry = list_entry(free_entries.next, struct dma_debug_entry, list); 385 entry = __dma_entry_alloc();
280 list_del(&entry->list);
281 memset(entry, 0, sizeof(*entry));
282 386
283#ifdef CONFIG_STACKTRACE 387#ifdef CONFIG_STACKTRACE
284 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; 388 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +390,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
286 entry->stacktrace.skip = 2; 390 entry->stacktrace.skip = 2;
287 save_stack_trace(&entry->stacktrace); 391 save_stack_trace(&entry->stacktrace);
288#endif 392#endif
289 num_free_entries -= 1;
290 if (num_free_entries < min_free_entries)
291 min_free_entries = num_free_entries;
292 393
293out: 394out:
294 spin_unlock_irqrestore(&free_entries_lock, flags); 395 spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +411,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
310 spin_unlock_irqrestore(&free_entries_lock, flags); 411 spin_unlock_irqrestore(&free_entries_lock, flags);
311} 412}
312 413
414int dma_debug_resize_entries(u32 num_entries)
415{
416 int i, delta, ret = 0;
417 unsigned long flags;
418 struct dma_debug_entry *entry;
419 LIST_HEAD(tmp);
420
421 spin_lock_irqsave(&free_entries_lock, flags);
422
423 if (nr_total_entries < num_entries) {
424 delta = num_entries - nr_total_entries;
425
426 spin_unlock_irqrestore(&free_entries_lock, flags);
427
428 for (i = 0; i < delta; i++) {
429 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
430 if (!entry)
431 break;
432
433 list_add_tail(&entry->list, &tmp);
434 }
435
436 spin_lock_irqsave(&free_entries_lock, flags);
437
438 list_splice(&tmp, &free_entries);
439 nr_total_entries += i;
440 num_free_entries += i;
441 } else {
442 delta = nr_total_entries - num_entries;
443
444 for (i = 0; i < delta && !list_empty(&free_entries); i++) {
445 entry = __dma_entry_alloc();
446 kfree(entry);
447 }
448
449 nr_total_entries -= i;
450 }
451
452 if (nr_total_entries != num_entries)
453 ret = 1;
454
455 spin_unlock_irqrestore(&free_entries_lock, flags);
456
457 return ret;
458}
459EXPORT_SYMBOL(dma_debug_resize_entries);
460
313/* 461/*
314 * DMA-API debugging init code 462 * DMA-API debugging init code
315 * 463 *
@@ -334,8 +482,7 @@ static int prealloc_memory(u32 num_entries)
334 num_free_entries = num_entries; 482 num_free_entries = num_entries;
335 min_free_entries = num_entries; 483 min_free_entries = num_entries;
336 484
337 printk(KERN_INFO "DMA-API: preallocated %d debug entries\n", 485 pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
338 num_entries);
339 486
340 return 0; 487 return 0;
341 488
@@ -349,11 +496,102 @@ out_err:
349 return -ENOMEM; 496 return -ENOMEM;
350} 497}
351 498
499static ssize_t filter_read(struct file *file, char __user *user_buf,
500 size_t count, loff_t *ppos)
501{
502 char buf[NAME_MAX_LEN + 1];
503 unsigned long flags;
504 int len;
505
506 if (!current_driver_name[0])
507 return 0;
508
509 /*
510 * We can't copy to userspace directly because current_driver_name can
511 * only be read under the driver_name_lock with irqs disabled. So
512 * create a temporary copy first.
513 */
514 read_lock_irqsave(&driver_name_lock, flags);
515 len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
516 read_unlock_irqrestore(&driver_name_lock, flags);
517
518 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
519}
520
521static ssize_t filter_write(struct file *file, const char __user *userbuf,
522 size_t count, loff_t *ppos)
523{
524 char buf[NAME_MAX_LEN];
525 unsigned long flags;
526 size_t len;
527 int i;
528
529 /*
530 * We can't copy from userspace directly. Access to
531 * current_driver_name is protected with a write_lock with irqs
532 * disabled. Since copy_from_user can fault and may sleep we
533 * need to copy to temporary buffer first
534 */
535 len = min(count, (size_t)(NAME_MAX_LEN - 1));
536 if (copy_from_user(buf, userbuf, len))
537 return -EFAULT;
538
539 buf[len] = 0;
540
541 write_lock_irqsave(&driver_name_lock, flags);
542
543 /*
544 * Now handle the string we got from userspace very carefully.
545 * The rules are:
546 * - only use the first token we got
547 * - token delimiter is everything looking like a space
548 * character (' ', '\n', '\t' ...)
549 *
550 */
551 if (!isalnum(buf[0])) {
552 /*
553 * If the first character userspace gave us is not
554 * alphanumerical then assume the filter should be
555 * switched off.
556 */
557 if (current_driver_name[0])
558 pr_info("DMA-API: switching off dma-debug driver filter\n");
559 current_driver_name[0] = 0;
560 current_driver = NULL;
561 goto out_unlock;
562 }
563
564 /*
565 * Now parse out the first token and use it as the name for the
566 * driver to filter for.
567 */
568 for (i = 0; i < NAME_MAX_LEN; ++i) {
569 current_driver_name[i] = buf[i];
570 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
571 break;
572 }
573 current_driver_name[i] = 0;
574 current_driver = NULL;
575
576 pr_info("DMA-API: enable driver filter for driver [%s]\n",
577 current_driver_name);
578
579out_unlock:
580 write_unlock_irqrestore(&driver_name_lock, flags);
581
582 return count;
583}
584
585const struct file_operations filter_fops = {
586 .read = filter_read,
587 .write = filter_write,
588};
589
352static int dma_debug_fs_init(void) 590static int dma_debug_fs_init(void)
353{ 591{
354 dma_debug_dent = debugfs_create_dir("dma-api", NULL); 592 dma_debug_dent = debugfs_create_dir("dma-api", NULL);
355 if (!dma_debug_dent) { 593 if (!dma_debug_dent) {
356 printk(KERN_ERR "DMA-API: can not create debugfs directory\n"); 594 pr_err("DMA-API: can not create debugfs directory\n");
357 return -ENOMEM; 595 return -ENOMEM;
358 } 596 }
359 597
@@ -392,6 +630,11 @@ static int dma_debug_fs_init(void)
392 if (!min_free_entries_dent) 630 if (!min_free_entries_dent)
393 goto out_err; 631 goto out_err;
394 632
633 filter_dent = debugfs_create_file("driver_filter", 0644,
634 dma_debug_dent, NULL, &filter_fops);
635 if (!filter_dent)
636 goto out_err;
637
395 return 0; 638 return 0;
396 639
397out_err: 640out_err:
@@ -400,9 +643,64 @@ out_err:
400 return -ENOMEM; 643 return -ENOMEM;
401} 644}
402 645
646static int device_dma_allocations(struct device *dev)
647{
648 struct dma_debug_entry *entry;
649 unsigned long flags;
650 int count = 0, i;
651
652 local_irq_save(flags);
653
654 for (i = 0; i < HASH_SIZE; ++i) {
655 spin_lock(&dma_entry_hash[i].lock);
656 list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
657 if (entry->dev == dev)
658 count += 1;
659 }
660 spin_unlock(&dma_entry_hash[i].lock);
661 }
662
663 local_irq_restore(flags);
664
665 return count;
666}
667
668static int dma_debug_device_change(struct notifier_block *nb,
669 unsigned long action, void *data)
670{
671 struct device *dev = data;
672 int count;
673
674
675 switch (action) {
676 case BUS_NOTIFY_UNBOUND_DRIVER:
677 count = device_dma_allocations(dev);
678 if (count == 0)
679 break;
680 err_printk(dev, NULL, "DMA-API: device driver has pending "
681 "DMA allocations while released from device "
682 "[count=%d]\n", count);
683 break;
684 default:
685 break;
686 }
687
688 return 0;
689}
690
403void dma_debug_add_bus(struct bus_type *bus) 691void dma_debug_add_bus(struct bus_type *bus)
404{ 692{
405 /* FIXME: register notifier */ 693 struct notifier_block *nb;
694
695 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
696 if (nb == NULL) {
697 pr_err("dma_debug_add_bus: out of memory\n");
698 return;
699 }
700
701 nb->notifier_call = dma_debug_device_change;
702
703 bus_register_notifier(bus, nb);
406} 704}
407 705
408/* 706/*
@@ -421,8 +719,7 @@ void dma_debug_init(u32 num_entries)
421 } 719 }
422 720
423 if (dma_debug_fs_init() != 0) { 721 if (dma_debug_fs_init() != 0) {
424 printk(KERN_ERR "DMA-API: error creating debugfs entries " 722 pr_err("DMA-API: error creating debugfs entries - disabling\n");
425 "- disabling\n");
426 global_disable = true; 723 global_disable = true;
427 724
428 return; 725 return;
@@ -432,14 +729,15 @@ void dma_debug_init(u32 num_entries)
432 num_entries = req_entries; 729 num_entries = req_entries;
433 730
434 if (prealloc_memory(num_entries) != 0) { 731 if (prealloc_memory(num_entries) != 0) {
435 printk(KERN_ERR "DMA-API: debugging out of memory error " 732 pr_err("DMA-API: debugging out of memory error - disabled\n");
436 "- disabled\n");
437 global_disable = true; 733 global_disable = true;
438 734
439 return; 735 return;
440 } 736 }
441 737
442 printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); 738 nr_total_entries = num_free_entries;
739
740 pr_info("DMA-API: debugging enabled by kernel config\n");
443} 741}
444 742
445static __init int dma_debug_cmdline(char *str) 743static __init int dma_debug_cmdline(char *str)
@@ -448,8 +746,7 @@ static __init int dma_debug_cmdline(char *str)
448 return -EINVAL; 746 return -EINVAL;
449 747
450 if (strncmp(str, "off", 3) == 0) { 748 if (strncmp(str, "off", 3) == 0) {
451 printk(KERN_INFO "DMA-API: debugging disabled on kernel " 749 pr_info("DMA-API: debugging disabled on kernel command line\n");
452 "command line\n");
453 global_disable = true; 750 global_disable = true;
454 } 751 }
455 752
@@ -723,15 +1020,15 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
723 entry->type = dma_debug_sg; 1020 entry->type = dma_debug_sg;
724 entry->dev = dev; 1021 entry->dev = dev;
725 entry->paddr = sg_phys(s); 1022 entry->paddr = sg_phys(s);
726 entry->size = s->length; 1023 entry->size = sg_dma_len(s);
727 entry->dev_addr = s->dma_address; 1024 entry->dev_addr = sg_dma_address(s);
728 entry->direction = direction; 1025 entry->direction = direction;
729 entry->sg_call_ents = nents; 1026 entry->sg_call_ents = nents;
730 entry->sg_mapped_ents = mapped_ents; 1027 entry->sg_mapped_ents = mapped_ents;
731 1028
732 if (!PageHighMem(sg_page(s))) { 1029 if (!PageHighMem(sg_page(s))) {
733 check_for_stack(dev, sg_virt(s)); 1030 check_for_stack(dev, sg_virt(s));
734 check_for_illegal_area(dev, sg_virt(s), s->length); 1031 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
735 } 1032 }
736 1033
737 add_dma_entry(entry); 1034 add_dma_entry(entry);
@@ -739,13 +1036,33 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
739} 1036}
740EXPORT_SYMBOL(debug_dma_map_sg); 1037EXPORT_SYMBOL(debug_dma_map_sg);
741 1038
1039static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
1040{
1041 struct dma_debug_entry *entry, ref;
1042 struct hash_bucket *bucket;
1043 unsigned long flags;
1044 int mapped_ents;
1045
1046 ref.dev = dev;
1047 ref.dev_addr = sg_dma_address(s);
1048 ref.size = sg_dma_len(s),
1049
1050 bucket = get_hash_bucket(&ref, &flags);
1051 entry = hash_bucket_find(bucket, &ref);
1052 mapped_ents = 0;
1053
1054 if (entry)
1055 mapped_ents = entry->sg_mapped_ents;
1056 put_hash_bucket(bucket, &flags);
1057
1058 return mapped_ents;
1059}
1060
742void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, 1061void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
743 int nelems, int dir) 1062 int nelems, int dir)
744{ 1063{
745 struct dma_debug_entry *entry;
746 struct scatterlist *s; 1064 struct scatterlist *s;
747 int mapped_ents = 0, i; 1065 int mapped_ents = 0, i;
748 unsigned long flags;
749 1066
750 if (unlikely(global_disable)) 1067 if (unlikely(global_disable))
751 return; 1068 return;
@@ -756,8 +1073,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
756 .type = dma_debug_sg, 1073 .type = dma_debug_sg,
757 .dev = dev, 1074 .dev = dev,
758 .paddr = sg_phys(s), 1075 .paddr = sg_phys(s),
759 .dev_addr = s->dma_address, 1076 .dev_addr = sg_dma_address(s),
760 .size = s->length, 1077 .size = sg_dma_len(s),
761 .direction = dir, 1078 .direction = dir,
762 .sg_call_ents = 0, 1079 .sg_call_ents = 0,
763 }; 1080 };
@@ -765,14 +1082,9 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
765 if (mapped_ents && i >= mapped_ents) 1082 if (mapped_ents && i >= mapped_ents)
766 break; 1083 break;
767 1084
768 if (mapped_ents == 0) { 1085 if (!i) {
769 struct hash_bucket *bucket;
770 ref.sg_call_ents = nelems; 1086 ref.sg_call_ents = nelems;
771 bucket = get_hash_bucket(&ref, &flags); 1087 mapped_ents = get_nr_mapped_entries(dev, s);
772 entry = hash_bucket_find(bucket, &ref);
773 if (entry)
774 mapped_ents = entry->sg_mapped_ents;
775 put_hash_bucket(bucket, &flags);
776 } 1088 }
777 1089
778 check_unmap(&ref); 1090 check_unmap(&ref);
@@ -874,14 +1186,20 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
874 int nelems, int direction) 1186 int nelems, int direction)
875{ 1187{
876 struct scatterlist *s; 1188 struct scatterlist *s;
877 int i; 1189 int mapped_ents = 0, i;
878 1190
879 if (unlikely(global_disable)) 1191 if (unlikely(global_disable))
880 return; 1192 return;
881 1193
882 for_each_sg(sg, s, nelems, i) { 1194 for_each_sg(sg, s, nelems, i) {
883 check_sync(dev, s->dma_address, s->dma_length, 0, 1195 if (!i)
884 direction, true); 1196 mapped_ents = get_nr_mapped_entries(dev, s);
1197
1198 if (i >= mapped_ents)
1199 break;
1200
1201 check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
1202 direction, true);
885 } 1203 }
886} 1204}
887EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); 1205EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
@@ -890,15 +1208,39 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
890 int nelems, int direction) 1208 int nelems, int direction)
891{ 1209{
892 struct scatterlist *s; 1210 struct scatterlist *s;
893 int i; 1211 int mapped_ents = 0, i;
894 1212
895 if (unlikely(global_disable)) 1213 if (unlikely(global_disable))
896 return; 1214 return;
897 1215
898 for_each_sg(sg, s, nelems, i) { 1216 for_each_sg(sg, s, nelems, i) {
899 check_sync(dev, s->dma_address, s->dma_length, 0, 1217 if (!i)
900 direction, false); 1218 mapped_ents = get_nr_mapped_entries(dev, s);
1219
1220 if (i >= mapped_ents)
1221 break;
1222
1223 check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
1224 direction, false);
901 } 1225 }
902} 1226}
903EXPORT_SYMBOL(debug_dma_sync_sg_for_device); 1227EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
904 1228
1229static int __init dma_debug_driver_setup(char *str)
1230{
1231 int i;
1232
1233 for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
1234 current_driver_name[i] = *str;
1235 if (*str == 0)
1236 break;
1237 }
1238
1239 if (current_driver_name[0])
1240 pr_info("DMA-API: enable driver filter for driver [%s]\n",
1241 current_driver_name);
1242
1243
1244 return 1;
1245}
1246__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2b0b5a7d2ced..bffe6d7ef9d9 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -60,8 +60,8 @@ enum dma_sync_target {
60int swiotlb_force; 60int swiotlb_force;
61 61
62/* 62/*
63 * Used to do a quick range check in swiotlb_unmap_single and 63 * Used to do a quick range check in unmap_single and
64 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this 64 * sync_single_*, to see if the memory was in fact allocated by this
65 * API. 65 * API.
66 */ 66 */
67static char *io_tlb_start, *io_tlb_end; 67static char *io_tlb_start, *io_tlb_end;
@@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
129 return paddr; 129 return paddr;
130} 130}
131 131
132phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr) 132phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
133{ 133{
134 return baddr; 134 return baddr;
135} 135}
@@ -140,9 +140,15 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
140 return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); 140 return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
141} 141}
142 142
143static void *swiotlb_bus_to_virt(dma_addr_t address) 143void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
144{ 144{
145 return phys_to_virt(swiotlb_bus_to_phys(address)); 145 return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
146}
147
148int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
149 dma_addr_t addr, size_t size)
150{
151 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
146} 152}
147 153
148int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) 154int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -309,10 +315,10 @@ cleanup1:
309 return -ENOMEM; 315 return -ENOMEM;
310} 316}
311 317
312static int 318static inline int
313address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) 319address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
314{ 320{
315 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); 321 return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
316} 322}
317 323
318static inline int range_needs_mapping(phys_addr_t paddr, size_t size) 324static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -341,7 +347,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
341 unsigned long flags; 347 unsigned long flags;
342 348
343 while (size) { 349 while (size) {
344 sz = min(PAGE_SIZE - offset, size); 350 sz = min_t(size_t, PAGE_SIZE - offset, size);
345 351
346 local_irq_save(flags); 352 local_irq_save(flags);
347 buffer = kmap_atomic(pfn_to_page(pfn), 353 buffer = kmap_atomic(pfn_to_page(pfn),
@@ -476,7 +482,7 @@ found:
476 * dma_addr is the kernel virtual address of the bounce buffer to unmap. 482 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
477 */ 483 */
478static void 484static void
479unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 485do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
480{ 486{
481 unsigned long flags; 487 unsigned long flags;
482 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 488 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -560,7 +566,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
560 size)) { 566 size)) {
561 /* 567 /*
562 * The allocated memory isn't reachable by the device. 568 * The allocated memory isn't reachable by the device.
563 * Fall back on swiotlb_map_single().
564 */ 569 */
565 free_pages((unsigned long) ret, order); 570 free_pages((unsigned long) ret, order);
566 ret = NULL; 571 ret = NULL;
@@ -568,9 +573,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
568 if (!ret) { 573 if (!ret) {
569 /* 574 /*
570 * We are either out of memory or the device can't DMA 575 * We are either out of memory or the device can't DMA
571 * to GFP_DMA memory; fall back on 576 * to GFP_DMA memory; fall back on map_single(), which
572 * swiotlb_map_single(), which will grab memory from 577 * will grab memory from the lowest available address range.
573 * the lowest available address range.
574 */ 578 */
575 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); 579 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
576 if (!ret) 580 if (!ret)
@@ -587,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
587 (unsigned long long)dev_addr); 591 (unsigned long long)dev_addr);
588 592
589 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 593 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
590 unmap_single(hwdev, ret, size, DMA_TO_DEVICE); 594 do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
591 return NULL; 595 return NULL;
592 } 596 }
593 *dma_handle = dev_addr; 597 *dma_handle = dev_addr;
@@ -604,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
604 free_pages((unsigned long) vaddr, get_order(size)); 608 free_pages((unsigned long) vaddr, get_order(size));
605 else 609 else
606 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 610 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
607 unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); 611 do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
608} 612}
609EXPORT_SYMBOL(swiotlb_free_coherent); 613EXPORT_SYMBOL(swiotlb_free_coherent);
610 614
@@ -634,7 +638,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
634 * physical address to use is returned. 638 * physical address to use is returned.
635 * 639 *
636 * Once the device is given the dma address, the device owns this memory until 640 * Once the device is given the dma address, the device owns this memory until
637 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 641 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
638 */ 642 */
639dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, 643dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
640 unsigned long offset, size_t size, 644 unsigned long offset, size_t size,
@@ -642,18 +646,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
642 struct dma_attrs *attrs) 646 struct dma_attrs *attrs)
643{ 647{
644 phys_addr_t phys = page_to_phys(page) + offset; 648 phys_addr_t phys = page_to_phys(page) + offset;
645 void *ptr = page_address(page) + offset;
646 dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); 649 dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
647 void *map; 650 void *map;
648 651
649 BUG_ON(dir == DMA_NONE); 652 BUG_ON(dir == DMA_NONE);
650 /* 653 /*
651 * If the pointer passed in happens to be in the device's DMA window, 654 * If the address happens to be in the device's DMA window,
652 * we can safely return the device addr and not worry about bounce 655 * we can safely return the device addr and not worry about bounce
653 * buffering it. 656 * buffering it.
654 */ 657 */
655 if (!address_needs_mapping(dev, dev_addr, size) && 658 if (!address_needs_mapping(dev, dev_addr, size) &&
656 !range_needs_mapping(virt_to_phys(ptr), size)) 659 !range_needs_mapping(phys, size))
657 return dev_addr; 660 return dev_addr;
658 661
659 /* 662 /*
@@ -679,23 +682,35 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
679 682
680/* 683/*
681 * Unmap a single streaming mode DMA translation. The dma_addr and size must 684 * Unmap a single streaming mode DMA translation. The dma_addr and size must
682 * match what was provided for in a previous swiotlb_map_single call. All 685 * match what was provided for in a previous swiotlb_map_page call. All
683 * other usages are undefined. 686 * other usages are undefined.
684 * 687 *
685 * After this call, reads by the cpu to the buffer are guaranteed to see 688 * After this call, reads by the cpu to the buffer are guaranteed to see
686 * whatever the device wrote there. 689 * whatever the device wrote there.
687 */ 690 */
691static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
692 size_t size, int dir)
693{
694 char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
695
696 BUG_ON(dir == DMA_NONE);
697
698 if (is_swiotlb_buffer(dma_addr)) {
699 do_unmap_single(hwdev, dma_addr, size, dir);
700 return;
701 }
702
703 if (dir != DMA_FROM_DEVICE)
704 return;
705
706 dma_mark_clean(dma_addr, size);
707}
708
688void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 709void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
689 size_t size, enum dma_data_direction dir, 710 size_t size, enum dma_data_direction dir,
690 struct dma_attrs *attrs) 711 struct dma_attrs *attrs)
691{ 712{
692 char *dma_addr = swiotlb_bus_to_virt(dev_addr); 713 unmap_single(hwdev, dev_addr, size, dir);
693
694 BUG_ON(dir == DMA_NONE);
695 if (is_swiotlb_buffer(dma_addr))
696 unmap_single(hwdev, dma_addr, size, dir);
697 else if (dir == DMA_FROM_DEVICE)
698 dma_mark_clean(dma_addr, size);
699} 714}
700EXPORT_SYMBOL_GPL(swiotlb_unmap_page); 715EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
701 716
@@ -703,7 +718,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
703 * Make physical memory consistent for a single streaming mode DMA translation 718 * Make physical memory consistent for a single streaming mode DMA translation
704 * after a transfer. 719 * after a transfer.
705 * 720 *
706 * If you perform a swiotlb_map_single() but wish to interrogate the buffer 721 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
707 * using the cpu, yet do not wish to teardown the dma mapping, you must 722 * using the cpu, yet do not wish to teardown the dma mapping, you must
708 * call this function before doing so. At the next point you give the dma 723 * call this function before doing so. At the next point you give the dma
709 * address back to the card, you must first perform a 724 * address back to the card, you must first perform a
@@ -713,13 +728,19 @@ static void
713swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, 728swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
714 size_t size, int dir, int target) 729 size_t size, int dir, int target)
715{ 730{
716 char *dma_addr = swiotlb_bus_to_virt(dev_addr); 731 char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
717 732
718 BUG_ON(dir == DMA_NONE); 733 BUG_ON(dir == DMA_NONE);
719 if (is_swiotlb_buffer(dma_addr)) 734
735 if (is_swiotlb_buffer(dma_addr)) {
720 sync_single(hwdev, dma_addr, size, dir, target); 736 sync_single(hwdev, dma_addr, size, dir, target);
721 else if (dir == DMA_FROM_DEVICE) 737 return;
722 dma_mark_clean(dma_addr, size); 738 }
739
740 if (dir != DMA_FROM_DEVICE)
741 return;
742
743 dma_mark_clean(dma_addr, size);
723} 744}
724 745
725void 746void
@@ -746,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
746 unsigned long offset, size_t size, 767 unsigned long offset, size_t size,
747 int dir, int target) 768 int dir, int target)
748{ 769{
749 char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset; 770 swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
750
751 BUG_ON(dir == DMA_NONE);
752 if (is_swiotlb_buffer(dma_addr))
753 sync_single(hwdev, dma_addr, size, dir, target);
754 else if (dir == DMA_FROM_DEVICE)
755 dma_mark_clean(dma_addr, size);
756} 771}
757 772
758void 773void
@@ -777,7 +792,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
777 792
778/* 793/*
779 * Map a set of buffers described by scatterlist in streaming mode for DMA. 794 * Map a set of buffers described by scatterlist in streaming mode for DMA.
780 * This is the scatter-gather version of the above swiotlb_map_single 795 * This is the scatter-gather version of the above swiotlb_map_page
781 * interface. Here the scatter gather list elements are each tagged with the 796 * interface. Here the scatter gather list elements are each tagged with the
782 * appropriate dma address and length. They are obtained via 797 * appropriate dma address and length. They are obtained via
783 * sg_dma_{address,length}(SG). 798 * sg_dma_{address,length}(SG).
@@ -788,7 +803,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
788 * The routine returns the number of addr/length pairs actually 803 * The routine returns the number of addr/length pairs actually
789 * used, at most nents. 804 * used, at most nents.
790 * 805 *
791 * Device ownership issues as mentioned above for swiotlb_map_single are the 806 * Device ownership issues as mentioned above for swiotlb_map_page are the
792 * same here. 807 * same here.
793 */ 808 */
794int 809int
@@ -836,7 +851,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
836 851
837/* 852/*
838 * Unmap a set of streaming mode DMA translations. Again, cpu read rules 853 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
839 * concerning calls here are the same as for swiotlb_unmap_single() above. 854 * concerning calls here are the same as for swiotlb_unmap_page() above.
840 */ 855 */
841void 856void
842swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, 857swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -847,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
847 862
848 BUG_ON(dir == DMA_NONE); 863 BUG_ON(dir == DMA_NONE);
849 864
850 for_each_sg(sgl, sg, nelems, i) { 865 for_each_sg(sgl, sg, nelems, i)
851 if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) 866 unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
852 unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), 867
853 sg->dma_length, dir);
854 else if (dir == DMA_FROM_DEVICE)
855 dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
856 }
857} 868}
858EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); 869EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
859 870
@@ -879,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
879 struct scatterlist *sg; 890 struct scatterlist *sg;
880 int i; 891 int i;
881 892
882 BUG_ON(dir == DMA_NONE); 893 for_each_sg(sgl, sg, nelems, i)
883 894 swiotlb_sync_single(hwdev, sg->dma_address,
884 for_each_sg(sgl, sg, nelems, i) {
885 if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
886 sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
887 sg->dma_length, dir, target); 895 sg->dma_length, dir, target);
888 else if (dir == DMA_FROM_DEVICE)
889 dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
890 }
891} 896}
892 897
893void 898void