-rw-r--r--  Documentation/kernel-parameters.txt     |   5
-rw-r--r--  arch/x86/Kconfig.debug                  |   8
-rw-r--r--  arch/x86/include/asm/amd_iommu.h        |   2
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h  |  55
-rw-r--r--  arch/x86/kernel/amd_iommu.c             | 486
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c        | 267
6 files changed, 594 insertions, 229 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbfbcc75..5b776c6e7964 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -329,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file
329 flushed before they will be reused, which 329 flushed before they will be reused, which
330 is a lot faster 330 is a lot faster
331 331
332 amd_iommu_size= [HW,X86-64]
333 Define the size of the aperture for the AMD IOMMU
334 driver. Possible values are:
335 '32M', '64M' (default), '128M', '256M', '512M', '1G'
336
337 amijoy.map= [HW,JOY] Amiga joystick support 332 amijoy.map= [HW,JOY] Amiga joystick support
338 Map of devices attached to JOY0DAT and JOY1DAT 333 Map of devices attached to JOY0DAT and JOY1DAT
339 Format: <a>,<b> 334 Format: <a>,<b>
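
For context, the option removed above used to be passed on the kernel command line, e.g. (illustrative value only):

    amd_iommu_size=128M

With this patch the aperture starts as a single 128 MB range and grows on demand (see the APERTURE_RANGE_* constants and alloc_new_range() below), so a fixed aperture size no longer needs to be configured at boot.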
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 5865712d105d..33fac6bbe1c2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,6 +159,14 @@ config IOMMU_DEBUG
159 options. See Documentation/x86_64/boot-options.txt for more 159 options. See Documentation/x86_64/boot-options.txt for more
160 details. 160 details.
161 161
162config IOMMU_STRESS
163 bool "Enable IOMMU stress-test mode"
164 ---help---
165 This option disables various optimizations in IOMMU related
166 code to do real stress testing of the IOMMU code. This option
167 will cause a performance drop and should only be enabled for
168 testing.
169
162config IOMMU_LEAK 170config IOMMU_LEAK
163 bool "IOMMU leak tracing" 171 bool "IOMMU leak tracing"
164 depends on IOMMU_DEBUG && DMA_API_DEBUG 172 depends on IOMMU_DEBUG && DMA_API_DEBUG
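
The new IOMMU_STRESS option is consumed directly in the amd_iommu.c hunks further down in this diff; roughly, it turns off three allocator optimizations. The fragments below are condensed here purely for illustration, they are not an additional change:

    #ifdef CONFIG_IOMMU_STRESS
    	populate = false;           /* alloc_new_range(): no PTE pre-population     */
    #endif

    #ifdef CONFIG_IOMMU_STRESS
    	dom->next_address = 0;      /* dma_ops_alloc_addresses(): always rescan     */
    	dom->need_flush = true;     /*   from address 0 and flush the IOTLB         */
    #endif

    #ifdef CONFIG_IOMMU_STRESS
    	if (i < 4)                  /* dma_ops_free_addresses(): addresses in the   */
    		return;             /*   first four 128 MB ranges are never freed   */
    #endif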
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344329bc..262e02820049 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
27extern int amd_iommu_init_dma_ops(void); 27extern int amd_iommu_init_dma_ops(void);
28extern void amd_iommu_detect(void); 28extern void amd_iommu_detect(void);
29extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 29extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
30extern void amd_iommu_flush_all_domains(void);
31extern void amd_iommu_flush_all_devices(void);
30#else 32#else
31static inline int amd_iommu_init(void) { return -ENODEV; } 33static inline int amd_iommu_init(void) { return -ENODEV; }
32static inline void amd_iommu_detect(void) { } 34static inline void amd_iommu_detect(void) { }
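
The two new flush helpers are exported so code outside the dma_ops path can invalidate cached translations; in this patch they are called from the resume handler in amd_iommu_init.c, roughly as in the fragment below (taken from the resume hunk later in this diff):

    enable_iommus();
    /* flush only after the hardware is re-enabled: a disabled IOMMU
     * never executes the commands queued to it */
    amd_iommu_flush_all_domains();
    amd_iommu_flush_all_devices();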
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9d22b5..0c878caaa0a2 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,27 @@
194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ 194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops 195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
196 domain for an IOMMU */ 196 domain for an IOMMU */
197extern bool amd_iommu_dump;
198#define DUMP_printk(format, arg...) \
199 do { \
200 if (amd_iommu_dump) \
201 printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
202 } while(0);
203
204/*
205 * Make iterating over all IOMMUs easier
206 */
207#define for_each_iommu(iommu) \
208 list_for_each_entry((iommu), &amd_iommu_list, list)
209#define for_each_iommu_safe(iommu, next) \
210 list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
211
212#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
213#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
214#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
215#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
216#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
217#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
197 218
198/* 219/*
199 * This structure contains generic data for IOMMU protection domains 220 * This structure contains generic data for IOMMU protection domains
@@ -210,6 +231,26 @@ struct protection_domain {
210}; 231};
211 232
212/* 233/*
234 * For dynamic growth the aperture size is split into ranges of 128MB of
235 * DMA address space each. This struct represents one such range.
236 */
237struct aperture_range {
238
239 /* address allocation bitmap */
240 unsigned long *bitmap;
241
242 /*
243 * Array of PTE pages for the aperture. In this array we save all the
244 * leaf pages of the domain page table used for the aperture. This way
245 * we don't need to walk the page table to find a specific PTE. We can
246 * just calculate its address in constant time.
247 */
248 u64 *pte_pages[64];
249
250 unsigned long offset;
251};
252
253/*
213 * Data container for a dma_ops specific protection domain 254 * Data container for a dma_ops specific protection domain
214 */ 255 */
215struct dma_ops_domain { 256struct dma_ops_domain {
@@ -222,18 +263,10 @@ struct dma_ops_domain {
222 unsigned long aperture_size; 263 unsigned long aperture_size;
223 264
224 /* address we start to search for free addresses */ 265 /* address we start to search for free addresses */
225 unsigned long next_bit; 266 unsigned long next_address;
226
227 /* address allocation bitmap */
228 unsigned long *bitmap;
229 267
230 /* 268 /* address space relevant data */
231 * Array of PTE pages for the aperture. In this array we save all the 269 struct aperture_range *aperture[APERTURE_MAX_RANGES];
232 * leaf pages of the domain page table used for the aperture. This way
233 * we don't need to walk the page table to find a specific PTE. We can
234 * just calculate its address in constant time.
235 */
236 u64 **pte_pages;
237 270
238 /* This will be set to true when TLB needs to be flushed */ 271 /* This will be set to true when TLB needs to be flushed */
239 bool need_flush; 272 bool need_flush;
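
A minimal user-space check of the new aperture arithmetic, with the macros copied from the header above. It assumes the usual x86 PAGE_SHIFT of 12, and the DMA address 0x09400000 (148 MB) is made up for illustration:

    #include <stdio.h>

    #define PAGE_SHIFT              12
    #define APERTURE_RANGE_SHIFT    27      /* 128 MB per range */
    #define APERTURE_RANGE_SIZE     (1ULL << APERTURE_RANGE_SHIFT)
    #define APERTURE_RANGE_PAGES    (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
    #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
    #define APERTURE_PAGE_INDEX(a)  (((a) >> 21) & 0x3fULL)

    int main(void)
    {
        unsigned long long addr = 0x09400000ULL;    /* 148 MB, example only */

        /* Each leaf PTE page maps 512 * 4 KiB = 2 MiB, hence the >> 21;
         * a range holds 64 such pages (pte_pages[64]), hence the & 0x3f. */
        printf("range index: %llu\n", APERTURE_RANGE_INDEX(addr));   /* 1     */
        printf("pte page:    %llu\n", APERTURE_PAGE_INDEX(addr));    /* 10    */
        printf("pages/range: %llu\n", APERTURE_RANGE_PAGES);         /* 32768 */
        return 0;
    }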
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d6898833c363..2c63d8748133 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,12 @@ struct iommu_cmd {
55static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 55static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
56 struct unity_map_entry *e); 56 struct unity_map_entry *e);
57static struct dma_ops_domain *find_protection_domain(u16 devid); 57static struct dma_ops_domain *find_protection_domain(u16 devid);
58 58static u64* alloc_pte(struct protection_domain *dom,
59 unsigned long address, u64
60 **pte_page, gfp_t gfp);
61static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
62 unsigned long start_page,
63 unsigned int pages);
59 64
60#ifndef BUS_NOTIFY_UNBOUND_DRIVER 65#ifndef BUS_NOTIFY_UNBOUND_DRIVER
61#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 66#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
@@ -217,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
217{ 222{
218 struct amd_iommu *iommu; 223 struct amd_iommu *iommu;
219 224
220 list_for_each_entry(iommu, &amd_iommu_list, list) 225 for_each_iommu(iommu)
221 iommu_poll_events(iommu); 226 iommu_poll_events(iommu);
222 227
223 return IRQ_HANDLED; 228 return IRQ_HANDLED;
@@ -444,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
444 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 449 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
445 domid, 1, 1); 450 domid, 1, 1);
446 451
447 list_for_each_entry(iommu, &amd_iommu_list, list) { 452 for_each_iommu(iommu) {
448 spin_lock_irqsave(&iommu->lock, flags); 453 spin_lock_irqsave(&iommu->lock, flags);
449 __iommu_queue_command(iommu, &cmd); 454 __iommu_queue_command(iommu, &cmd);
450 __iommu_completion_wait(iommu); 455 __iommu_completion_wait(iommu);
@@ -453,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
453 } 458 }
454} 459}
455 460
461void amd_iommu_flush_all_domains(void)
462{
463 int i;
464
465 for (i = 1; i < MAX_DOMAIN_ID; ++i) {
466 if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
467 continue;
468 iommu_flush_domain(i);
469 }
470}
471
472void amd_iommu_flush_all_devices(void)
473{
474 struct amd_iommu *iommu;
475 int i;
476
477 for (i = 0; i <= amd_iommu_last_bdf; ++i) {
478 if (amd_iommu_pd_table[i] == NULL)
479 continue;
480
481 iommu = amd_iommu_rlookup_table[i];
482 if (!iommu)
483 continue;
484
485 iommu_queue_inv_dev_entry(iommu, i);
486 iommu_completion_wait(iommu);
487 }
488}
489
456/**************************************************************************** 490/****************************************************************************
457 * 491 *
458 * The functions below are used to create the page table mappings for 492 * The functions below are used to create the page table mappings for
@@ -472,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
472 unsigned long phys_addr, 506 unsigned long phys_addr,
473 int prot) 507 int prot)
474{ 508{
475 u64 __pte, *pte, *page; 509 u64 __pte, *pte;
476 510
477 bus_addr = PAGE_ALIGN(bus_addr); 511 bus_addr = PAGE_ALIGN(bus_addr);
478 phys_addr = PAGE_ALIGN(phys_addr); 512 phys_addr = PAGE_ALIGN(phys_addr);
@@ -481,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
481 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) 515 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
482 return -EINVAL; 516 return -EINVAL;
483 517
484 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; 518 pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
485
486 if (!IOMMU_PTE_PRESENT(*pte)) {
487 page = (u64 *)get_zeroed_page(GFP_KERNEL);
488 if (!page)
489 return -ENOMEM;
490 *pte = IOMMU_L2_PDE(virt_to_phys(page));
491 }
492
493 pte = IOMMU_PTE_PAGE(*pte);
494 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
495
496 if (!IOMMU_PTE_PRESENT(*pte)) {
497 page = (u64 *)get_zeroed_page(GFP_KERNEL);
498 if (!page)
499 return -ENOMEM;
500 *pte = IOMMU_L1_PDE(virt_to_phys(page));
501 }
502
503 pte = IOMMU_PTE_PAGE(*pte);
504 pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
505 519
506 if (IOMMU_PTE_PRESENT(*pte)) 520 if (IOMMU_PTE_PRESENT(*pte))
507 return -EBUSY; 521 return -EBUSY;
@@ -599,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
599 * as allocated in the aperture 613 * as allocated in the aperture
600 */ 614 */
601 if (addr < dma_dom->aperture_size) 615 if (addr < dma_dom->aperture_size)
602 __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); 616 __set_bit(addr >> PAGE_SHIFT,
617 dma_dom->aperture[0]->bitmap);
603 } 618 }
604 619
605 return 0; 620 return 0;
@@ -636,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
636 ****************************************************************************/ 651 ****************************************************************************/
637 652
638/* 653/*
639 * The address allocator core function. 654 * The address allocator core functions.
640 * 655 *
641 * called with domain->lock held 656 * called with domain->lock held
642 */ 657 */
658
659/*
660 * This function checks if there is a PTE for a given dma address. If
661 * there is one, it returns the pointer to it.
662 */
663static u64* fetch_pte(struct protection_domain *domain,
664 unsigned long address)
665{
666 u64 *pte;
667
668 pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
669
670 if (!IOMMU_PTE_PRESENT(*pte))
671 return NULL;
672
673 pte = IOMMU_PTE_PAGE(*pte);
674 pte = &pte[IOMMU_PTE_L1_INDEX(address)];
675
676 if (!IOMMU_PTE_PRESENT(*pte))
677 return NULL;
678
679 pte = IOMMU_PTE_PAGE(*pte);
680 pte = &pte[IOMMU_PTE_L0_INDEX(address)];
681
682 return pte;
683}
684
685/*
686 * This function is used to add a new aperture range to an existing
687 * aperture in case of dma_ops domain allocation or address allocation
688 * failure.
689 */
690static int alloc_new_range(struct amd_iommu *iommu,
691 struct dma_ops_domain *dma_dom,
692 bool populate, gfp_t gfp)
693{
694 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
695 int i;
696
697#ifdef CONFIG_IOMMU_STRESS
698 populate = false;
699#endif
700
701 if (index >= APERTURE_MAX_RANGES)
702 return -ENOMEM;
703
704 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
705 if (!dma_dom->aperture[index])
706 return -ENOMEM;
707
708 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
709 if (!dma_dom->aperture[index]->bitmap)
710 goto out_free;
711
712 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
713
714 if (populate) {
715 unsigned long address = dma_dom->aperture_size;
716 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
717 u64 *pte, *pte_page;
718
719 for (i = 0; i < num_ptes; ++i) {
720 pte = alloc_pte(&dma_dom->domain, address,
721 &pte_page, gfp);
722 if (!pte)
723 goto out_free;
724
725 dma_dom->aperture[index]->pte_pages[i] = pte_page;
726
727 address += APERTURE_RANGE_SIZE / 64;
728 }
729 }
730
731 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
732
733 /* Initialize the exclusion range if necessary */
734 if (iommu->exclusion_start &&
735 iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
736 iommu->exclusion_start < dma_dom->aperture_size) {
737 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
738 int pages = iommu_num_pages(iommu->exclusion_start,
739 iommu->exclusion_length,
740 PAGE_SIZE);
741 dma_ops_reserve_addresses(dma_dom, startpage, pages);
742 }
743
744 /*
745 * Check for areas already mapped as present in the new aperture
746 * range and mark those pages as reserved in the allocator. Such
747 * mappings may already exist as a result of requested unity
748 * mappings for devices.
749 */
750 for (i = dma_dom->aperture[index]->offset;
751 i < dma_dom->aperture_size;
752 i += PAGE_SIZE) {
753 u64 *pte = fetch_pte(&dma_dom->domain, i);
754 if (!pte || !IOMMU_PTE_PRESENT(*pte))
755 continue;
756
757 dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
758 }
759
760 return 0;
761
762out_free:
763 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
764
765 kfree(dma_dom->aperture[index]);
766 dma_dom->aperture[index] = NULL;
767
768 return -ENOMEM;
769}
770
771static unsigned long dma_ops_area_alloc(struct device *dev,
772 struct dma_ops_domain *dom,
773 unsigned int pages,
774 unsigned long align_mask,
775 u64 dma_mask,
776 unsigned long start)
777{
778 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
779 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
780 int i = start >> APERTURE_RANGE_SHIFT;
781 unsigned long boundary_size;
782 unsigned long address = -1;
783 unsigned long limit;
784
785 next_bit >>= PAGE_SHIFT;
786
787 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
788 PAGE_SIZE) >> PAGE_SHIFT;
789
790 for (;i < max_index; ++i) {
791 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
792
793 if (dom->aperture[i]->offset >= dma_mask)
794 break;
795
796 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
797 dma_mask >> PAGE_SHIFT);
798
799 address = iommu_area_alloc(dom->aperture[i]->bitmap,
800 limit, next_bit, pages, 0,
801 boundary_size, align_mask);
802 if (address != -1) {
803 address = dom->aperture[i]->offset +
804 (address << PAGE_SHIFT);
805 dom->next_address = address + (pages << PAGE_SHIFT);
806 break;
807 }
808
809 next_bit = 0;
810 }
811
812 return address;
813}
814
643static unsigned long dma_ops_alloc_addresses(struct device *dev, 815static unsigned long dma_ops_alloc_addresses(struct device *dev,
644 struct dma_ops_domain *dom, 816 struct dma_ops_domain *dom,
645 unsigned int pages, 817 unsigned int pages,
646 unsigned long align_mask, 818 unsigned long align_mask,
647 u64 dma_mask) 819 u64 dma_mask)
648{ 820{
649 unsigned long limit;
650 unsigned long address; 821 unsigned long address;
651 unsigned long boundary_size;
652 822
653 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 823#ifdef CONFIG_IOMMU_STRESS
654 PAGE_SIZE) >> PAGE_SHIFT; 824 dom->next_address = 0;
655 limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, 825 dom->need_flush = true;
656 dma_mask >> PAGE_SHIFT); 826#endif
657 827
658 if (dom->next_bit >= limit) { 828 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
659 dom->next_bit = 0; 829 dma_mask, dom->next_address);
660 dom->need_flush = true;
661 }
662 830
663 address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
664 0 , boundary_size, align_mask);
665 if (address == -1) { 831 if (address == -1) {
666 address = iommu_area_alloc(dom->bitmap, limit, 0, pages, 832 dom->next_address = 0;
667 0, boundary_size, align_mask); 833 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
834 dma_mask, 0);
668 dom->need_flush = true; 835 dom->need_flush = true;
669 } 836 }
670 837
671 if (likely(address != -1)) { 838 if (unlikely(address == -1))
672 dom->next_bit = address + pages;
673 address <<= PAGE_SHIFT;
674 } else
675 address = bad_dma_address; 839 address = bad_dma_address;
676 840
677 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); 841 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
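
To make the per-range bookkeeping in dma_ops_area_alloc() above concrete, here is the arithmetic for a hypothetical hit in the second range (all concrete numbers are invented for illustration):

    /* Suppose the search is in aperture[1] (offset 0x08000000, i.e. 128 MB)
     * and iommu_area_alloc() returns bit 5 of that range's bitmap: */
    address = dom->aperture[1]->offset + (5 << PAGE_SHIFT);   /* 0x08005000 */
    dom->next_address = address + (pages << PAGE_SHIFT);      /* next search starts here */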
@@ -688,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
688 unsigned long address, 852 unsigned long address,
689 unsigned int pages) 853 unsigned int pages)
690{ 854{
691 address >>= PAGE_SHIFT; 855 unsigned i = address >> APERTURE_RANGE_SHIFT;
692 iommu_area_free(dom->bitmap, address, pages); 856 struct aperture_range *range = dom->aperture[i];
857
858 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
859
860#ifdef CONFIG_IOMMU_STRESS
861 if (i < 4)
862 return;
863#endif
693 864
694 if (address >= dom->next_bit) 865 if (address >= dom->next_address)
695 dom->need_flush = true; 866 dom->need_flush = true;
867
868 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
869
870 iommu_area_free(range->bitmap, address, pages);
871
696} 872}
697 873
698/**************************************************************************** 874/****************************************************************************
@@ -740,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
740 unsigned long start_page, 916 unsigned long start_page,
741 unsigned int pages) 917 unsigned int pages)
742{ 918{
743 unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; 919 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
744 920
745 if (start_page + pages > last_page) 921 if (start_page + pages > last_page)
746 pages = last_page - start_page; 922 pages = last_page - start_page;
747 923
748 iommu_area_reserve(dom->bitmap, start_page, pages); 924 for (i = start_page; i < start_page + pages; ++i) {
925 int index = i / APERTURE_RANGE_PAGES;
926 int page = i % APERTURE_RANGE_PAGES;
927 __set_bit(page, dom->aperture[index]->bitmap);
928 }
749} 929}
750 930
751static void free_pagetable(struct protection_domain *domain) 931static void free_pagetable(struct protection_domain *domain)
@@ -784,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
784 */ 964 */
785static void dma_ops_domain_free(struct dma_ops_domain *dom) 965static void dma_ops_domain_free(struct dma_ops_domain *dom)
786{ 966{
967 int i;
968
787 if (!dom) 969 if (!dom)
788 return; 970 return;
789 971
790 free_pagetable(&dom->domain); 972 free_pagetable(&dom->domain);
791 973
792 kfree(dom->pte_pages); 974 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
793 975 if (!dom->aperture[i])
794 kfree(dom->bitmap); 976 continue;
977 free_page((unsigned long)dom->aperture[i]->bitmap);
978 kfree(dom->aperture[i]);
979 }
795 980
796 kfree(dom); 981 kfree(dom);
797} 982}
@@ -801,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
801 * It also initializes the page table and the address allocator data 986 * It also initializes the page table and the address allocator data
802 * structures required for the dma_ops interface 987 * structures required for the dma_ops interface
803 */ 988 */
804static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, 989static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
805 unsigned order)
806{ 990{
807 struct dma_ops_domain *dma_dom; 991 struct dma_ops_domain *dma_dom;
808 unsigned i, num_pte_pages;
809 u64 *l2_pde;
810 u64 address;
811
812 /*
813 * Currently the DMA aperture must be between 32 MB and 1GB in size
814 */
815 if ((order < 25) || (order > 30))
816 return NULL;
817 992
818 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); 993 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
819 if (!dma_dom) 994 if (!dma_dom)
@@ -830,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
830 dma_dom->domain.priv = dma_dom; 1005 dma_dom->domain.priv = dma_dom;
831 if (!dma_dom->domain.pt_root) 1006 if (!dma_dom->domain.pt_root)
832 goto free_dma_dom; 1007 goto free_dma_dom;
833 dma_dom->aperture_size = (1ULL << order);
834 dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
835 GFP_KERNEL);
836 if (!dma_dom->bitmap)
837 goto free_dma_dom;
838 /*
839 * mark the first page as allocated so we never return 0 as
840 * a valid dma-address. So we can use 0 as error value
841 */
842 dma_dom->bitmap[0] = 1;
843 dma_dom->next_bit = 0;
844 1008
845 dma_dom->need_flush = false; 1009 dma_dom->need_flush = false;
846 dma_dom->target_dev = 0xffff; 1010 dma_dom->target_dev = 0xffff;
847 1011
848 /* Intialize the exclusion range if necessary */ 1012 if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
849 if (iommu->exclusion_start && 1013 goto free_dma_dom;
850 iommu->exclusion_start < dma_dom->aperture_size) {
851 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
852 int pages = iommu_num_pages(iommu->exclusion_start,
853 iommu->exclusion_length,
854 PAGE_SIZE);
855 dma_ops_reserve_addresses(dma_dom, startpage, pages);
856 }
857 1014
858 /* 1015 /*
859 * At the last step, build the page tables so we don't need to 1016 * mark the first page as allocated so we never return 0 as
860 * allocate page table pages in the dma_ops mapping/unmapping 1017 * a valid dma-address. So we can use 0 as error value
861 * path.
862 */ 1018 */
863 num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); 1019 dma_dom->aperture[0]->bitmap[0] = 1;
864 dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), 1020 dma_dom->next_address = 0;
865 GFP_KERNEL);
866 if (!dma_dom->pte_pages)
867 goto free_dma_dom;
868
869 l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
870 if (l2_pde == NULL)
871 goto free_dma_dom;
872 1021
873 dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
874
875 for (i = 0; i < num_pte_pages; ++i) {
876 dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
877 if (!dma_dom->pte_pages[i])
878 goto free_dma_dom;
879 address = virt_to_phys(dma_dom->pte_pages[i]);
880 l2_pde[i] = IOMMU_L1_PDE(address);
881 }
882 1022
883 return dma_dom; 1023 return dma_dom;
884 1024
@@ -987,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
987 struct protection_domain *domain; 1127 struct protection_domain *domain;
988 struct dma_ops_domain *dma_domain; 1128 struct dma_ops_domain *dma_domain;
989 struct amd_iommu *iommu; 1129 struct amd_iommu *iommu;
990 int order = amd_iommu_aperture_order;
991 unsigned long flags; 1130 unsigned long flags;
992 1131
993 if (devid > amd_iommu_last_bdf) 1132 if (devid > amd_iommu_last_bdf)
@@ -1013,8 +1152,9 @@ static int device_change_notifier(struct notifier_block *nb,
1013 if (!dma_domain) 1152 if (!dma_domain)
1014 dma_domain = iommu->default_dom; 1153 dma_domain = iommu->default_dom;
1015 attach_device(iommu, &dma_domain->domain, devid); 1154 attach_device(iommu, &dma_domain->domain, devid);
1016 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1155 DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
1017 "device %s\n", dma_domain->domain.id, dev_name(dev)); 1156 "%d for device %s\n",
1157 dma_domain->domain.id, dev_name(dev));
1018 break; 1158 break;
1019 case BUS_NOTIFY_UNBOUND_DRIVER: 1159 case BUS_NOTIFY_UNBOUND_DRIVER:
1020 if (!domain) 1160 if (!domain)
@@ -1026,7 +1166,7 @@ static int device_change_notifier(struct notifier_block *nb,
1026 dma_domain = find_protection_domain(devid); 1166 dma_domain = find_protection_domain(devid);
1027 if (dma_domain) 1167 if (dma_domain)
1028 goto out; 1168 goto out;
1029 dma_domain = dma_ops_domain_alloc(iommu, order); 1169 dma_domain = dma_ops_domain_alloc(iommu);
1030 if (!dma_domain) 1170 if (!dma_domain)
1031 goto out; 1171 goto out;
1032 dma_domain->target_dev = devid; 1172 dma_domain->target_dev = devid;
@@ -1137,8 +1277,9 @@ static int get_device_resources(struct device *dev,
1137 dma_dom = (*iommu)->default_dom; 1277 dma_dom = (*iommu)->default_dom;
1138 *domain = &dma_dom->domain; 1278 *domain = &dma_dom->domain;
1139 attach_device(*iommu, *domain, *bdf); 1279 attach_device(*iommu, *domain, *bdf);
1140 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1280 DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
1141 "device %s\n", (*domain)->id, dev_name(dev)); 1281 "%d for device %s\n",
1282 (*domain)->id, dev_name(dev));
1142 } 1283 }
1143 1284
1144 if (domain_for_device(_bdf) == NULL) 1285 if (domain_for_device(_bdf) == NULL)
@@ -1148,6 +1289,66 @@ static int get_device_resources(struct device *dev,
1148} 1289}
1149 1290
1150/* 1291/*
1292 * If the pte_page is not yet allocated this function is called
1293 */
1294static u64* alloc_pte(struct protection_domain *dom,
1295 unsigned long address, u64 **pte_page, gfp_t gfp)
1296{
1297 u64 *pte, *page;
1298
1299 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
1300
1301 if (!IOMMU_PTE_PRESENT(*pte)) {
1302 page = (u64 *)get_zeroed_page(gfp);
1303 if (!page)
1304 return NULL;
1305 *pte = IOMMU_L2_PDE(virt_to_phys(page));
1306 }
1307
1308 pte = IOMMU_PTE_PAGE(*pte);
1309 pte = &pte[IOMMU_PTE_L1_INDEX(address)];
1310
1311 if (!IOMMU_PTE_PRESENT(*pte)) {
1312 page = (u64 *)get_zeroed_page(gfp);
1313 if (!page)
1314 return NULL;
1315 *pte = IOMMU_L1_PDE(virt_to_phys(page));
1316 }
1317
1318 pte = IOMMU_PTE_PAGE(*pte);
1319
1320 if (pte_page)
1321 *pte_page = pte;
1322
1323 pte = &pte[IOMMU_PTE_L0_INDEX(address)];
1324
1325 return pte;
1326}
1327
1328/*
1329 * This function fetches the PTE for a given address in the aperture
1330 */
1331static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1332 unsigned long address)
1333{
1334 struct aperture_range *aperture;
1335 u64 *pte, *pte_page;
1336
1337 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1338 if (!aperture)
1339 return NULL;
1340
1341 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1342 if (!pte) {
1343 pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
1344 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1345 } else
1346 pte += IOMMU_PTE_L0_INDEX(address);
1347
1348 return pte;
1349}
1350
1351/*
1151 * This is the generic map function. It maps one 4kb page at paddr to 1352 * This is the generic map function. It maps one 4kb page at paddr to
1152 * the given address in the DMA address space for the domain. 1353 * the given address in the DMA address space for the domain.
1153 */ 1354 */
@@ -1163,8 +1364,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1163 1364
1164 paddr &= PAGE_MASK; 1365 paddr &= PAGE_MASK;
1165 1366
1166 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; 1367 pte = dma_ops_get_pte(dom, address);
1167 pte += IOMMU_PTE_L0_INDEX(address); 1368 if (!pte)
1369 return bad_dma_address;
1168 1370
1169 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; 1371 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1170 1372
@@ -1189,14 +1391,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
1189 struct dma_ops_domain *dom, 1391 struct dma_ops_domain *dom,
1190 unsigned long address) 1392 unsigned long address)
1191{ 1393{
1394 struct aperture_range *aperture;
1192 u64 *pte; 1395 u64 *pte;
1193 1396
1194 if (address >= dom->aperture_size) 1397 if (address >= dom->aperture_size)
1195 return; 1398 return;
1196 1399
1197 WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); 1400 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1401 if (!aperture)
1402 return;
1403
1404 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1405 if (!pte)
1406 return;
1198 1407
1199 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
1200 pte += IOMMU_PTE_L0_INDEX(address); 1408 pte += IOMMU_PTE_L0_INDEX(address);
1201 1409
1202 WARN_ON(!*pte); 1410 WARN_ON(!*pte);
@@ -1220,7 +1428,7 @@ static dma_addr_t __map_single(struct device *dev,
1220 u64 dma_mask) 1428 u64 dma_mask)
1221{ 1429{
1222 dma_addr_t offset = paddr & ~PAGE_MASK; 1430 dma_addr_t offset = paddr & ~PAGE_MASK;
1223 dma_addr_t address, start; 1431 dma_addr_t address, start, ret;
1224 unsigned int pages; 1432 unsigned int pages;
1225 unsigned long align_mask = 0; 1433 unsigned long align_mask = 0;
1226 int i; 1434 int i;
@@ -1236,14 +1444,33 @@ static dma_addr_t __map_single(struct device *dev,
1236 if (align) 1444 if (align)
1237 align_mask = (1UL << get_order(size)) - 1; 1445 align_mask = (1UL << get_order(size)) - 1;
1238 1446
1447retry:
1239 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 1448 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1240 dma_mask); 1449 dma_mask);
1241 if (unlikely(address == bad_dma_address)) 1450 if (unlikely(address == bad_dma_address)) {
1242 goto out; 1451 /*
1452 * setting next_address here will let the address
1453 * allocator only scan the new allocated range in the
1454 * first run. This is a small optimization.
1455 */
1456 dma_dom->next_address = dma_dom->aperture_size;
1457
1458 if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
1459 goto out;
1460
1461 /*
1462 * aperture was successfully enlarged by 128 MB, try
1463 * allocation again
1464 */
1465 goto retry;
1466 }
1243 1467
1244 start = address; 1468 start = address;
1245 for (i = 0; i < pages; ++i) { 1469 for (i = 0; i < pages; ++i) {
1246 dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); 1470 ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
1471 if (ret == bad_dma_address)
1472 goto out_unmap;
1473
1247 paddr += PAGE_SIZE; 1474 paddr += PAGE_SIZE;
1248 start += PAGE_SIZE; 1475 start += PAGE_SIZE;
1249 } 1476 }
@@ -1259,6 +1486,17 @@ static dma_addr_t __map_single(struct device *dev,
1259 1486
1260out: 1487out:
1261 return address; 1488 return address;
1489
1490out_unmap:
1491
1492 for (--i; i >= 0; --i) {
1493 start -= PAGE_SIZE;
1494 dma_ops_domain_unmap(iommu, dma_dom, start);
1495 }
1496
1497 dma_ops_free_addresses(dma_dom, address, pages);
1498
1499 return bad_dma_address;
1262} 1500}
1263 1501
1264/* 1502/*
@@ -1629,7 +1867,6 @@ static void prealloc_protection_domains(void)
1629 struct pci_dev *dev = NULL; 1867 struct pci_dev *dev = NULL;
1630 struct dma_ops_domain *dma_dom; 1868 struct dma_ops_domain *dma_dom;
1631 struct amd_iommu *iommu; 1869 struct amd_iommu *iommu;
1632 int order = amd_iommu_aperture_order;
1633 u16 devid; 1870 u16 devid;
1634 1871
1635 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1872 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1642,7 +1879,7 @@ static void prealloc_protection_domains(void)
1642 iommu = amd_iommu_rlookup_table[devid]; 1879 iommu = amd_iommu_rlookup_table[devid];
1643 if (!iommu) 1880 if (!iommu)
1644 continue; 1881 continue;
1645 dma_dom = dma_ops_domain_alloc(iommu, order); 1882 dma_dom = dma_ops_domain_alloc(iommu);
1646 if (!dma_dom) 1883 if (!dma_dom)
1647 continue; 1884 continue;
1648 init_unity_mappings_for_device(dma_dom, devid); 1885 init_unity_mappings_for_device(dma_dom, devid);
@@ -1668,7 +1905,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
1668int __init amd_iommu_init_dma_ops(void) 1905int __init amd_iommu_init_dma_ops(void)
1669{ 1906{
1670 struct amd_iommu *iommu; 1907 struct amd_iommu *iommu;
1671 int order = amd_iommu_aperture_order;
1672 int ret; 1908 int ret;
1673 1909
1674 /* 1910 /*
@@ -1676,8 +1912,8 @@ int __init amd_iommu_init_dma_ops(void)
1676 * found in the system. Devices not assigned to any other 1912 * found in the system. Devices not assigned to any other
1677 * protection domain will be assigned to the default one. 1913 * protection domain will be assigned to the default one.
1678 */ 1914 */
1679 list_for_each_entry(iommu, &amd_iommu_list, list) { 1915 for_each_iommu(iommu) {
1680 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1916 iommu->default_dom = dma_ops_domain_alloc(iommu);
1681 if (iommu->default_dom == NULL) 1917 if (iommu->default_dom == NULL)
1682 return -ENOMEM; 1918 return -ENOMEM;
1683 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 1919 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1714,7 +1950,7 @@ int __init amd_iommu_init_dma_ops(void)
1714 1950
1715free_domains: 1951free_domains:
1716 1952
1717 list_for_each_entry(iommu, &amd_iommu_list, list) { 1953 for_each_iommu(iommu) {
1718 if (iommu->default_dom) 1954 if (iommu->default_dom)
1719 dma_ops_domain_free(iommu->default_dom); 1955 dma_ops_domain_free(iommu->default_dom);
1720 } 1956 }
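
Taken together, the amd_iommu.c changes make the mapping path self-growing; the rough shape, condensed as a sketch from the __map_single() hunk above and not verbatim kernel code, is:

    address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask);
    if (address == bad_dma_address) {
    	/* let the next attempt scan only the newly allocated range */
    	dma_dom->next_address = dma_dom->aperture_size;
    	if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
    		goto out;       /* aperture already at its 4 GB maximum */
    	goto retry;             /* aperture grew by 128 MB, try again   */
    }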
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index a3a2b98bb39e..238989ec077d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,15 +115,21 @@ struct ivmd_header {
115 u64 range_length; 115 u64 range_length;
116} __attribute__((packed)); 116} __attribute__((packed));
117 117
118bool amd_iommu_dump;
119
118static int __initdata amd_iommu_detected; 120static int __initdata amd_iommu_detected;
119 121
120u16 amd_iommu_last_bdf; /* largest PCI device id we have 122u16 amd_iommu_last_bdf; /* largest PCI device id we have
121 to handle */ 123 to handle */
122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 124LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 we find in ACPI */ 125 we find in ACPI */
124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 126#ifdef CONFIG_IOMMU_STRESS
127bool amd_iommu_isolate = false;
128#else
125bool amd_iommu_isolate = true; /* if true, device isolation is 129bool amd_iommu_isolate = true; /* if true, device isolation is
126 enabled */ 130 enabled */
131#endif
132
127bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 133bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
128 134
129LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 135LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
193 * This function set the exclusion range in the IOMMU. DMA accesses to the 199 * This function set the exclusion range in the IOMMU. DMA accesses to the
194 * exclusion range are passed through untranslated 200 * exclusion range are passed through untranslated
195 */ 201 */
196static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) 202static void iommu_set_exclusion_range(struct amd_iommu *iommu)
197{ 203{
198 u64 start = iommu->exclusion_start & PAGE_MASK; 204 u64 start = iommu->exclusion_start & PAGE_MASK;
199 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; 205 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
225} 231}
226 232
227/* Generic functions to enable/disable certain features of the IOMMU. */ 233/* Generic functions to enable/disable certain features of the IOMMU. */
228static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 234static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
229{ 235{
230 u32 ctrl; 236 u32 ctrl;
231 237
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
244} 250}
245 251
246/* Function to enable the hardware */ 252/* Function to enable the hardware */
247static void __init iommu_enable(struct amd_iommu *iommu) 253static void iommu_enable(struct amd_iommu *iommu)
248{ 254{
249 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", 255 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
250 dev_name(&iommu->dev->dev), iommu->cap_ptr); 256 dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 258 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
253} 259}
254 260
255/* Function to enable IOMMU event logging and event interrupts */ 261static void iommu_disable(struct amd_iommu *iommu)
256static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
257{ 262{
258 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 263 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
259 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
260} 264}
261 265
262/* 266/*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
413{ 417{
414 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 418 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
415 get_order(CMD_BUFFER_SIZE)); 419 get_order(CMD_BUFFER_SIZE));
416 u64 entry;
417 420
418 if (cmd_buf == NULL) 421 if (cmd_buf == NULL)
419 return NULL; 422 return NULL;
420 423
421 iommu->cmd_buf_size = CMD_BUFFER_SIZE; 424 iommu->cmd_buf_size = CMD_BUFFER_SIZE;
422 425
423 entry = (u64)virt_to_phys(cmd_buf); 426 return cmd_buf;
427}
428
429/*
430 * This function writes the command buffer address to the hardware and
431 * enables it.
432 */
433static void iommu_enable_command_buffer(struct amd_iommu *iommu)
434{
435 u64 entry;
436
437 BUG_ON(iommu->cmd_buf == NULL);
438
439 entry = (u64)virt_to_phys(iommu->cmd_buf);
424 entry |= MMIO_CMD_SIZE_512; 440 entry |= MMIO_CMD_SIZE_512;
441
425 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 442 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
426 &entry, sizeof(entry)); 443 &entry, sizeof(entry));
427 444
428 /* set head and tail to zero manually */ 445 /* set head and tail to zero manually */
429 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 446 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
430 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 447 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
431 448
432 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 449 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
433
434 return cmd_buf;
435} 450}
436 451
437static void __init free_command_buffer(struct amd_iommu *iommu) 452static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
443/* allocates the memory where the IOMMU will log its events to */ 458/* allocates the memory where the IOMMU will log its events to */
444static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) 459static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
445{ 460{
446 u64 entry;
447 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 461 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
448 get_order(EVT_BUFFER_SIZE)); 462 get_order(EVT_BUFFER_SIZE));
449 463
450 if (iommu->evt_buf == NULL) 464 if (iommu->evt_buf == NULL)
451 return NULL; 465 return NULL;
452 466
467 return iommu->evt_buf;
468}
469
470static void iommu_enable_event_buffer(struct amd_iommu *iommu)
471{
472 u64 entry;
473
474 BUG_ON(iommu->evt_buf == NULL);
475
453 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 476 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
477
454 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 478 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
455 &entry, sizeof(entry)); 479 &entry, sizeof(entry));
456 480
457 iommu->evt_buf_size = EVT_BUFFER_SIZE; 481 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
458
459 return iommu->evt_buf;
460} 482}
461 483
462static void __init free_event_buffer(struct amd_iommu *iommu) 484static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
596 p += sizeof(struct ivhd_header); 618 p += sizeof(struct ivhd_header);
597 end += h->length; 619 end += h->length;
598 620
621
599 while (p < end) { 622 while (p < end) {
600 e = (struct ivhd_entry *)p; 623 e = (struct ivhd_entry *)p;
601 switch (e->type) { 624 switch (e->type) {
602 case IVHD_DEV_ALL: 625 case IVHD_DEV_ALL:
626
627 DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
628 " last device %02x:%02x.%x flags: %02x\n",
629 PCI_BUS(iommu->first_device),
630 PCI_SLOT(iommu->first_device),
631 PCI_FUNC(iommu->first_device),
632 PCI_BUS(iommu->last_device),
633 PCI_SLOT(iommu->last_device),
634 PCI_FUNC(iommu->last_device),
635 e->flags);
636
603 for (dev_i = iommu->first_device; 637 for (dev_i = iommu->first_device;
604 dev_i <= iommu->last_device; ++dev_i) 638 dev_i <= iommu->last_device; ++dev_i)
605 set_dev_entry_from_acpi(iommu, dev_i, 639 set_dev_entry_from_acpi(iommu, dev_i,
606 e->flags, 0); 640 e->flags, 0);
607 break; 641 break;
608 case IVHD_DEV_SELECT: 642 case IVHD_DEV_SELECT:
643
644 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
645 "flags: %02x\n",
646 PCI_BUS(e->devid),
647 PCI_SLOT(e->devid),
648 PCI_FUNC(e->devid),
649 e->flags);
650
609 devid = e->devid; 651 devid = e->devid;
610 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 652 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
611 break; 653 break;
612 case IVHD_DEV_SELECT_RANGE_START: 654 case IVHD_DEV_SELECT_RANGE_START:
655
656 DUMP_printk(" DEV_SELECT_RANGE_START\t "
657 "devid: %02x:%02x.%x flags: %02x\n",
658 PCI_BUS(e->devid),
659 PCI_SLOT(e->devid),
660 PCI_FUNC(e->devid),
661 e->flags);
662
613 devid_start = e->devid; 663 devid_start = e->devid;
614 flags = e->flags; 664 flags = e->flags;
615 ext_flags = 0; 665 ext_flags = 0;
616 alias = false; 666 alias = false;
617 break; 667 break;
618 case IVHD_DEV_ALIAS: 668 case IVHD_DEV_ALIAS:
669
670 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
671 "flags: %02x devid_to: %02x:%02x.%x\n",
672 PCI_BUS(e->devid),
673 PCI_SLOT(e->devid),
674 PCI_FUNC(e->devid),
675 e->flags,
676 PCI_BUS(e->ext >> 8),
677 PCI_SLOT(e->ext >> 8),
678 PCI_FUNC(e->ext >> 8));
679
619 devid = e->devid; 680 devid = e->devid;
620 devid_to = e->ext >> 8; 681 devid_to = e->ext >> 8;
621 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 682 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
622 amd_iommu_alias_table[devid] = devid_to; 683 amd_iommu_alias_table[devid] = devid_to;
623 break; 684 break;
624 case IVHD_DEV_ALIAS_RANGE: 685 case IVHD_DEV_ALIAS_RANGE:
686
687 DUMP_printk(" DEV_ALIAS_RANGE\t\t "
688 "devid: %02x:%02x.%x flags: %02x "
689 "devid_to: %02x:%02x.%x\n",
690 PCI_BUS(e->devid),
691 PCI_SLOT(e->devid),
692 PCI_FUNC(e->devid),
693 e->flags,
694 PCI_BUS(e->ext >> 8),
695 PCI_SLOT(e->ext >> 8),
696 PCI_FUNC(e->ext >> 8));
697
625 devid_start = e->devid; 698 devid_start = e->devid;
626 flags = e->flags; 699 flags = e->flags;
627 devid_to = e->ext >> 8; 700 devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
629 alias = true; 702 alias = true;
630 break; 703 break;
631 case IVHD_DEV_EXT_SELECT: 704 case IVHD_DEV_EXT_SELECT:
705
706 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
707 "flags: %02x ext: %08x\n",
708 PCI_BUS(e->devid),
709 PCI_SLOT(e->devid),
710 PCI_FUNC(e->devid),
711 e->flags, e->ext);
712
632 devid = e->devid; 713 devid = e->devid;
633 set_dev_entry_from_acpi(iommu, devid, e->flags, 714 set_dev_entry_from_acpi(iommu, devid, e->flags,
634 e->ext); 715 e->ext);
635 break; 716 break;
636 case IVHD_DEV_EXT_SELECT_RANGE: 717 case IVHD_DEV_EXT_SELECT_RANGE:
718
719 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
720 "%02x:%02x.%x flags: %02x ext: %08x\n",
721 PCI_BUS(e->devid),
722 PCI_SLOT(e->devid),
723 PCI_FUNC(e->devid),
724 e->flags, e->ext);
725
637 devid_start = e->devid; 726 devid_start = e->devid;
638 flags = e->flags; 727 flags = e->flags;
639 ext_flags = e->ext; 728 ext_flags = e->ext;
640 alias = false; 729 alias = false;
641 break; 730 break;
642 case IVHD_DEV_RANGE_END: 731 case IVHD_DEV_RANGE_END:
732
733 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
734 PCI_BUS(e->devid),
735 PCI_SLOT(e->devid),
736 PCI_FUNC(e->devid));
737
643 devid = e->devid; 738 devid = e->devid;
644 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 739 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
645 if (alias) 740 if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
679{ 774{
680 struct amd_iommu *iommu, *next; 775 struct amd_iommu *iommu, *next;
681 776
682 list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { 777 for_each_iommu_safe(iommu, next) {
683 list_del(&iommu->list); 778 list_del(&iommu->list);
684 free_iommu_one(iommu); 779 free_iommu_one(iommu);
685 kfree(iommu); 780 kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
710 if (!iommu->mmio_base) 805 if (!iommu->mmio_base)
711 return -ENOMEM; 806 return -ENOMEM;
712 807
713 iommu_set_device_table(iommu);
714 iommu->cmd_buf = alloc_command_buffer(iommu); 808 iommu->cmd_buf = alloc_command_buffer(iommu);
715 if (!iommu->cmd_buf) 809 if (!iommu->cmd_buf)
716 return -ENOMEM; 810 return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
746 h = (struct ivhd_header *)p; 840 h = (struct ivhd_header *)p;
747 switch (*p) { 841 switch (*p) {
748 case ACPI_IVHD_TYPE: 842 case ACPI_IVHD_TYPE:
843
844 DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
845 "seg: %d flags: %01x info %04x\n",
846 PCI_BUS(h->devid), PCI_SLOT(h->devid),
847 PCI_FUNC(h->devid), h->cap_ptr,
848 h->pci_seg, h->flags, h->info);
849 DUMP_printk(" mmio-addr: %016llx\n",
850 h->mmio_phys);
851
749 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 852 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
750 if (iommu == NULL) 853 if (iommu == NULL)
751 return -ENOMEM; 854 return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
773 * 876 *
774 ****************************************************************************/ 877 ****************************************************************************/
775 878
776static int __init iommu_setup_msix(struct amd_iommu *iommu)
777{
778 struct amd_iommu *curr;
779 struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
780 int nvec = 0, i;
781
782 list_for_each_entry(curr, &amd_iommu_list, list) {
783 if (curr->dev == iommu->dev) {
784 entries[nvec].entry = curr->evt_msi_num;
785 entries[nvec].vector = 0;
786 curr->int_enabled = true;
787 nvec++;
788 }
789 }
790
791 if (pci_enable_msix(iommu->dev, entries, nvec)) {
792 pci_disable_msix(iommu->dev);
793 return 1;
794 }
795
796 for (i = 0; i < nvec; ++i) {
797 int r = request_irq(entries->vector, amd_iommu_int_handler,
798 IRQF_SAMPLE_RANDOM,
799 "AMD IOMMU",
800 NULL);
801 if (r)
802 goto out_free;
803 }
804
805 return 0;
806
807out_free:
808 for (i -= 1; i >= 0; --i)
809 free_irq(entries->vector, NULL);
810
811 pci_disable_msix(iommu->dev);
812
813 return 1;
814}
815
816static int __init iommu_setup_msi(struct amd_iommu *iommu) 879static int __init iommu_setup_msi(struct amd_iommu *iommu)
817{ 880{
818 int r; 881 int r;
819 struct amd_iommu *curr;
820
821 list_for_each_entry(curr, &amd_iommu_list, list) {
822 if (curr->dev == iommu->dev)
823 curr->int_enabled = true;
824 }
825
826 882
827 if (pci_enable_msi(iommu->dev)) 883 if (pci_enable_msi(iommu->dev))
828 return 1; 884 return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
837 return 1; 893 return 1;
838 } 894 }
839 895
896 iommu->int_enabled = true;
897 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
898
840 return 0; 899 return 0;
841} 900}
842 901
843static int __init iommu_init_msi(struct amd_iommu *iommu) 902static int iommu_init_msi(struct amd_iommu *iommu)
844{ 903{
845 if (iommu->int_enabled) 904 if (iommu->int_enabled)
846 return 0; 905 return 0;
847 906
848 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) 907 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
849 return iommu_setup_msix(iommu);
850 else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
851 return iommu_setup_msi(iommu); 908 return iommu_setup_msi(iommu);
852 909
853 return 1; 910 return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
899static int __init init_unity_map_range(struct ivmd_header *m) 956static int __init init_unity_map_range(struct ivmd_header *m)
900{ 957{
901 struct unity_map_entry *e = 0; 958 struct unity_map_entry *e = 0;
959 char *s;
902 960
903 e = kzalloc(sizeof(*e), GFP_KERNEL); 961 e = kzalloc(sizeof(*e), GFP_KERNEL);
904 if (e == NULL) 962 if (e == NULL)
@@ -909,13 +967,16 @@ static int __init init_unity_map_range(struct ivmd_header *m)
909 kfree(e); 967 kfree(e);
910 return 0; 968 return 0;
911 case ACPI_IVMD_TYPE: 969 case ACPI_IVMD_TYPE:
970 s = "IVMD_TYPEi\t\t\t";
912 e->devid_start = e->devid_end = m->devid; 971 e->devid_start = e->devid_end = m->devid;
913 break; 972 break;
914 case ACPI_IVMD_TYPE_ALL: 973 case ACPI_IVMD_TYPE_ALL:
974 s = "IVMD_TYPE_ALL\t\t";
915 e->devid_start = 0; 975 e->devid_start = 0;
916 e->devid_end = amd_iommu_last_bdf; 976 e->devid_end = amd_iommu_last_bdf;
917 break; 977 break;
918 case ACPI_IVMD_TYPE_RANGE: 978 case ACPI_IVMD_TYPE_RANGE:
979 s = "IVMD_TYPE_RANGE\t\t";
919 e->devid_start = m->devid; 980 e->devid_start = m->devid;
920 e->devid_end = m->aux; 981 e->devid_end = m->aux;
921 break; 982 break;
@@ -924,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
924 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 985 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
925 e->prot = m->flags >> 1; 986 e->prot = m->flags >> 1;
926 987
988 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
989 " range_start: %016llx range_end: %016llx flags: %x\n", s,
990 PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
991 PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
992 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
993 e->address_start, e->address_end, m->flags);
994
927 list_add_tail(&e->list, &amd_iommu_unity_map); 995 list_add_tail(&e->list, &amd_iommu_unity_map);
928 996
929 return 0; 997 return 0;
@@ -969,18 +1037,28 @@ static void init_device_table(void)
969 * This function finally enables all IOMMUs found in the system after 1037 * This function finally enables all IOMMUs found in the system after
970 * they have been initialized 1038 * they have been initialized
971 */ 1039 */
972static void __init enable_iommus(void) 1040static void enable_iommus(void)
973{ 1041{
974 struct amd_iommu *iommu; 1042 struct amd_iommu *iommu;
975 1043
976 list_for_each_entry(iommu, &amd_iommu_list, list) { 1044 for_each_iommu(iommu) {
1045 iommu_set_device_table(iommu);
1046 iommu_enable_command_buffer(iommu);
1047 iommu_enable_event_buffer(iommu);
977 iommu_set_exclusion_range(iommu); 1048 iommu_set_exclusion_range(iommu);
978 iommu_init_msi(iommu); 1049 iommu_init_msi(iommu);
979 iommu_enable_event_logging(iommu);
980 iommu_enable(iommu); 1050 iommu_enable(iommu);
981 } 1051 }
982} 1052}
983 1053
1054static void disable_iommus(void)
1055{
1056 struct amd_iommu *iommu;
1057
1058 for_each_iommu(iommu)
1059 iommu_disable(iommu);
1060}
1061
984/* 1062/*
985 * Suspend/Resume support 1063 * Suspend/Resume support
986 * disable suspend until real resume implemented 1064 * disable suspend until real resume implemented
@@ -988,12 +1066,31 @@ static void __init enable_iommus(void)
988 1066
989static int amd_iommu_resume(struct sys_device *dev) 1067static int amd_iommu_resume(struct sys_device *dev)
990{ 1068{
1069 /*
1070 * Disable IOMMUs before reprogramming the hardware registers.
1071 * IOMMU is still enabled from the resume kernel.
1072 */
1073 disable_iommus();
1074
1075 /* re-load the hardware */
1076 enable_iommus();
1077
1078 /*
1079 * we have to flush after the IOMMUs are enabled because a
1080 * disabled IOMMU will never execute the commands we send
1081 */
1082 amd_iommu_flush_all_domains();
1083 amd_iommu_flush_all_devices();
1084
991 return 0; 1085 return 0;
992} 1086}
993 1087
994static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) 1088static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
995{ 1089{
996 return -EINVAL; 1090 /* disable IOMMUs to go out of the way for BIOS */
1091 disable_iommus();
1092
1093 return 0;
997} 1094}
998 1095
999static struct sysdev_class amd_iommu_sysdev_class = { 1096static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1139,9 +1236,6 @@ int __init amd_iommu_init(void)
1139 1236
1140 enable_iommus(); 1237 enable_iommus();
1141 1238
1142 printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
1143 (1 << (amd_iommu_aperture_order-20)));
1144
1145 printk(KERN_INFO "AMD IOMMU: device isolation "); 1239 printk(KERN_INFO "AMD IOMMU: device isolation ");
1146 if (amd_iommu_isolate) 1240 if (amd_iommu_isolate)
1147 printk("enabled\n"); 1241 printk("enabled\n");
@@ -1213,6 +1307,13 @@ void __init amd_iommu_detect(void)
1213 * 1307 *
1214 ****************************************************************************/ 1308 ****************************************************************************/
1215 1309
1310static int __init parse_amd_iommu_dump(char *str)
1311{
1312 amd_iommu_dump = true;
1313
1314 return 1;
1315}
1316
1216static int __init parse_amd_iommu_options(char *str) 1317static int __init parse_amd_iommu_options(char *str)
1217{ 1318{
1218 for (; *str; ++str) { 1319 for (; *str; ++str) {
@@ -1227,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
1227 return 1; 1328 return 1;
1228} 1329}
1229 1330
1230static int __init parse_amd_iommu_size_options(char *str) 1331__setup("amd_iommu_dump", parse_amd_iommu_dump);
1231{
1232 unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
1233
1234 if ((order > 24) && (order < 31))
1235 amd_iommu_aperture_order = order;
1236
1237 return 1;
1238}
1239
1240__setup("amd_iommu=", parse_amd_iommu_options); 1332__setup("amd_iommu=", parse_amd_iommu_options);
1241__setup("amd_iommu_size=", parse_amd_iommu_size_options);
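
With the new boot option registered above, the ACPI topology dump is enabled by adding the flag to the kernel command line, for example:

    amd_iommu_dump

The IVHD and IVMD entries parsed at boot are then reported through the DUMP_printk() calls added in this patch, each line prefixed with "AMD IOMMU: ".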