aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/ia64/include/asm/dma-mapping.h4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/kernel/amd_iommu.c328
-rw-r--r--arch/x86/kernel/amd_iommu_init.c194
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/k8.c5
-rw-r--r--arch/x86/kernel/pci-calgary_64.c18
-rw-r--r--arch/x86/kernel/pci-dma.c177
-rw-r--r--arch/x86/kernel/pci-gart_64.c125
-rw-r--r--arch/x86/kernel/pci-nommu.c10
-rw-r--r--drivers/pci/intel-iommu.c2
-rw-r--r--include/asm-x86/amd_iommu.h3
-rw-r--r--include/asm-x86/amd_iommu_types.h64
-rw-r--r--include/asm-x86/bitops.h10
-rw-r--r--include/asm-x86/dma-mapping.h87
-rw-r--r--include/asm-x86/gart.h2
-rw-r--r--include/asm-x86/iommu.h1
-rw-r--r--include/linux/dma-mapping.h12
-rw-r--r--include/linux/iommu-helper.h16
-rw-r--r--include/linux/pci_ids.h10
-rw-r--r--kernel/dma-coherent.c2
-rw-r--r--lib/iommu-helper.c5
-rw-r--r--lib/swiotlb.c49
25 files changed, 803 insertions, 346 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d0fff6302f68..2ca9c8f8c8d8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -284,6 +284,11 @@ and is between 256 and 4096 characters. It is defined in the file
284 isolate - enable device isolation (each device, as far 284 isolate - enable device isolation (each device, as far
285 as possible, will get its own protection 285 as possible, will get its own protection
286 domain) 286 domain)
287 fullflush - enable flushing of IO/TLB entries when
288 they are unmapped. Otherwise they are
289 flushed before they will be reused, which
290 is a lot of faster
291
287 amd_iommu_size= [HW,X86-64] 292 amd_iommu_size= [HW,X86-64]
288 Define the size of the aperture for the AMD IOMMU 293 Define the size of the aperture for the AMD IOMMU
289 driver. Possible values are: 294 driver. Possible values are:
diff --git a/MAINTAINERS b/MAINTAINERS
index 85be56a6efbf..68781ed2b734 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -387,6 +387,7 @@ AMD IOMMU (AMD-VI)
387P: Joerg Roedel 387P: Joerg Roedel
388M: joerg.roedel@amd.com 388M: joerg.roedel@amd.com
389L: iommu@lists.linux-foundation.org 389L: iommu@lists.linux-foundation.org
390T: git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git
390S: Supported 391S: Supported
391 392
392AMS (Apple Motion Sensor) DRIVER 393AMS (Apple Motion Sensor) DRIVER
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index 9f0df9bd46b7..06ff1ba21465 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -8,7 +8,9 @@
8#include <asm/machvec.h> 8#include <asm/machvec.h>
9#include <linux/scatterlist.h> 9#include <linux/scatterlist.h>
10 10
11#define dma_alloc_coherent platform_dma_alloc_coherent 11#define dma_alloc_coherent(dev, size, handle, gfp) \
12 platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA)
13
12/* coherent mem. is cheap */ 14/* coherent mem. is cheap */
13static inline void * 15static inline void *
14dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 16dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 97f0d2b6dc0c..0d7cdbbfc1ee 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -554,6 +554,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
554config AMD_IOMMU 554config AMD_IOMMU
555 bool "AMD IOMMU support" 555 bool "AMD IOMMU support"
556 select SWIOTLB 556 select SWIOTLB
557 select PCI_MSI
557 depends on X86_64 && PCI && ACPI 558 depends on X86_64 && PCI && ACPI
558 help 559 help
559 With this option you can enable support for AMD IOMMU hardware in 560 With this option you can enable support for AMD IOMMU hardware in
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 042fdc27bc92..34e4d112b1ef 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -33,6 +33,10 @@
33 33
34static DEFINE_RWLOCK(amd_iommu_devtable_lock); 34static DEFINE_RWLOCK(amd_iommu_devtable_lock);
35 35
36/* A list of preallocated protection domains */
37static LIST_HEAD(iommu_pd_list);
38static DEFINE_SPINLOCK(iommu_pd_list_lock);
39
36/* 40/*
37 * general struct to manage commands send to an IOMMU 41 * general struct to manage commands send to an IOMMU
38 */ 42 */
@@ -51,6 +55,102 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
51 55
52/**************************************************************************** 56/****************************************************************************
53 * 57 *
58 * Interrupt handling functions
59 *
60 ****************************************************************************/
61
62static void iommu_print_event(void *__evt)
63{
64 u32 *event = __evt;
65 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
66 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
67 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
68 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
69 u64 address = (u64)(((u64)event[3]) << 32) | event[2];
70
71 printk(KERN_ERR "AMD IOMMU: Event logged [");
72
73 switch (type) {
74 case EVENT_TYPE_ILL_DEV:
75 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
76 "address=0x%016llx flags=0x%04x]\n",
77 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
78 address, flags);
79 break;
80 case EVENT_TYPE_IO_FAULT:
81 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
82 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
83 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
84 domid, address, flags);
85 break;
86 case EVENT_TYPE_DEV_TAB_ERR:
87 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
88 "address=0x%016llx flags=0x%04x]\n",
89 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
90 address, flags);
91 break;
92 case EVENT_TYPE_PAGE_TAB_ERR:
93 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
94 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
95 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
96 domid, address, flags);
97 break;
98 case EVENT_TYPE_ILL_CMD:
99 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
100 break;
101 case EVENT_TYPE_CMD_HARD_ERR:
102 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
103 "flags=0x%04x]\n", address, flags);
104 break;
105 case EVENT_TYPE_IOTLB_INV_TO:
106 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
107 "address=0x%016llx]\n",
108 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
109 address);
110 break;
111 case EVENT_TYPE_INV_DEV_REQ:
112 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
113 "address=0x%016llx flags=0x%04x]\n",
114 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
115 address, flags);
116 break;
117 default:
118 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
119 }
120}
121
122static void iommu_poll_events(struct amd_iommu *iommu)
123{
124 u32 head, tail;
125 unsigned long flags;
126
127 spin_lock_irqsave(&iommu->lock, flags);
128
129 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
130 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
131
132 while (head != tail) {
133 iommu_print_event(iommu->evt_buf + head);
134 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
135 }
136
137 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
138
139 spin_unlock_irqrestore(&iommu->lock, flags);
140}
141
142irqreturn_t amd_iommu_int_handler(int irq, void *data)
143{
144 struct amd_iommu *iommu;
145
146 list_for_each_entry(iommu, &amd_iommu_list, list)
147 iommu_poll_events(iommu);
148
149 return IRQ_HANDLED;
150}
151
152/****************************************************************************
153 *
54 * IOMMU command queuing functions 154 * IOMMU command queuing functions
55 * 155 *
56 ****************************************************************************/ 156 ****************************************************************************/
@@ -213,6 +313,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
213 return 0; 313 return 0;
214} 314}
215 315
316/* Flush the whole IO/TLB for a given protection domain */
317static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
318{
319 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
320
321 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
322}
323
216/**************************************************************************** 324/****************************************************************************
217 * 325 *
218 * The functions below are used the create the page table mappings for 326 * The functions below are used the create the page table mappings for
@@ -372,11 +480,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
372 * efficient allocator. 480 * efficient allocator.
373 * 481 *
374 ****************************************************************************/ 482 ****************************************************************************/
375static unsigned long dma_mask_to_pages(unsigned long mask)
376{
377 return (mask >> PAGE_SHIFT) +
378 (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
379}
380 483
381/* 484/*
382 * The address allocator core function. 485 * The address allocator core function.
@@ -385,25 +488,31 @@ static unsigned long dma_mask_to_pages(unsigned long mask)
385 */ 488 */
386static unsigned long dma_ops_alloc_addresses(struct device *dev, 489static unsigned long dma_ops_alloc_addresses(struct device *dev,
387 struct dma_ops_domain *dom, 490 struct dma_ops_domain *dom,
388 unsigned int pages) 491 unsigned int pages,
492 unsigned long align_mask,
493 u64 dma_mask)
389{ 494{
390 unsigned long limit = dma_mask_to_pages(*dev->dma_mask); 495 unsigned long limit;
391 unsigned long address; 496 unsigned long address;
392 unsigned long size = dom->aperture_size >> PAGE_SHIFT;
393 unsigned long boundary_size; 497 unsigned long boundary_size;
394 498
395 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 499 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
396 PAGE_SIZE) >> PAGE_SHIFT; 500 PAGE_SIZE) >> PAGE_SHIFT;
397 limit = limit < size ? limit : size; 501 limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
502 dma_mask >> PAGE_SHIFT);
398 503
399 if (dom->next_bit >= limit) 504 if (dom->next_bit >= limit) {
400 dom->next_bit = 0; 505 dom->next_bit = 0;
506 dom->need_flush = true;
507 }
401 508
402 address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, 509 address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
403 0 , boundary_size, 0); 510 0 , boundary_size, align_mask);
404 if (address == -1) 511 if (address == -1) {
405 address = iommu_area_alloc(dom->bitmap, limit, 0, pages, 512 address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
406 0, boundary_size, 0); 513 0, boundary_size, align_mask);
514 dom->need_flush = true;
515 }
407 516
408 if (likely(address != -1)) { 517 if (likely(address != -1)) {
409 dom->next_bit = address + pages; 518 dom->next_bit = address + pages;
@@ -469,7 +578,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
469 if (start_page + pages > last_page) 578 if (start_page + pages > last_page)
470 pages = last_page - start_page; 579 pages = last_page - start_page;
471 580
472 set_bit_string(dom->bitmap, start_page, pages); 581 iommu_area_reserve(dom->bitmap, start_page, pages);
473} 582}
474 583
475static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 584static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
@@ -563,6 +672,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
563 dma_dom->bitmap[0] = 1; 672 dma_dom->bitmap[0] = 1;
564 dma_dom->next_bit = 0; 673 dma_dom->next_bit = 0;
565 674
675 dma_dom->need_flush = false;
676 dma_dom->target_dev = 0xffff;
677
566 /* Intialize the exclusion range if necessary */ 678 /* Intialize the exclusion range if necessary */
567 if (iommu->exclusion_start && 679 if (iommu->exclusion_start &&
568 iommu->exclusion_start < dma_dom->aperture_size) { 680 iommu->exclusion_start < dma_dom->aperture_size) {
@@ -633,12 +745,13 @@ static void set_device_domain(struct amd_iommu *iommu,
633 745
634 u64 pte_root = virt_to_phys(domain->pt_root); 746 u64 pte_root = virt_to_phys(domain->pt_root);
635 747
636 pte_root |= (domain->mode & 0x07) << 9; 748 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
637 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; 749 << DEV_ENTRY_MODE_SHIFT;
750 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
638 751
639 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 752 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
640 amd_iommu_dev_table[devid].data[0] = pte_root; 753 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
641 amd_iommu_dev_table[devid].data[1] = pte_root >> 32; 754 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
642 amd_iommu_dev_table[devid].data[2] = domain->id; 755 amd_iommu_dev_table[devid].data[2] = domain->id;
643 756
644 amd_iommu_pd_table[devid] = domain; 757 amd_iommu_pd_table[devid] = domain;
@@ -656,6 +769,45 @@ static void set_device_domain(struct amd_iommu *iommu,
656 *****************************************************************************/ 769 *****************************************************************************/
657 770
658/* 771/*
772 * This function checks if the driver got a valid device from the caller to
773 * avoid dereferencing invalid pointers.
774 */
775static bool check_device(struct device *dev)
776{
777 if (!dev || !dev->dma_mask)
778 return false;
779
780 return true;
781}
782
783/*
784 * In this function the list of preallocated protection domains is traversed to
785 * find the domain for a specific device
786 */
787static struct dma_ops_domain *find_protection_domain(u16 devid)
788{
789 struct dma_ops_domain *entry, *ret = NULL;
790 unsigned long flags;
791
792 if (list_empty(&iommu_pd_list))
793 return NULL;
794
795 spin_lock_irqsave(&iommu_pd_list_lock, flags);
796
797 list_for_each_entry(entry, &iommu_pd_list, list) {
798 if (entry->target_dev == devid) {
799 ret = entry;
800 list_del(&ret->list);
801 break;
802 }
803 }
804
805 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
806
807 return ret;
808}
809
810/*
659 * In the dma_ops path we only have the struct device. This function 811 * In the dma_ops path we only have the struct device. This function
660 * finds the corresponding IOMMU, the protection domain and the 812 * finds the corresponding IOMMU, the protection domain and the
661 * requestor id for a given device. 813 * requestor id for a given device.
@@ -671,27 +823,30 @@ static int get_device_resources(struct device *dev,
671 struct pci_dev *pcidev; 823 struct pci_dev *pcidev;
672 u16 _bdf; 824 u16 _bdf;
673 825
674 BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); 826 *iommu = NULL;
827 *domain = NULL;
828 *bdf = 0xffff;
829
830 if (dev->bus != &pci_bus_type)
831 return 0;
675 832
676 pcidev = to_pci_dev(dev); 833 pcidev = to_pci_dev(dev);
677 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); 834 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
678 835
679 /* device not translated by any IOMMU in the system? */ 836 /* device not translated by any IOMMU in the system? */
680 if (_bdf > amd_iommu_last_bdf) { 837 if (_bdf > amd_iommu_last_bdf)
681 *iommu = NULL;
682 *domain = NULL;
683 *bdf = 0xffff;
684 return 0; 838 return 0;
685 }
686 839
687 *bdf = amd_iommu_alias_table[_bdf]; 840 *bdf = amd_iommu_alias_table[_bdf];
688 841
689 *iommu = amd_iommu_rlookup_table[*bdf]; 842 *iommu = amd_iommu_rlookup_table[*bdf];
690 if (*iommu == NULL) 843 if (*iommu == NULL)
691 return 0; 844 return 0;
692 dma_dom = (*iommu)->default_dom;
693 *domain = domain_for_device(*bdf); 845 *domain = domain_for_device(*bdf);
694 if (*domain == NULL) { 846 if (*domain == NULL) {
847 dma_dom = find_protection_domain(*bdf);
848 if (!dma_dom)
849 dma_dom = (*iommu)->default_dom;
695 *domain = &dma_dom->domain; 850 *domain = &dma_dom->domain;
696 set_device_domain(*iommu, *domain, *bdf); 851 set_device_domain(*iommu, *domain, *bdf);
697 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 852 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
@@ -770,17 +925,24 @@ static dma_addr_t __map_single(struct device *dev,
770 struct dma_ops_domain *dma_dom, 925 struct dma_ops_domain *dma_dom,
771 phys_addr_t paddr, 926 phys_addr_t paddr,
772 size_t size, 927 size_t size,
773 int dir) 928 int dir,
929 bool align,
930 u64 dma_mask)
774{ 931{
775 dma_addr_t offset = paddr & ~PAGE_MASK; 932 dma_addr_t offset = paddr & ~PAGE_MASK;
776 dma_addr_t address, start; 933 dma_addr_t address, start;
777 unsigned int pages; 934 unsigned int pages;
935 unsigned long align_mask = 0;
778 int i; 936 int i;
779 937
780 pages = iommu_num_pages(paddr, size); 938 pages = iommu_num_pages(paddr, size);
781 paddr &= PAGE_MASK; 939 paddr &= PAGE_MASK;
782 940
783 address = dma_ops_alloc_addresses(dev, dma_dom, pages); 941 if (align)
942 align_mask = (1UL << get_order(size)) - 1;
943
944 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
945 dma_mask);
784 if (unlikely(address == bad_dma_address)) 946 if (unlikely(address == bad_dma_address))
785 goto out; 947 goto out;
786 948
@@ -792,6 +954,12 @@ static dma_addr_t __map_single(struct device *dev,
792 } 954 }
793 address += offset; 955 address += offset;
794 956
957 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
958 iommu_flush_tlb(iommu, dma_dom->domain.id);
959 dma_dom->need_flush = false;
960 } else if (unlikely(iommu_has_npcache(iommu)))
961 iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
962
795out: 963out:
796 return address; 964 return address;
797} 965}
@@ -822,6 +990,9 @@ static void __unmap_single(struct amd_iommu *iommu,
822 } 990 }
823 991
824 dma_ops_free_addresses(dma_dom, dma_addr, pages); 992 dma_ops_free_addresses(dma_dom, dma_addr, pages);
993
994 if (amd_iommu_unmap_flush)
995 iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
825} 996}
826 997
827/* 998/*
@@ -835,6 +1006,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
835 struct protection_domain *domain; 1006 struct protection_domain *domain;
836 u16 devid; 1007 u16 devid;
837 dma_addr_t addr; 1008 dma_addr_t addr;
1009 u64 dma_mask;
1010
1011 if (!check_device(dev))
1012 return bad_dma_address;
1013
1014 dma_mask = *dev->dma_mask;
838 1015
839 get_device_resources(dev, &iommu, &domain, &devid); 1016 get_device_resources(dev, &iommu, &domain, &devid);
840 1017
@@ -843,14 +1020,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
843 return (dma_addr_t)paddr; 1020 return (dma_addr_t)paddr;
844 1021
845 spin_lock_irqsave(&domain->lock, flags); 1022 spin_lock_irqsave(&domain->lock, flags);
846 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); 1023 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
1024 dma_mask);
847 if (addr == bad_dma_address) 1025 if (addr == bad_dma_address)
848 goto out; 1026 goto out;
849 1027
850 if (iommu_has_npcache(iommu)) 1028 if (unlikely(iommu->need_sync))
851 iommu_flush_pages(iommu, domain->id, addr, size);
852
853 if (iommu->need_sync)
854 iommu_completion_wait(iommu); 1029 iommu_completion_wait(iommu);
855 1030
856out: 1031out:
@@ -870,7 +1045,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
870 struct protection_domain *domain; 1045 struct protection_domain *domain;
871 u16 devid; 1046 u16 devid;
872 1047
873 if (!get_device_resources(dev, &iommu, &domain, &devid)) 1048 if (!check_device(dev) ||
1049 !get_device_resources(dev, &iommu, &domain, &devid))
874 /* device not handled by any AMD IOMMU */ 1050 /* device not handled by any AMD IOMMU */
875 return; 1051 return;
876 1052
@@ -878,9 +1054,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
878 1054
879 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1055 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
880 1056
881 iommu_flush_pages(iommu, domain->id, dma_addr, size); 1057 if (unlikely(iommu->need_sync))
882
883 if (iommu->need_sync)
884 iommu_completion_wait(iommu); 1058 iommu_completion_wait(iommu);
885 1059
886 spin_unlock_irqrestore(&domain->lock, flags); 1060 spin_unlock_irqrestore(&domain->lock, flags);
@@ -919,6 +1093,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
919 struct scatterlist *s; 1093 struct scatterlist *s;
920 phys_addr_t paddr; 1094 phys_addr_t paddr;
921 int mapped_elems = 0; 1095 int mapped_elems = 0;
1096 u64 dma_mask;
1097
1098 if (!check_device(dev))
1099 return 0;
1100
1101 dma_mask = *dev->dma_mask;
922 1102
923 get_device_resources(dev, &iommu, &domain, &devid); 1103 get_device_resources(dev, &iommu, &domain, &devid);
924 1104
@@ -931,19 +1111,17 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
931 paddr = sg_phys(s); 1111 paddr = sg_phys(s);
932 1112
933 s->dma_address = __map_single(dev, iommu, domain->priv, 1113 s->dma_address = __map_single(dev, iommu, domain->priv,
934 paddr, s->length, dir); 1114 paddr, s->length, dir, false,
1115 dma_mask);
935 1116
936 if (s->dma_address) { 1117 if (s->dma_address) {
937 s->dma_length = s->length; 1118 s->dma_length = s->length;
938 mapped_elems++; 1119 mapped_elems++;
939 } else 1120 } else
940 goto unmap; 1121 goto unmap;
941 if (iommu_has_npcache(iommu))
942 iommu_flush_pages(iommu, domain->id, s->dma_address,
943 s->dma_length);
944 } 1122 }
945 1123
946 if (iommu->need_sync) 1124 if (unlikely(iommu->need_sync))
947 iommu_completion_wait(iommu); 1125 iommu_completion_wait(iommu);
948 1126
949out: 1127out:
@@ -977,7 +1155,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
977 u16 devid; 1155 u16 devid;
978 int i; 1156 int i;
979 1157
980 if (!get_device_resources(dev, &iommu, &domain, &devid)) 1158 if (!check_device(dev) ||
1159 !get_device_resources(dev, &iommu, &domain, &devid))
981 return; 1160 return;
982 1161
983 spin_lock_irqsave(&domain->lock, flags); 1162 spin_lock_irqsave(&domain->lock, flags);
@@ -985,12 +1164,10 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
985 for_each_sg(sglist, s, nelems, i) { 1164 for_each_sg(sglist, s, nelems, i) {
986 __unmap_single(iommu, domain->priv, s->dma_address, 1165 __unmap_single(iommu, domain->priv, s->dma_address,
987 s->dma_length, dir); 1166 s->dma_length, dir);
988 iommu_flush_pages(iommu, domain->id, s->dma_address,
989 s->dma_length);
990 s->dma_address = s->dma_length = 0; 1167 s->dma_address = s->dma_length = 0;
991 } 1168 }
992 1169
993 if (iommu->need_sync) 1170 if (unlikely(iommu->need_sync))
994 iommu_completion_wait(iommu); 1171 iommu_completion_wait(iommu);
995 1172
996 spin_unlock_irqrestore(&domain->lock, flags); 1173 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1008,25 +1185,33 @@ static void *alloc_coherent(struct device *dev, size_t size,
1008 struct protection_domain *domain; 1185 struct protection_domain *domain;
1009 u16 devid; 1186 u16 devid;
1010 phys_addr_t paddr; 1187 phys_addr_t paddr;
1188 u64 dma_mask = dev->coherent_dma_mask;
1189
1190 if (!check_device(dev))
1191 return NULL;
1192
1193 if (!get_device_resources(dev, &iommu, &domain, &devid))
1194 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
1011 1195
1196 flag |= __GFP_ZERO;
1012 virt_addr = (void *)__get_free_pages(flag, get_order(size)); 1197 virt_addr = (void *)__get_free_pages(flag, get_order(size));
1013 if (!virt_addr) 1198 if (!virt_addr)
1014 return 0; 1199 return 0;
1015 1200
1016 memset(virt_addr, 0, size);
1017 paddr = virt_to_phys(virt_addr); 1201 paddr = virt_to_phys(virt_addr);
1018 1202
1019 get_device_resources(dev, &iommu, &domain, &devid);
1020
1021 if (!iommu || !domain) { 1203 if (!iommu || !domain) {
1022 *dma_addr = (dma_addr_t)paddr; 1204 *dma_addr = (dma_addr_t)paddr;
1023 return virt_addr; 1205 return virt_addr;
1024 } 1206 }
1025 1207
1208 if (!dma_mask)
1209 dma_mask = *dev->dma_mask;
1210
1026 spin_lock_irqsave(&domain->lock, flags); 1211 spin_lock_irqsave(&domain->lock, flags);
1027 1212
1028 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1213 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1029 size, DMA_BIDIRECTIONAL); 1214 size, DMA_BIDIRECTIONAL, true, dma_mask);
1030 1215
1031 if (*dma_addr == bad_dma_address) { 1216 if (*dma_addr == bad_dma_address) {
1032 free_pages((unsigned long)virt_addr, get_order(size)); 1217 free_pages((unsigned long)virt_addr, get_order(size));
@@ -1034,10 +1219,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
1034 goto out; 1219 goto out;
1035 } 1220 }
1036 1221
1037 if (iommu_has_npcache(iommu)) 1222 if (unlikely(iommu->need_sync))
1038 iommu_flush_pages(iommu, domain->id, *dma_addr, size);
1039
1040 if (iommu->need_sync)
1041 iommu_completion_wait(iommu); 1223 iommu_completion_wait(iommu);
1042 1224
1043out: 1225out:
@@ -1048,8 +1230,6 @@ out:
1048 1230
1049/* 1231/*
1050 * The exported free_coherent function for dma_ops. 1232 * The exported free_coherent function for dma_ops.
1051 * FIXME: fix the generic x86 DMA layer so that it actually calls that
1052 * function.
1053 */ 1233 */
1054static void free_coherent(struct device *dev, size_t size, 1234static void free_coherent(struct device *dev, size_t size,
1055 void *virt_addr, dma_addr_t dma_addr) 1235 void *virt_addr, dma_addr_t dma_addr)
@@ -1059,6 +1239,9 @@ static void free_coherent(struct device *dev, size_t size,
1059 struct protection_domain *domain; 1239 struct protection_domain *domain;
1060 u16 devid; 1240 u16 devid;
1061 1241
1242 if (!check_device(dev))
1243 return;
1244
1062 get_device_resources(dev, &iommu, &domain, &devid); 1245 get_device_resources(dev, &iommu, &domain, &devid);
1063 1246
1064 if (!iommu || !domain) 1247 if (!iommu || !domain)
@@ -1067,9 +1250,8 @@ static void free_coherent(struct device *dev, size_t size,
1067 spin_lock_irqsave(&domain->lock, flags); 1250 spin_lock_irqsave(&domain->lock, flags);
1068 1251
1069 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 1252 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
1070 iommu_flush_pages(iommu, domain->id, dma_addr, size);
1071 1253
1072 if (iommu->need_sync) 1254 if (unlikely(iommu->need_sync))
1073 iommu_completion_wait(iommu); 1255 iommu_completion_wait(iommu);
1074 1256
1075 spin_unlock_irqrestore(&domain->lock, flags); 1257 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1079,6 +1261,30 @@ free_mem:
1079} 1261}
1080 1262
1081/* 1263/*
1264 * This function is called by the DMA layer to find out if we can handle a
1265 * particular device. It is part of the dma_ops.
1266 */
1267static int amd_iommu_dma_supported(struct device *dev, u64 mask)
1268{
1269 u16 bdf;
1270 struct pci_dev *pcidev;
1271
1272 /* No device or no PCI device */
1273 if (!dev || dev->bus != &pci_bus_type)
1274 return 0;
1275
1276 pcidev = to_pci_dev(dev);
1277
1278 bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
1279
1280 /* Out of our scope? */
1281 if (bdf > amd_iommu_last_bdf)
1282 return 0;
1283
1284 return 1;
1285}
1286
1287/*
1082 * The function for pre-allocating protection domains. 1288 * The function for pre-allocating protection domains.
1083 * 1289 *
1084 * If the driver core informs the DMA layer if a driver grabs a device 1290 * If the driver core informs the DMA layer if a driver grabs a device
@@ -1107,10 +1313,9 @@ void prealloc_protection_domains(void)
1107 if (!dma_dom) 1313 if (!dma_dom)
1108 continue; 1314 continue;
1109 init_unity_mappings_for_device(dma_dom, devid); 1315 init_unity_mappings_for_device(dma_dom, devid);
1110 set_device_domain(iommu, &dma_dom->domain, devid); 1316 dma_dom->target_dev = devid;
1111 printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", 1317
1112 dma_dom->domain.id); 1318 list_add_tail(&dma_dom->list, &iommu_pd_list);
1113 print_devid(devid, 1);
1114 } 1319 }
1115} 1320}
1116 1321
@@ -1121,6 +1326,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
1121 .unmap_single = unmap_single, 1326 .unmap_single = unmap_single,
1122 .map_sg = map_sg, 1327 .map_sg = map_sg,
1123 .unmap_sg = unmap_sg, 1328 .unmap_sg = unmap_sg,
1329 .dma_supported = amd_iommu_dma_supported,
1124}; 1330};
1125 1331
1126/* 1332/*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index a69cc0f52042..148fcfe22f17 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -22,6 +22,8 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <linux/list.h> 23#include <linux/list.h>
24#include <linux/sysdev.h> 24#include <linux/sysdev.h>
25#include <linux/interrupt.h>
26#include <linux/msi.h>
25#include <asm/pci-direct.h> 27#include <asm/pci-direct.h>
26#include <asm/amd_iommu_types.h> 28#include <asm/amd_iommu_types.h>
27#include <asm/amd_iommu.h> 29#include <asm/amd_iommu.h>
@@ -30,7 +32,6 @@
30/* 32/*
31 * definitions for the ACPI scanning code 33 * definitions for the ACPI scanning code
32 */ 34 */
33#define PCI_BUS(x) (((x) >> 8) & 0xff)
34#define IVRS_HEADER_LENGTH 48 35#define IVRS_HEADER_LENGTH 48
35 36
36#define ACPI_IVHD_TYPE 0x10 37#define ACPI_IVHD_TYPE 0x10
@@ -121,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
121 we find in ACPI */ 122 we find in ACPI */
122unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 123unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
123int amd_iommu_isolate; /* if 1, device isolation is enabled */ 124int amd_iommu_isolate; /* if 1, device isolation is enabled */
125bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
124 126
125LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 127LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
126 system */ 128 system */
@@ -234,7 +236,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
234{ 236{
235 u32 ctrl; 237 u32 ctrl;
236 238
237 ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); 239 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
238 ctrl &= ~(1 << bit); 240 ctrl &= ~(1 << bit);
239 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 241 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
240} 242}
@@ -242,13 +244,23 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
242/* Function to enable the hardware */ 244/* Function to enable the hardware */
243void __init iommu_enable(struct amd_iommu *iommu) 245void __init iommu_enable(struct amd_iommu *iommu)
244{ 246{
245 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); 247 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
246 print_devid(iommu->devid, 0); 248 "at %02x:%02x.%x cap 0x%hx\n",
247 printk(" cap 0x%hx\n", iommu->cap_ptr); 249 iommu->dev->bus->number,
250 PCI_SLOT(iommu->dev->devfn),
251 PCI_FUNC(iommu->dev->devfn),
252 iommu->cap_ptr);
248 253
249 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 254 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
250} 255}
251 256
257/* Function to enable IOMMU event logging and event interrupts */
258void __init iommu_enable_event_logging(struct amd_iommu *iommu)
259{
260 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
261 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
262}
263
252/* 264/*
253 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 265 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
254 * the system has one. 266 * the system has one.
@@ -286,6 +298,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
286 ****************************************************************************/ 298 ****************************************************************************/
287 299
288/* 300/*
301 * This function calculates the length of a given IVHD entry
302 */
303static inline int ivhd_entry_length(u8 *ivhd)
304{
305 return 0x04 << (*ivhd >> 6);
306}
307
308/*
289 * This function reads the last device id the IOMMU has to handle from the PCI 309 * This function reads the last device id the IOMMU has to handle from the PCI
290 * capability header for this IOMMU 310 * capability header for this IOMMU
291 */ 311 */
@@ -329,7 +349,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
329 default: 349 default:
330 break; 350 break;
331 } 351 }
332 p += 0x04 << (*p >> 6); 352 p += ivhd_entry_length(p);
333 } 353 }
334 354
335 WARN_ON(p != end); 355 WARN_ON(p != end);
@@ -414,7 +434,32 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
414 434
415static void __init free_command_buffer(struct amd_iommu *iommu) 435static void __init free_command_buffer(struct amd_iommu *iommu)
416{ 436{
417 free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 437 free_pages((unsigned long)iommu->cmd_buf,
438 get_order(iommu->cmd_buf_size));
439}
440
441/* allocates the memory where the IOMMU will log its events to */
442static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
443{
444 u64 entry;
445 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
446 get_order(EVT_BUFFER_SIZE));
447
448 if (iommu->evt_buf == NULL)
449 return NULL;
450
451 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
452 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
453 &entry, sizeof(entry));
454
455 iommu->evt_buf_size = EVT_BUFFER_SIZE;
456
457 return iommu->evt_buf;
458}
459
460static void __init free_event_buffer(struct amd_iommu *iommu)
461{
462 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
418} 463}
419 464
420/* sets a specific bit in the device table entry. */ 465/* sets a specific bit in the device table entry. */
@@ -487,19 +532,21 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
487 */ 532 */
488static void __init init_iommu_from_pci(struct amd_iommu *iommu) 533static void __init init_iommu_from_pci(struct amd_iommu *iommu)
489{ 534{
490 int bus = PCI_BUS(iommu->devid);
491 int dev = PCI_SLOT(iommu->devid);
492 int fn = PCI_FUNC(iommu->devid);
493 int cap_ptr = iommu->cap_ptr; 535 int cap_ptr = iommu->cap_ptr;
494 u32 range; 536 u32 range, misc;
495 537
496 iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); 538 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
539 &iommu->cap);
540 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
541 &range);
542 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
543 &misc);
497 544
498 range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
499 iommu->first_device = calc_devid(MMIO_GET_BUS(range), 545 iommu->first_device = calc_devid(MMIO_GET_BUS(range),
500 MMIO_GET_FD(range)); 546 MMIO_GET_FD(range));
501 iommu->last_device = calc_devid(MMIO_GET_BUS(range), 547 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
502 MMIO_GET_LD(range)); 548 MMIO_GET_LD(range));
549 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
503} 550}
504 551
505/* 552/*
@@ -604,7 +651,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
604 break; 651 break;
605 } 652 }
606 653
607 p += 0x04 << (e->type >> 6); 654 p += ivhd_entry_length(p);
608 } 655 }
609} 656}
610 657
@@ -622,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu)
622static void __init free_iommu_one(struct amd_iommu *iommu) 669static void __init free_iommu_one(struct amd_iommu *iommu)
623{ 670{
624 free_command_buffer(iommu); 671 free_command_buffer(iommu);
672 free_event_buffer(iommu);
625 iommu_unmap_mmio_space(iommu); 673 iommu_unmap_mmio_space(iommu);
626} 674}
627 675
@@ -649,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
649 /* 697 /*
650 * Copy data from ACPI table entry to the iommu struct 698 * Copy data from ACPI table entry to the iommu struct
651 */ 699 */
652 iommu->devid = h->devid; 700 iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
701 if (!iommu->dev)
702 return 1;
703
653 iommu->cap_ptr = h->cap_ptr; 704 iommu->cap_ptr = h->cap_ptr;
705 iommu->pci_seg = h->pci_seg;
654 iommu->mmio_phys = h->mmio_phys; 706 iommu->mmio_phys = h->mmio_phys;
655 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); 707 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
656 if (!iommu->mmio_base) 708 if (!iommu->mmio_base)
@@ -661,10 +713,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
661 if (!iommu->cmd_buf) 713 if (!iommu->cmd_buf)
662 return -ENOMEM; 714 return -ENOMEM;
663 715
716 iommu->evt_buf = alloc_event_buffer(iommu);
717 if (!iommu->evt_buf)
718 return -ENOMEM;
719
720 iommu->int_enabled = false;
721
664 init_iommu_from_pci(iommu); 722 init_iommu_from_pci(iommu);
665 init_iommu_from_acpi(iommu, h); 723 init_iommu_from_acpi(iommu, h);
666 init_iommu_devices(iommu); 724 init_iommu_devices(iommu);
667 725
726 pci_enable_device(iommu->dev);
727
668 return 0; 728 return 0;
669} 729}
670 730
@@ -706,6 +766,95 @@ static int __init init_iommu_all(struct acpi_table_header *table)
706 766
707/**************************************************************************** 767/****************************************************************************
708 * 768 *
769 * The following functions initialize the MSI interrupts for all IOMMUs
770 * in the system. Its a bit challenging because there could be multiple
771 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
772 * pci_dev.
773 *
774 ****************************************************************************/
775
776static int __init iommu_setup_msix(struct amd_iommu *iommu)
777{
778 struct amd_iommu *curr;
779 struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
780 int nvec = 0, i;
781
782 list_for_each_entry(curr, &amd_iommu_list, list) {
783 if (curr->dev == iommu->dev) {
784 entries[nvec].entry = curr->evt_msi_num;
785 entries[nvec].vector = 0;
786 curr->int_enabled = true;
787 nvec++;
788 }
789 }
790
791 if (pci_enable_msix(iommu->dev, entries, nvec)) {
792 pci_disable_msix(iommu->dev);
793 return 1;
794 }
795
796 for (i = 0; i < nvec; ++i) {
797 int r = request_irq(entries->vector, amd_iommu_int_handler,
798 IRQF_SAMPLE_RANDOM,
799 "AMD IOMMU",
800 NULL);
801 if (r)
802 goto out_free;
803 }
804
805 return 0;
806
807out_free:
808 for (i -= 1; i >= 0; --i)
809 free_irq(entries->vector, NULL);
810
811 pci_disable_msix(iommu->dev);
812
813 return 1;
814}
815
816static int __init iommu_setup_msi(struct amd_iommu *iommu)
817{
818 int r;
819 struct amd_iommu *curr;
820
821 list_for_each_entry(curr, &amd_iommu_list, list) {
822 if (curr->dev == iommu->dev)
823 curr->int_enabled = true;
824 }
825
826
827 if (pci_enable_msi(iommu->dev))
828 return 1;
829
830 r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
831 IRQF_SAMPLE_RANDOM,
832 "AMD IOMMU",
833 NULL);
834
835 if (r) {
836 pci_disable_msi(iommu->dev);
837 return 1;
838 }
839
840 return 0;
841}
842
843static int __init iommu_init_msi(struct amd_iommu *iommu)
844{
845 if (iommu->int_enabled)
846 return 0;
847
848 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
849 return iommu_setup_msix(iommu);
850 else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
851 return iommu_setup_msi(iommu);
852
853 return 1;
854}
855
856/****************************************************************************
857 *
709 * The next functions belong to the third pass of parsing the ACPI 858 * The next functions belong to the third pass of parsing the ACPI
710 * table. In this last pass the memory mapping requirements are 859 * table. In this last pass the memory mapping requirements are
711 * gathered (like exclusion and unity mapping reanges). 860 * gathered (like exclusion and unity mapping reanges).
@@ -811,7 +960,6 @@ static void init_device_table(void)
811 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { 960 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
812 set_dev_entry_bit(devid, DEV_ENTRY_VALID); 961 set_dev_entry_bit(devid, DEV_ENTRY_VALID);
813 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); 962 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
814 set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
815 } 963 }
816} 964}
817 965
@@ -825,6 +973,8 @@ static void __init enable_iommus(void)
825 973
826 list_for_each_entry(iommu, &amd_iommu_list, list) { 974 list_for_each_entry(iommu, &amd_iommu_list, list) {
827 iommu_set_exclusion_range(iommu); 975 iommu_set_exclusion_range(iommu);
976 iommu_init_msi(iommu);
977 iommu_enable_event_logging(iommu);
828 iommu_enable(iommu); 978 iommu_enable(iommu);
829 } 979 }
830} 980}
@@ -995,11 +1145,17 @@ int __init amd_iommu_init(void)
995 else 1145 else
996 printk("disabled\n"); 1146 printk("disabled\n");
997 1147
1148 if (amd_iommu_unmap_flush)
1149 printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
1150 else
1151 printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
1152
998out: 1153out:
999 return ret; 1154 return ret;
1000 1155
1001free: 1156free:
1002 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); 1157 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1158 get_order(MAX_DOMAIN_ID/8));
1003 1159
1004 free_pages((unsigned long)amd_iommu_pd_table, 1160 free_pages((unsigned long)amd_iommu_pd_table,
1005 get_order(rlookup_table_size)); 1161 get_order(rlookup_table_size));
@@ -1057,8 +1213,10 @@ void __init amd_iommu_detect(void)
1057static int __init parse_amd_iommu_options(char *str) 1213static int __init parse_amd_iommu_options(char *str)
1058{ 1214{
1059 for (; *str; ++str) { 1215 for (; *str; ++str) {
1060 if (strcmp(str, "isolate") == 0) 1216 if (strncmp(str, "isolate", 7) == 0)
1061 amd_iommu_isolate = 1; 1217 amd_iommu_isolate = 1;
1218 if (strncmp(str, "fullflush", 11) == 0)
1219 amd_iommu_unmap_flush = true;
1062 } 1220 }
1063 1221
1064 return 1; 1222 return 1;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 4353cf5e6fac..24bb5faf5efa 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func)
95 95
96} 96}
97 97
98#ifdef CONFIG_DMAR
99static void __init intel_g33_dmar(int num, int slot, int func)
100{
101 struct acpi_table_header *dmar_tbl;
102 acpi_status status;
103
104 status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
105 if (ACPI_SUCCESS(status)) {
106 printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
107 dmar_disabled = 1;
108 }
109}
110#endif
111
98#define QFLAG_APPLY_ONCE 0x1 112#define QFLAG_APPLY_ONCE 0x1
99#define QFLAG_APPLIED 0x2 113#define QFLAG_APPLIED 0x2
100#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) 114#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = {
114 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, 128 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
115 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, 129 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
116 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, 130 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
131#ifdef CONFIG_DMAR
132 { PCI_VENDOR_ID_INTEL, 0x29c0,
133 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
134#endif
117 {} 135 {}
118}; 136};
119 137
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c
index 7377ccb21335..304d8bad6559 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/k8.c
@@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges);
16static u32 *flush_words; 16static u32 *flush_words;
17 17
18struct pci_device_id k8_nb_ids[] = { 18struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, 19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, 20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
21 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
21 {} 22 {}
22}; 23};
23EXPORT_SYMBOL(k8_nb_ids); 24EXPORT_SYMBOL(k8_nb_ids);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index dcdac6c826e9..080d1d27f37a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
261 badbit, tbl, start_addr, npages); 261 badbit, tbl, start_addr, npages);
262 } 262 }
263 263
264 set_bit_string(tbl->it_map, index, npages); 264 iommu_area_reserve(tbl->it_map, index, npages);
265 265
266 spin_unlock_irqrestore(&tbl->it_lock, flags); 266 spin_unlock_irqrestore(&tbl->it_lock, flags);
267} 267}
@@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
491 npages = size >> PAGE_SHIFT; 491 npages = size >> PAGE_SHIFT;
492 order = get_order(size); 492 order = get_order(size);
493 493
494 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
495
494 /* alloc enough pages (and possibly more) */ 496 /* alloc enough pages (and possibly more) */
495 ret = (void *)__get_free_pages(flag, order); 497 ret = (void *)__get_free_pages(flag, order);
496 if (!ret) 498 if (!ret)
@@ -510,8 +512,22 @@ error:
510 return ret; 512 return ret;
511} 513}
512 514
515static void calgary_free_coherent(struct device *dev, size_t size,
516 void *vaddr, dma_addr_t dma_handle)
517{
518 unsigned int npages;
519 struct iommu_table *tbl = find_iommu_table(dev);
520
521 size = PAGE_ALIGN(size);
522 npages = size >> PAGE_SHIFT;
523
524 iommu_free(tbl, dma_handle, npages);
525 free_pages((unsigned long)vaddr, get_order(size));
526}
527
513static struct dma_mapping_ops calgary_dma_ops = { 528static struct dma_mapping_ops calgary_dma_ops = {
514 .alloc_coherent = calgary_alloc_coherent, 529 .alloc_coherent = calgary_alloc_coherent,
530 .free_coherent = calgary_free_coherent,
515 .map_single = calgary_map_single, 531 .map_single = calgary_map_single,
516 .unmap_single = calgary_unmap_single, 532 .unmap_single = calgary_unmap_single,
517 .map_sg = calgary_map_sg, 533 .map_sg = calgary_map_sg,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f704cb51ff82..0a3824e837b4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address);
41/* Dummy device used for NULL arguments (normally ISA). Better would 41/* Dummy device used for NULL arguments (normally ISA). Better would
42 be probably a smaller DMA mask, but this is bug-to-bug compatible 42 be probably a smaller DMA mask, but this is bug-to-bug compatible
43 to older i386. */ 43 to older i386. */
44struct device fallback_dev = { 44struct device x86_dma_fallback_dev = {
45 .bus_id = "fallback device", 45 .bus_id = "fallback device",
46 .coherent_dma_mask = DMA_32BIT_MASK, 46 .coherent_dma_mask = DMA_32BIT_MASK,
47 .dma_mask = &fallback_dev.coherent_dma_mask, 47 .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
48}; 48};
49EXPORT_SYMBOL(x86_dma_fallback_dev);
49 50
50int dma_set_mask(struct device *dev, u64 mask) 51int dma_set_mask(struct device *dev, u64 mask)
51{ 52{
@@ -133,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
133EXPORT_SYMBOL(iommu_num_pages); 134EXPORT_SYMBOL(iommu_num_pages);
134#endif 135#endif
135 136
137void *dma_generic_alloc_coherent(struct device *dev, size_t size,
138 dma_addr_t *dma_addr, gfp_t flag)
139{
140 unsigned long dma_mask;
141 struct page *page;
142 dma_addr_t addr;
143
144 dma_mask = dma_alloc_coherent_mask(dev, flag);
145
146 flag |= __GFP_ZERO;
147again:
148 page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
149 if (!page)
150 return NULL;
151
152 addr = page_to_phys(page);
153 if (!is_buffer_dma_capable(dma_mask, addr, size)) {
154 __free_pages(page, get_order(size));
155
156 if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) {
157 flag = (flag & ~GFP_DMA32) | GFP_DMA;
158 goto again;
159 }
160
161 return NULL;
162 }
163
164 *dma_addr = addr;
165 return page_address(page);
166}
167
136/* 168/*
137 * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter 169 * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
138 * documentation. 170 * documentation.
@@ -241,147 +273,6 @@ int dma_supported(struct device *dev, u64 mask)
241} 273}
242EXPORT_SYMBOL(dma_supported); 274EXPORT_SYMBOL(dma_supported);
243 275
244/* Allocate DMA memory on node near device */
245static noinline struct page *
246dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
247{
248 int node;
249
250 node = dev_to_node(dev);
251
252 return alloc_pages_node(node, gfp, order);
253}
254
255/*
256 * Allocate memory for a coherent mapping.
257 */
258void *
259dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
260 gfp_t gfp)
261{
262 struct dma_mapping_ops *ops = get_dma_ops(dev);
263 void *memory = NULL;
264 struct page *page;
265 unsigned long dma_mask = 0;
266 dma_addr_t bus;
267 int noretry = 0;
268
269 /* ignore region specifiers */
270 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
271
272 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
273 return memory;
274
275 if (!dev) {
276 dev = &fallback_dev;
277 gfp |= GFP_DMA;
278 }
279 dma_mask = dev->coherent_dma_mask;
280 if (dma_mask == 0)
281 dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
282
283 /* Device not DMA able */
284 if (dev->dma_mask == NULL)
285 return NULL;
286
287 /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
288 if (gfp & __GFP_DMA)
289 noretry = 1;
290
291#ifdef CONFIG_X86_64
292 /* Why <=? Even when the mask is smaller than 4GB it is often
293 larger than 16MB and in this case we have a chance of
294 finding fitting memory in the next higher zone first. If
295 not retry with true GFP_DMA. -AK */
296 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
297 gfp |= GFP_DMA32;
298 if (dma_mask < DMA_32BIT_MASK)
299 noretry = 1;
300 }
301#endif
302
303 again:
304 page = dma_alloc_pages(dev,
305 noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
306 if (page == NULL)
307 return NULL;
308
309 {
310 int high, mmu;
311 bus = page_to_phys(page);
312 memory = page_address(page);
313 high = (bus + size) >= dma_mask;
314 mmu = high;
315 if (force_iommu && !(gfp & GFP_DMA))
316 mmu = 1;
317 else if (high) {
318 free_pages((unsigned long)memory,
319 get_order(size));
320
321 /* Don't use the 16MB ZONE_DMA unless absolutely
322 needed. It's better to use remapping first. */
323 if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
324 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
325 goto again;
326 }
327
328 /* Let low level make its own zone decisions */
329 gfp &= ~(GFP_DMA32|GFP_DMA);
330
331 if (ops->alloc_coherent)
332 return ops->alloc_coherent(dev, size,
333 dma_handle, gfp);
334 return NULL;
335 }
336
337 memset(memory, 0, size);
338 if (!mmu) {
339 *dma_handle = bus;
340 return memory;
341 }
342 }
343
344 if (ops->alloc_coherent) {
345 free_pages((unsigned long)memory, get_order(size));
346 gfp &= ~(GFP_DMA|GFP_DMA32);
347 return ops->alloc_coherent(dev, size, dma_handle, gfp);
348 }
349
350 if (ops->map_simple) {
351 *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
352 size,
353 PCI_DMA_BIDIRECTIONAL);
354 if (*dma_handle != bad_dma_address)
355 return memory;
356 }
357
358 if (panic_on_overflow)
359 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
360 (unsigned long)size);
361 free_pages((unsigned long)memory, get_order(size));
362 return NULL;
363}
364EXPORT_SYMBOL(dma_alloc_coherent);
365
366/*
367 * Unmap coherent memory.
368 * The caller must ensure that the device has finished accessing the mapping.
369 */
370void dma_free_coherent(struct device *dev, size_t size,
371 void *vaddr, dma_addr_t bus)
372{
373 struct dma_mapping_ops *ops = get_dma_ops(dev);
374
375 int order = get_order(size);
376 WARN_ON(irqs_disabled()); /* for portability */
377 if (dma_release_from_coherent(dev, order, vaddr))
378 return;
379 if (ops->unmap_single)
380 ops->unmap_single(dev, bus, size, 0);
381 free_pages((unsigned long)vaddr, order);
382}
383EXPORT_SYMBOL(dma_free_coherent);
384
385static int __init pci_iommu_init(void) 276static int __init pci_iommu_init(void)
386{ 277{
387 calgary_iommu_init(); 278 calgary_iommu_init();
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 1a895a582534..145f1c83369f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -27,8 +27,8 @@
27#include <linux/scatterlist.h> 27#include <linux/scatterlist.h>
28#include <linux/iommu-helper.h> 28#include <linux/iommu-helper.h>
29#include <linux/sysdev.h> 29#include <linux/sysdev.h>
30#include <linux/io.h>
30#include <asm/atomic.h> 31#include <asm/atomic.h>
31#include <asm/io.h>
32#include <asm/mtrr.h> 32#include <asm/mtrr.h>
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34#include <asm/proto.h> 34#include <asm/proto.h>
@@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved;
80AGPEXTERN __u32 *agp_gatt_table; 80AGPEXTERN __u32 *agp_gatt_table;
81 81
82static unsigned long next_bit; /* protected by iommu_bitmap_lock */ 82static unsigned long next_bit; /* protected by iommu_bitmap_lock */
83static int need_flush; /* global flush state. set for each gart wrap */ 83static bool need_flush; /* global flush state. set for each gart wrap */
84 84
85static unsigned long alloc_iommu(struct device *dev, int size, 85static unsigned long alloc_iommu(struct device *dev, int size,
86 unsigned long align_mask) 86 unsigned long align_mask)
@@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
98 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, 98 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
99 size, base_index, boundary_size, align_mask); 99 size, base_index, boundary_size, align_mask);
100 if (offset == -1) { 100 if (offset == -1) {
101 need_flush = 1; 101 need_flush = true;
102 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, 102 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
103 size, base_index, boundary_size, 103 size, base_index, boundary_size,
104 align_mask); 104 align_mask);
@@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size,
107 next_bit = offset+size; 107 next_bit = offset+size;
108 if (next_bit >= iommu_pages) { 108 if (next_bit >= iommu_pages) {
109 next_bit = 0; 109 next_bit = 0;
110 need_flush = 1; 110 need_flush = true;
111 } 111 }
112 } 112 }
113 if (iommu_fullflush) 113 if (iommu_fullflush)
114 need_flush = 1; 114 need_flush = true;
115 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 115 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
116 116
117 return offset; 117 return offset;
@@ -136,7 +136,7 @@ static void flush_gart(void)
136 spin_lock_irqsave(&iommu_bitmap_lock, flags); 136 spin_lock_irqsave(&iommu_bitmap_lock, flags);
137 if (need_flush) { 137 if (need_flush) {
138 k8_flush_garts(); 138 k8_flush_garts();
139 need_flush = 0; 139 need_flush = false;
140 } 140 }
141 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 141 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
142} 142}
@@ -175,7 +175,8 @@ static void dump_leak(void)
175 iommu_leak_pages); 175 iommu_leak_pages);
176 for (i = 0; i < iommu_leak_pages; i += 2) { 176 for (i = 0; i < iommu_leak_pages; i += 2) {
177 printk(KERN_DEBUG "%lu: ", iommu_pages-i); 177 printk(KERN_DEBUG "%lu: ", iommu_pages-i);
178 printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); 178 printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
179 0);
179 printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); 180 printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
180 } 181 }
181 printk(KERN_DEBUG "\n"); 182 printk(KERN_DEBUG "\n");
@@ -214,24 +215,14 @@ static void iommu_full(struct device *dev, size_t size, int dir)
214static inline int 215static inline int
215need_iommu(struct device *dev, unsigned long addr, size_t size) 216need_iommu(struct device *dev, unsigned long addr, size_t size)
216{ 217{
217 u64 mask = *dev->dma_mask; 218 return force_iommu ||
218 int high = addr + size > mask; 219 !is_buffer_dma_capable(*dev->dma_mask, addr, size);
219 int mmu = high;
220
221 if (force_iommu)
222 mmu = 1;
223
224 return mmu;
225} 220}
226 221
227static inline int 222static inline int
228nonforced_iommu(struct device *dev, unsigned long addr, size_t size) 223nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
229{ 224{
230 u64 mask = *dev->dma_mask; 225 return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
231 int high = addr + size > mask;
232 int mmu = high;
233
234 return mmu;
235} 226}
236 227
237/* Map a single continuous physical area into the IOMMU. 228/* Map a single continuous physical area into the IOMMU.
@@ -261,20 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
261 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); 252 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
262} 253}
263 254
264static dma_addr_t
265gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
266{
267 dma_addr_t map;
268 unsigned long align_mask;
269
270 align_mask = (1UL << get_order(size)) - 1;
271 map = dma_map_area(dev, paddr, size, dir, align_mask);
272
273 flush_gart();
274
275 return map;
276}
277
278/* Map a single area into the IOMMU */ 255/* Map a single area into the IOMMU */
279static dma_addr_t 256static dma_addr_t
280gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) 257gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
@@ -282,7 +259,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
282 unsigned long bus; 259 unsigned long bus;
283 260
284 if (!dev) 261 if (!dev)
285 dev = &fallback_dev; 262 dev = &x86_dma_fallback_dev;
286 263
287 if (!need_iommu(dev, paddr, size)) 264 if (!need_iommu(dev, paddr, size))
288 return paddr; 265 return paddr;
@@ -434,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
434 return 0; 411 return 0;
435 412
436 if (!dev) 413 if (!dev)
437 dev = &fallback_dev; 414 dev = &x86_dma_fallback_dev;
438 415
439 out = 0; 416 out = 0;
440 start = 0; 417 start = 0;
@@ -506,6 +483,46 @@ error:
506 return 0; 483 return 0;
507} 484}
508 485
486/* allocate and map a coherent mapping */
487static void *
488gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
489 gfp_t flag)
490{
491 dma_addr_t paddr;
492 unsigned long align_mask;
493 struct page *page;
494
495 if (force_iommu && !(flag & GFP_DMA)) {
496 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
497 page = alloc_pages(flag | __GFP_ZERO, get_order(size));
498 if (!page)
499 return NULL;
500
501 align_mask = (1UL << get_order(size)) - 1;
502 paddr = dma_map_area(dev, page_to_phys(page), size,
503 DMA_BIDIRECTIONAL, align_mask);
504
505 flush_gart();
506 if (paddr != bad_dma_address) {
507 *dma_addr = paddr;
508 return page_address(page);
509 }
510 __free_pages(page, get_order(size));
511 } else
512 return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
513
514 return NULL;
515}
516
517/* free a coherent mapping */
518static void
519gart_free_coherent(struct device *dev, size_t size, void *vaddr,
520 dma_addr_t dma_addr)
521{
522 gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL);
523 free_pages((unsigned long)vaddr, get_order(size));
524}
525
509static int no_agp; 526static int no_agp;
510 527
511static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 528static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -656,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 info->aper_size = aper_size >> 20; 673 info->aper_size = aper_size >> 20;
657 674
658 gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); 675 gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
659 gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); 676 gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
677 get_order(gatt_size));
660 if (!gatt) 678 if (!gatt)
661 panic("Cannot allocate GATT table"); 679 panic("Cannot allocate GATT table");
662 if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) 680 if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
663 panic("Could not set GART PTEs to uncacheable pages"); 681 panic("Could not set GART PTEs to uncacheable pages");
664 682
665 memset(gatt, 0, gatt_size);
666 agp_gatt_table = gatt; 683 agp_gatt_table = gatt;
667 684
668 enable_gart_translations(); 685 enable_gart_translations();
@@ -671,7 +688,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
671 if (!error) 688 if (!error)
672 error = sysdev_register(&device_gart); 689 error = sysdev_register(&device_gart);
673 if (error) 690 if (error)
674 panic("Could not register gart_sysdev -- would corrupt data on next suspend"); 691 panic("Could not register gart_sysdev -- "
692 "would corrupt data on next suspend");
675 693
676 flush_gart(); 694 flush_gart();
677 695
@@ -687,20 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
687 return -1; 705 return -1;
688} 706}
689 707
690extern int agp_amd64_init(void);
691
692static struct dma_mapping_ops gart_dma_ops = { 708static struct dma_mapping_ops gart_dma_ops = {
693 .map_single = gart_map_single, 709 .map_single = gart_map_single,
694 .map_simple = gart_map_simple,
695 .unmap_single = gart_unmap_single, 710 .unmap_single = gart_unmap_single,
696 .sync_single_for_cpu = NULL,
697 .sync_single_for_device = NULL,
698 .sync_single_range_for_cpu = NULL,
699 .sync_single_range_for_device = NULL,
700 .sync_sg_for_cpu = NULL,
701 .sync_sg_for_device = NULL,
702 .map_sg = gart_map_sg, 711 .map_sg = gart_map_sg,
703 .unmap_sg = gart_unmap_sg, 712 .unmap_sg = gart_unmap_sg,
713 .alloc_coherent = gart_alloc_coherent,
714 .free_coherent = gart_free_coherent,
704}; 715};
705 716
706void gart_iommu_shutdown(void) 717void gart_iommu_shutdown(void)
@@ -760,8 +771,8 @@ void __init gart_iommu_init(void)
760 (no_agp && init_k8_gatt(&info) < 0)) { 771 (no_agp && init_k8_gatt(&info) < 0)) {
761 if (max_pfn > MAX_DMA32_PFN) { 772 if (max_pfn > MAX_DMA32_PFN) {
762 printk(KERN_WARNING "More than 4GB of memory " 773 printk(KERN_WARNING "More than 4GB of memory "
763 "but GART IOMMU not available.\n" 774 "but GART IOMMU not available.\n");
764 KERN_WARNING "falling back to iommu=soft.\n"); 775 printk(KERN_WARNING "falling back to iommu=soft.\n");
765 } 776 }
766 return; 777 return;
767 } 778 }
@@ -779,19 +790,16 @@ void __init gart_iommu_init(void)
779 iommu_size = check_iommu_size(info.aper_base, aper_size); 790 iommu_size = check_iommu_size(info.aper_base, aper_size);
780 iommu_pages = iommu_size >> PAGE_SHIFT; 791 iommu_pages = iommu_size >> PAGE_SHIFT;
781 792
782 iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, 793 iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
783 get_order(iommu_pages/8)); 794 get_order(iommu_pages/8));
784 if (!iommu_gart_bitmap) 795 if (!iommu_gart_bitmap)
785 panic("Cannot allocate iommu bitmap\n"); 796 panic("Cannot allocate iommu bitmap\n");
786 memset(iommu_gart_bitmap, 0, iommu_pages/8);
787 797
788#ifdef CONFIG_IOMMU_LEAK 798#ifdef CONFIG_IOMMU_LEAK
789 if (leak_trace) { 799 if (leak_trace) {
790 iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, 800 iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
791 get_order(iommu_pages*sizeof(void *))); 801 get_order(iommu_pages*sizeof(void *)));
792 if (iommu_leak_tab) 802 if (!iommu_leak_tab)
793 memset(iommu_leak_tab, 0, iommu_pages * 8);
794 else
795 printk(KERN_DEBUG 803 printk(KERN_DEBUG
796 "PCI-DMA: Cannot allocate leak trace area\n"); 804 "PCI-DMA: Cannot allocate leak trace area\n");
797 } 805 }
@@ -801,7 +809,7 @@ void __init gart_iommu_init(void)
801 * Out of IOMMU space handling. 809 * Out of IOMMU space handling.
802 * Reserve some invalid pages at the beginning of the GART. 810 * Reserve some invalid pages at the beginning of the GART.
803 */ 811 */
804 set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); 812 iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
805 813
806 agp_memory_reserved = iommu_size; 814 agp_memory_reserved = iommu_size;
807 printk(KERN_INFO 815 printk(KERN_INFO
@@ -859,7 +867,8 @@ void __init gart_parse_options(char *p)
859 if (!strncmp(p, "leak", 4)) { 867 if (!strncmp(p, "leak", 4)) {
860 leak_trace = 1; 868 leak_trace = 1;
861 p += 4; 869 p += 4;
862 if (*p == '=') ++p; 870 if (*p == '=')
871 ++p;
863 if (isdigit(*p) && get_option(&p, &arg)) 872 if (isdigit(*p) && get_option(&p, &arg))
864 iommu_leak_pages = arg; 873 iommu_leak_pages = arg;
865 } 874 }
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 3f91f71cdc3e..c70ab5a5d4c8 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -14,7 +14,7 @@
14static int 14static int
15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
16{ 16{
17 if (hwdev && bus + size > *hwdev->dma_mask) { 17 if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
18 if (*hwdev->dma_mask >= DMA_32BIT_MASK) 18 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
19 printk(KERN_ERR 19 printk(KERN_ERR
20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", 20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -72,7 +72,15 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
72 return nents; 72 return nents;
73} 73}
74 74
75static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
76 dma_addr_t dma_addr)
77{
78 free_pages((unsigned long)vaddr, get_order(size));
79}
80
75struct dma_mapping_ops nommu_dma_ops = { 81struct dma_mapping_ops nommu_dma_ops = {
82 .alloc_coherent = dma_generic_alloc_coherent,
83 .free_coherent = nommu_free_coherent,
76 .map_single = nommu_map_single, 84 .map_single = nommu_map_single,
77 .map_sg = nommu_map_sg, 85 .map_sg = nommu_map_sg,
78 .is_phys = 1, 86 .is_phys = 1,
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c3edcdc08e72..6c4c1c3c50ee 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -80,7 +80,7 @@ static long list_size;
80 80
81static void domain_remove_dev_info(struct dmar_domain *domain); 81static void domain_remove_dev_info(struct dmar_domain *domain);
82 82
83static int dmar_disabled; 83int dmar_disabled;
84static int __initdata dmar_map_gfx = 1; 84static int __initdata dmar_map_gfx = 1;
85static int dmar_forcedac; 85static int dmar_forcedac;
86static int intel_iommu_strict; 86static int intel_iommu_strict;
diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h
index 783f43e58052..041d0db7da27 100644
--- a/include/asm-x86/amd_iommu.h
+++ b/include/asm-x86/amd_iommu.h
@@ -20,10 +20,13 @@
20#ifndef ASM_X86__AMD_IOMMU_H 20#ifndef ASM_X86__AMD_IOMMU_H
21#define ASM_X86__AMD_IOMMU_H 21#define ASM_X86__AMD_IOMMU_H
22 22
23#include <linux/irqreturn.h>
24
23#ifdef CONFIG_AMD_IOMMU 25#ifdef CONFIG_AMD_IOMMU
24extern int amd_iommu_init(void); 26extern int amd_iommu_init(void);
25extern int amd_iommu_init_dma_ops(void); 27extern int amd_iommu_init_dma_ops(void);
26extern void amd_iommu_detect(void); 28extern void amd_iommu_detect(void);
29extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
27#else 30#else
28static inline int amd_iommu_init(void) { return -ENODEV; } 31static inline int amd_iommu_init(void) { return -ENODEV; }
29static inline void amd_iommu_detect(void) { } 32static inline void amd_iommu_detect(void) { }
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index 1ffa4e53c989..b3085869a17b 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -37,6 +37,7 @@
37/* Capability offsets used by the driver */ 37/* Capability offsets used by the driver */
38#define MMIO_CAP_HDR_OFFSET 0x00 38#define MMIO_CAP_HDR_OFFSET 0x00
39#define MMIO_RANGE_OFFSET 0x0c 39#define MMIO_RANGE_OFFSET 0x0c
40#define MMIO_MISC_OFFSET 0x10
40 41
41/* Masks, shifts and macros to parse the device range capability */ 42/* Masks, shifts and macros to parse the device range capability */
42#define MMIO_RANGE_LD_MASK 0xff000000 43#define MMIO_RANGE_LD_MASK 0xff000000
@@ -48,6 +49,7 @@
48#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) 49#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
49#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) 50#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
50#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) 51#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
52#define MMIO_MSI_NUM(x) ((x) & 0x1f)
51 53
52/* Flag masks for the AMD IOMMU exclusion range */ 54/* Flag masks for the AMD IOMMU exclusion range */
53#define MMIO_EXCL_ENABLE_MASK 0x01ULL 55#define MMIO_EXCL_ENABLE_MASK 0x01ULL
@@ -69,6 +71,25 @@
69/* MMIO status bits */ 71/* MMIO status bits */
70#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 72#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
71 73
74/* event logging constants */
75#define EVENT_ENTRY_SIZE 0x10
76#define EVENT_TYPE_SHIFT 28
77#define EVENT_TYPE_MASK 0xf
78#define EVENT_TYPE_ILL_DEV 0x1
79#define EVENT_TYPE_IO_FAULT 0x2
80#define EVENT_TYPE_DEV_TAB_ERR 0x3
81#define EVENT_TYPE_PAGE_TAB_ERR 0x4
82#define EVENT_TYPE_ILL_CMD 0x5
83#define EVENT_TYPE_CMD_HARD_ERR 0x6
84#define EVENT_TYPE_IOTLB_INV_TO 0x7
85#define EVENT_TYPE_INV_DEV_REQ 0x8
86#define EVENT_DEVID_MASK 0xffff
87#define EVENT_DEVID_SHIFT 0
88#define EVENT_DOMID_MASK 0xffff
89#define EVENT_DOMID_SHIFT 0
90#define EVENT_FLAGS_MASK 0xfff
91#define EVENT_FLAGS_SHIFT 0x10
92
72/* feature control bits */ 93/* feature control bits */
73#define CONTROL_IOMMU_EN 0x00ULL 94#define CONTROL_IOMMU_EN 0x00ULL
74#define CONTROL_HT_TUN_EN 0x01ULL 95#define CONTROL_HT_TUN_EN 0x01ULL
@@ -109,6 +130,8 @@
109#define DEV_ENTRY_NMI_PASS 0xba 130#define DEV_ENTRY_NMI_PASS 0xba
110#define DEV_ENTRY_LINT0_PASS 0xbe 131#define DEV_ENTRY_LINT0_PASS 0xbe
111#define DEV_ENTRY_LINT1_PASS 0xbf 132#define DEV_ENTRY_LINT1_PASS 0xbf
133#define DEV_ENTRY_MODE_MASK 0x07
134#define DEV_ENTRY_MODE_SHIFT 0x09
112 135
113/* constants to configure the command buffer */ 136/* constants to configure the command buffer */
114#define CMD_BUFFER_SIZE 8192 137#define CMD_BUFFER_SIZE 8192
@@ -116,6 +139,10 @@
116#define MMIO_CMD_SIZE_SHIFT 56 139#define MMIO_CMD_SIZE_SHIFT 56
117#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) 140#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
118 141
142/* constants for event buffer handling */
143#define EVT_BUFFER_SIZE 8192 /* 512 entries */
144#define EVT_LEN_MASK (0x9ULL << 56)
145
119#define PAGE_MODE_1_LEVEL 0x01 146#define PAGE_MODE_1_LEVEL 0x01
120#define PAGE_MODE_2_LEVEL 0x02 147#define PAGE_MODE_2_LEVEL 0x02
121#define PAGE_MODE_3_LEVEL 0x03 148#define PAGE_MODE_3_LEVEL 0x03
@@ -134,6 +161,7 @@
134#define IOMMU_MAP_SIZE_L3 (1ULL << 39) 161#define IOMMU_MAP_SIZE_L3 (1ULL << 39)
135 162
136#define IOMMU_PTE_P (1ULL << 0) 163#define IOMMU_PTE_P (1ULL << 0)
164#define IOMMU_PTE_TV (1ULL << 1)
137#define IOMMU_PTE_U (1ULL << 59) 165#define IOMMU_PTE_U (1ULL << 59)
138#define IOMMU_PTE_FC (1ULL << 60) 166#define IOMMU_PTE_FC (1ULL << 60)
139#define IOMMU_PTE_IR (1ULL << 61) 167#define IOMMU_PTE_IR (1ULL << 61)
@@ -159,6 +187,9 @@
159 187
160#define MAX_DOMAIN_ID 65536 188#define MAX_DOMAIN_ID 65536
161 189
190/* FIXME: move this macro to <linux/pci.h> */
191#define PCI_BUS(x) (((x) >> 8) & 0xff)
192
162/* 193/*
163 * This structure contains generic data for IOMMU protection domains 194 * This structure contains generic data for IOMMU protection domains
164 * independent of their use. 195 * independent of their use.
@@ -196,6 +227,15 @@ struct dma_ops_domain {
196 * just calculate its address in constant time. 227 * just calculate its address in constant time.
197 */ 228 */
198 u64 **pte_pages; 229 u64 **pte_pages;
230
231 /* This will be set to true when TLB needs to be flushed */
232 bool need_flush;
233
234 /*
235 * if this is a preallocated domain, keep the device for which it was
236 * preallocated in this variable
237 */
238 u16 target_dev;
199}; 239};
200 240
201/* 241/*
@@ -208,8 +248,9 @@ struct amd_iommu {
208 /* locks the accesses to the hardware */ 248 /* locks the accesses to the hardware */
209 spinlock_t lock; 249 spinlock_t lock;
210 250
211 /* device id of this IOMMU */ 251 /* Pointer to PCI device of this IOMMU */
212 u16 devid; 252 struct pci_dev *dev;
253
213 /* 254 /*
214 * Capability pointer. There could be more than one IOMMU per PCI 255 * Capability pointer. There could be more than one IOMMU per PCI
215 * device function if there are more than one AMD IOMMU capability 256 * device function if there are more than one AMD IOMMU capability
@@ -225,6 +266,9 @@ struct amd_iommu {
225 /* capabilities of that IOMMU read from ACPI */ 266 /* capabilities of that IOMMU read from ACPI */
226 u32 cap; 267 u32 cap;
227 268
269 /* pci domain of this IOMMU */
270 u16 pci_seg;
271
228 /* first device this IOMMU handles. read from PCI */ 272 /* first device this IOMMU handles. read from PCI */
229 u16 first_device; 273 u16 first_device;
230 /* last device this IOMMU handles. read from PCI */ 274 /* last device this IOMMU handles. read from PCI */
@@ -240,9 +284,19 @@ struct amd_iommu {
240 /* size of command buffer */ 284 /* size of command buffer */
241 u32 cmd_buf_size; 285 u32 cmd_buf_size;
242 286
287 /* event buffer virtual address */
288 u8 *evt_buf;
289 /* size of event buffer */
290 u32 evt_buf_size;
291 /* MSI number for event interrupt */
292 u16 evt_msi_num;
293
243 /* if one, we need to send a completion wait command */ 294 /* if one, we need to send a completion wait command */
244 int need_sync; 295 int need_sync;
245 296
297 /* true if interrupts for this IOMMU are already enabled */
298 bool int_enabled;
299
246 /* default dma_ops domain for that IOMMU */ 300 /* default dma_ops domain for that IOMMU */
247 struct dma_ops_domain *default_dom; 301 struct dma_ops_domain *default_dom;
248}; 302};
@@ -322,6 +376,12 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
322/* will be 1 if device isolation is enabled */ 376/* will be 1 if device isolation is enabled */
323extern int amd_iommu_isolate; 377extern int amd_iommu_isolate;
324 378
379/*
380 * If true, the addresses will be flushed on unmap time, not when
381 * they are reused
382 */
383extern bool amd_iommu_unmap_flush;
384
325/* takes a PCI device id and prints it out in a readable form */ 385/* takes a PCI device id and prints it out in a readable form */
326static inline void print_devid(u16 devid, int nl) 386static inline void print_devid(u16 devid, int nl)
327{ 387{
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 61989b93b475..451a74762bd4 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -424,16 +424,6 @@ static inline int fls(int x)
424 424
425#undef ADDR 425#undef ADDR
426 426
427static inline void set_bit_string(unsigned long *bitmap,
428 unsigned long i, int len)
429{
430 unsigned long end = i + len;
431 while (i < end) {
432 __set_bit(i, bitmap);
433 i++;
434 }
435}
436
437#ifdef __KERNEL__ 427#ifdef __KERNEL__
438 428
439#include <asm-generic/bitops/sched.h> 429#include <asm-generic/bitops/sched.h>
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index 5d200e78bd81..219c33d6361c 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -9,12 +9,12 @@
9#include <linux/scatterlist.h> 9#include <linux/scatterlist.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/swiotlb.h> 11#include <asm/swiotlb.h>
12#include <asm-generic/dma-coherent.h>
12 13
13extern dma_addr_t bad_dma_address; 14extern dma_addr_t bad_dma_address;
14extern int iommu_merge; 15extern int iommu_merge;
15extern struct device fallback_dev; 16extern struct device x86_dma_fallback_dev;
16extern int panic_on_overflow; 17extern int panic_on_overflow;
17extern int force_iommu;
18 18
19struct dma_mapping_ops { 19struct dma_mapping_ops {
20 int (*mapping_error)(struct device *dev, 20 int (*mapping_error)(struct device *dev,
@@ -25,9 +25,6 @@ struct dma_mapping_ops {
25 void *vaddr, dma_addr_t dma_handle); 25 void *vaddr, dma_addr_t dma_handle);
26 dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, 26 dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr,
27 size_t size, int direction); 27 size_t size, int direction);
28 /* like map_single, but doesn't check the device mask */
29 dma_addr_t (*map_simple)(struct device *hwdev, phys_addr_t ptr,
30 size_t size, int direction);
31 void (*unmap_single)(struct device *dev, dma_addr_t addr, 28 void (*unmap_single)(struct device *dev, dma_addr_t addr,
32 size_t size, int direction); 29 size_t size, int direction);
33 void (*sync_single_for_cpu)(struct device *hwdev, 30 void (*sync_single_for_cpu)(struct device *hwdev,
@@ -68,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
68 return dma_ops; 65 return dma_ops;
69 else 66 else
70 return dev->archdata.dma_ops; 67 return dev->archdata.dma_ops;
71#endif 68#endif /* ASM_X86__DMA_MAPPING_H */
72} 69}
73 70
74/* Make sure we keep the same behaviour */ 71/* Make sure we keep the same behaviour */
@@ -87,17 +84,14 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
87 84
88#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 85#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
89#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) 86#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
90 87#define dma_is_consistent(d, h) (1)
91void *dma_alloc_coherent(struct device *dev, size_t size,
92 dma_addr_t *dma_handle, gfp_t flag);
93
94void dma_free_coherent(struct device *dev, size_t size,
95 void *vaddr, dma_addr_t dma_handle);
96
97 88
98extern int dma_supported(struct device *hwdev, u64 mask); 89extern int dma_supported(struct device *hwdev, u64 mask);
99extern int dma_set_mask(struct device *dev, u64 mask); 90extern int dma_set_mask(struct device *dev, u64 mask);
100 91
92extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
93 dma_addr_t *dma_addr, gfp_t flag);
94
101static inline dma_addr_t 95static inline dma_addr_t
102dma_map_single(struct device *hwdev, void *ptr, size_t size, 96dma_map_single(struct device *hwdev, void *ptr, size_t size,
103 int direction) 97 int direction)
@@ -247,7 +241,68 @@ static inline int dma_get_cache_alignment(void)
247 return boot_cpu_data.x86_clflush_size; 241 return boot_cpu_data.x86_clflush_size;
248} 242}
249 243
250#define dma_is_consistent(d, h) (1) 244static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
245 gfp_t gfp)
246{
247 unsigned long dma_mask = 0;
251 248
252#include <asm-generic/dma-coherent.h> 249 dma_mask = dev->coherent_dma_mask;
253#endif /* ASM_X86__DMA_MAPPING_H */ 250 if (!dma_mask)
251 dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
252
253 return dma_mask;
254}
255
256static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
257{
258#ifdef CONFIG_X86_64
259 unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
260
261 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA))
262 gfp |= GFP_DMA32;
263#endif
264 return gfp;
265}
266
267static inline void *
268dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
269 gfp_t gfp)
270{
271 struct dma_mapping_ops *ops = get_dma_ops(dev);
272 void *memory;
273
274 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
275
276 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
277 return memory;
278
279 if (!dev) {
280 dev = &x86_dma_fallback_dev;
281 gfp |= GFP_DMA;
282 }
283
284 if (!is_device_dma_capable(dev))
285 return NULL;
286
287 if (!ops->alloc_coherent)
288 return NULL;
289
290 return ops->alloc_coherent(dev, size, dma_handle,
291 dma_alloc_coherent_gfp_flags(dev, gfp));
292}
293
294static inline void dma_free_coherent(struct device *dev, size_t size,
295 void *vaddr, dma_addr_t bus)
296{
297 struct dma_mapping_ops *ops = get_dma_ops(dev);
298
299 WARN_ON(irqs_disabled()); /* for portability */
300
301 if (dma_release_from_coherent(dev, get_order(size), vaddr))
302 return;
303
304 if (ops->free_coherent)
305 ops->free_coherent(dev, size, vaddr, bus);
306}
307
308#endif
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index baa54faba892..605edb39ef9e 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -29,6 +29,8 @@ extern int fix_aperture;
29#define AMD64_GARTCACHECTL 0x9c 29#define AMD64_GARTCACHECTL 0x9c
30#define AMD64_GARTEN (1<<0) 30#define AMD64_GARTEN (1<<0)
31 31
32extern int agp_amd64_init(void);
33
32static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) 34static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
33{ 35{
34 u32 tmp, ctl; 36 u32 tmp, ctl;
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index e86f44148c66..546ad3110fea 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -6,6 +6,7 @@ extern void no_iommu_init(void);
6extern struct dma_mapping_ops nommu_dma_ops; 6extern struct dma_mapping_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 7extern int force_iommu, no_iommu;
8extern int iommu_detected; 8extern int iommu_detected;
9extern int dmar_disabled;
9 10
10extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); 11extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
11 12
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 952e0f857ac9..ba9114ec5d3a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -48,6 +48,11 @@ static inline int is_device_dma_capable(struct device *dev)
48 return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; 48 return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
49} 49}
50 50
51static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
52{
53 return addr + size <= mask;
54}
55
51#ifdef CONFIG_HAS_DMA 56#ifdef CONFIG_HAS_DMA
52#include <asm/dma-mapping.h> 57#include <asm/dma-mapping.h>
53#else 58#else
@@ -58,6 +63,13 @@ static inline int is_device_dma_capable(struct device *dev)
58#define dma_sync_single dma_sync_single_for_cpu 63#define dma_sync_single dma_sync_single_for_cpu
59#define dma_sync_sg dma_sync_sg_for_cpu 64#define dma_sync_sg dma_sync_sg_for_cpu
60 65
66static inline u64 dma_get_mask(struct device *dev)
67{
68 if (dev && dev->dma_mask && *dev->dma_mask)
69 return *dev->dma_mask;
70 return DMA_32BIT_MASK;
71}
72
61extern u64 dma_get_required_mask(struct device *dev); 73extern u64 dma_get_required_mask(struct device *dev);
62 74
63static inline unsigned int dma_get_max_seg_size(struct device *dev) 75static inline unsigned int dma_get_max_seg_size(struct device *dev)
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index c975caf75385..a6d0586e2bf7 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -1,6 +1,20 @@
1#ifndef _LINUX_IOMMU_HELPER_H
2#define _LINUX_IOMMU_HELPER_H
3
4static inline unsigned long iommu_device_max_index(unsigned long size,
5 unsigned long offset,
6 u64 dma_mask)
7{
8 if (size + offset > dma_mask)
9 return dma_mask - offset + 1;
10 else
11 return size;
12}
13
1extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, 14extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
2 unsigned long shift, 15 unsigned long shift,
3 unsigned long boundary_size); 16 unsigned long boundary_size);
17extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len);
4extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, 18extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
5 unsigned long start, unsigned int nr, 19 unsigned long start, unsigned int nr,
6 unsigned long shift, 20 unsigned long shift,
@@ -8,3 +22,5 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
8 unsigned long align_mask); 22 unsigned long align_mask);
9extern void iommu_area_free(unsigned long *map, unsigned long start, 23extern void iommu_area_free(unsigned long *map, unsigned long start,
10 unsigned int nr); 24 unsigned int nr);
25
26#endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a65b082a888a..f63b5455801c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -497,6 +497,16 @@
497#define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 497#define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101
498#define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 498#define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102
499#define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 499#define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103
500#define PCI_DEVICE_ID_AMD_10H_NB_HT 0x1200
501#define PCI_DEVICE_ID_AMD_10H_NB_MAP 0x1201
502#define PCI_DEVICE_ID_AMD_10H_NB_DRAM 0x1202
503#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203
504#define PCI_DEVICE_ID_AMD_10H_NB_LINK 0x1204
505#define PCI_DEVICE_ID_AMD_11H_NB_HT 0x1300
506#define PCI_DEVICE_ID_AMD_11H_NB_MAP 0x1301
507#define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302
508#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
509#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
500#define PCI_DEVICE_ID_AMD_LANCE 0x2000 510#define PCI_DEVICE_ID_AMD_LANCE 0x2000
501#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 511#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
502#define PCI_DEVICE_ID_AMD_SCSI 0x2020 512#define PCI_DEVICE_ID_AMD_SCSI 0x2020
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index c1d4d5b4c61c..f013a0c2e111 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -124,6 +124,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
124 } 124 }
125 return (mem != NULL); 125 return (mem != NULL);
126} 126}
127EXPORT_SYMBOL(dma_alloc_from_coherent);
127 128
128/** 129/**
129 * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool 130 * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
@@ -151,3 +152,4 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
151 } 152 }
152 return 0; 153 return 0;
153} 154}
155EXPORT_SYMBOL(dma_release_from_coherent);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c3f77a..5d90074dca75 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -30,8 +30,7 @@ again:
30 return index; 30 return index;
31} 31}
32 32
33static inline void set_bit_area(unsigned long *map, unsigned long i, 33void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
34 int len)
35{ 34{
36 unsigned long end = i + len; 35 unsigned long end = i + len;
37 while (i < end) { 36 while (i < end) {
@@ -64,7 +63,7 @@ again:
64 start = index + 1; 63 start = index + 1;
65 goto again; 64 goto again;
66 } 65 }
67 set_bit_area(map, index, nr); 66 iommu_area_reserve(map, index, nr);
68 } 67 }
69 return index; 68 return index;
70} 69}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8826fdf0f180..f8eebd489149 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -274,13 +274,14 @@ cleanup1:
274} 274}
275 275
276static int 276static int
277address_needs_mapping(struct device *hwdev, dma_addr_t addr) 277address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
278{ 278{
279 dma_addr_t mask = 0xffffffff; 279 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
280 /* If the device has a mask, use it, otherwise default to 32 bits */ 280}
281 if (hwdev && hwdev->dma_mask) 281
282 mask = *hwdev->dma_mask; 282static int is_swiotlb_buffer(char *addr)
283 return (addr & ~mask) != 0; 283{
284 return addr >= io_tlb_start && addr < io_tlb_end;
284} 285}
285 286
286/* 287/*
@@ -467,15 +468,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
467 void *ret; 468 void *ret;
468 int order = get_order(size); 469 int order = get_order(size);
469 470
470 /*
471 * XXX fix me: the DMA API should pass us an explicit DMA mask
472 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
473 * bit range instead of a 16MB one).
474 */
475 flags |= GFP_DMA;
476
477 ret = (void *)__get_free_pages(flags, order); 471 ret = (void *)__get_free_pages(flags, order);
478 if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) { 472 if (ret && address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
479 /* 473 /*
480 * The allocated memory isn't reachable by the device. 474 * The allocated memory isn't reachable by the device.
481 * Fall back on swiotlb_map_single(). 475 * Fall back on swiotlb_map_single().
@@ -490,19 +484,16 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
490 * swiotlb_map_single(), which will grab memory from 484 * swiotlb_map_single(), which will grab memory from
491 * the lowest available address range. 485 * the lowest available address range.
492 */ 486 */
493 dma_addr_t handle; 487 ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
494 handle = swiotlb_map_single(hwdev, NULL, size, DMA_FROM_DEVICE); 488 if (!ret)
495 if (swiotlb_dma_mapping_error(hwdev, handle))
496 return NULL; 489 return NULL;
497
498 ret = bus_to_virt(handle);
499 } 490 }
500 491
501 memset(ret, 0, size); 492 memset(ret, 0, size);
502 dev_addr = virt_to_bus(ret); 493 dev_addr = virt_to_bus(ret);
503 494
504 /* Confirm address can be DMA'd by device */ 495 /* Confirm address can be DMA'd by device */
505 if (address_needs_mapping(hwdev, dev_addr)) { 496 if (address_needs_mapping(hwdev, dev_addr, size)) {
506 printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", 497 printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
507 (unsigned long long)*hwdev->dma_mask, 498 (unsigned long long)*hwdev->dma_mask,
508 (unsigned long long)dev_addr); 499 (unsigned long long)dev_addr);
@@ -518,12 +509,11 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
518 dma_addr_t dma_handle) 509 dma_addr_t dma_handle)
519{ 510{
520 WARN_ON(irqs_disabled()); 511 WARN_ON(irqs_disabled());
521 if (!(vaddr >= (void *)io_tlb_start 512 if (!is_swiotlb_buffer(vaddr))
522 && vaddr < (void *)io_tlb_end))
523 free_pages((unsigned long) vaddr, get_order(size)); 513 free_pages((unsigned long) vaddr, get_order(size));
524 else 514 else
525 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 515 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
526 swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); 516 unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
527} 517}
528 518
529static void 519static void
@@ -567,7 +557,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
567 * we can safely return the device addr and not worry about bounce 557 * we can safely return the device addr and not worry about bounce
568 * buffering it. 558 * buffering it.
569 */ 559 */
570 if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) 560 if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
571 return dev_addr; 561 return dev_addr;
572 562
573 /* 563 /*
@@ -584,7 +574,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
584 /* 574 /*
585 * Ensure that the address returned is DMA'ble 575 * Ensure that the address returned is DMA'ble
586 */ 576 */
587 if (address_needs_mapping(hwdev, dev_addr)) 577 if (address_needs_mapping(hwdev, dev_addr, size))
588 panic("map_single: bounce buffer is not DMA'ble"); 578 panic("map_single: bounce buffer is not DMA'ble");
589 579
590 return dev_addr; 580 return dev_addr;
@@ -612,7 +602,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
612 char *dma_addr = bus_to_virt(dev_addr); 602 char *dma_addr = bus_to_virt(dev_addr);
613 603
614 BUG_ON(dir == DMA_NONE); 604 BUG_ON(dir == DMA_NONE);
615 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 605 if (is_swiotlb_buffer(dma_addr))
616 unmap_single(hwdev, dma_addr, size, dir); 606 unmap_single(hwdev, dma_addr, size, dir);
617 else if (dir == DMA_FROM_DEVICE) 607 else if (dir == DMA_FROM_DEVICE)
618 dma_mark_clean(dma_addr, size); 608 dma_mark_clean(dma_addr, size);
@@ -642,7 +632,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
642 char *dma_addr = bus_to_virt(dev_addr); 632 char *dma_addr = bus_to_virt(dev_addr);
643 633
644 BUG_ON(dir == DMA_NONE); 634 BUG_ON(dir == DMA_NONE);
645 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 635 if (is_swiotlb_buffer(dma_addr))
646 sync_single(hwdev, dma_addr, size, dir, target); 636 sync_single(hwdev, dma_addr, size, dir, target);
647 else if (dir == DMA_FROM_DEVICE) 637 else if (dir == DMA_FROM_DEVICE)
648 dma_mark_clean(dma_addr, size); 638 dma_mark_clean(dma_addr, size);
@@ -673,7 +663,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
673 char *dma_addr = bus_to_virt(dev_addr) + offset; 663 char *dma_addr = bus_to_virt(dev_addr) + offset;
674 664
675 BUG_ON(dir == DMA_NONE); 665 BUG_ON(dir == DMA_NONE);
676 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 666 if (is_swiotlb_buffer(dma_addr))
677 sync_single(hwdev, dma_addr, size, dir, target); 667 sync_single(hwdev, dma_addr, size, dir, target);
678 else if (dir == DMA_FROM_DEVICE) 668 else if (dir == DMA_FROM_DEVICE)
679 dma_mark_clean(dma_addr, size); 669 dma_mark_clean(dma_addr, size);
@@ -727,7 +717,8 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
727 for_each_sg(sgl, sg, nelems, i) { 717 for_each_sg(sgl, sg, nelems, i) {
728 addr = SG_ENT_VIRT_ADDRESS(sg); 718 addr = SG_ENT_VIRT_ADDRESS(sg);
729 dev_addr = virt_to_bus(addr); 719 dev_addr = virt_to_bus(addr);
730 if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { 720 if (swiotlb_force ||
721 address_needs_mapping(hwdev, dev_addr, sg->length)) {
731 void *map = map_single(hwdev, addr, sg->length, dir); 722 void *map = map_single(hwdev, addr, sg->length, dir);
732 if (!map) { 723 if (!map) {
733 /* Don't panic here, we expect map_sg users 724 /* Don't panic here, we expect map_sg users