diff options
author | Keshavamurthy, Anil S <anil.s.keshavamurthy@intel.com> | 2007-10-21 19:41:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-22 11:13:18 -0400 |
commit | ba39592764ed20cee09aae5352e603a27bf56b0d (patch) | |
tree | efe7ec88bbd4d6b08b639830352c68411a7ef7fb | |
parent | f8de50eb6b085572ea773f26e066835ea3d3028b (diff) |
Intel IOMMU: Intel IOMMU driver
Actual intel IOMMU driver. Hardware spec can be found at:
http://www.intel.com/technology/virtualization
This driver sets the x86_64 'dma_ops', so it hooks into the standard DMA
APIs. In this way, PCI drivers get virtual DMA addresses. This change is
transparent to PCI drivers.
[akpm@linux-foundation.org: remove unneeded cast]
[akpm@linux-foundation.org: build fix]
[bunk@stusta.de: fix duplicate CONFIG_DMAR Makefile line]
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/Intel-IOMMU.txt | 93 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 10 | ||||
-rw-r--r-- | arch/x86/kernel/pci-dma_64.c | 5 | ||||
-rw-r--r-- | drivers/pci/Makefile | 2 | ||||
-rw-r--r-- | drivers/pci/intel-iommu.c | 1957 | ||||
-rw-r--r-- | drivers/pci/intel-iommu.h | 318 | ||||
-rw-r--r-- | include/linux/dmar.h | 22 |
7 files changed, 2406 insertions, 1 deletions
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt new file mode 100644 index 000000000000..cbb4dbaef761 --- /dev/null +++ b/Documentation/Intel-IOMMU.txt | |||
@@ -0,0 +1,93 @@ | |||
1 | Linux IOMMU Support | ||
2 | =================== | ||
3 | |||
4 | The architecture spec can be obtained from the below location. | ||
5 | |||
6 | http://www.intel.com/technology/virtualization/ | ||
7 | |||
8 | This guide gives a quick cheat sheet for some basic understanding. | ||
9 | |||
10 | Some Keywords | ||
11 | |||
12 | DMAR - DMA remapping | ||
13 | DRHD - DMA Remapping Hardware Unit Definition | ||
14 | RMRR - Reserved memory Region Reporting Structure | ||
15 | ZLR - Zero length reads from PCI devices | ||
16 | IOVA - IO Virtual address. | ||
17 | |||
18 | Basic stuff | ||
19 | ----------- | ||
20 | |||
21 | ACPI enumerates and lists the different DMA engines in the platform, and | ||
22 | device scope relationships between PCI devices and which DMA engine controls | ||
23 | them. | ||
24 | |||
25 | What is RMRR? | ||
26 | ------------- | ||
27 | |||
28 | There are some devices the BIOS controls, e.g. USB devices to perform | ||
29 | PS2 emulation. The regions of memory used for these devices are marked | ||
30 | reserved in the e820 map. When we turn on DMA translation, DMA to those | ||
31 | regions will fail. Hence BIOS uses RMRR to specify these regions along with | ||
32 | devices that need to access these regions. OS is expected to setup | ||
33 | unity mappings for these regions for these devices to access these regions. | ||
34 | |||
35 | How is IOVA generated? | ||
36 | ---------------------- | ||
37 | |||
38 | Well behaved drivers call pci_map_*() before sending a command to a device | ||
39 | that needs to perform DMA. Once DMA is completed and the mapping is no longer | ||
40 | required, the driver calls pci_unmap_*() to unmap the region. | ||
41 | |||
42 | The Intel IOMMU driver allocates a virtual address per domain. Each PCIE | ||
43 | device has its own domain (hence protection). Devices under p2p bridges | ||
44 | share the virtual address with all devices under the p2p bridge due to | ||
45 | transaction id aliasing for p2p bridges. | ||
46 | |||
47 | IOVA generation is pretty generic. We used the same technique as vmalloc() | ||
48 | but these are not global address spaces, but separate for each domain. | ||
49 | Different DMA engines may support different number of domains. | ||
50 | |||
51 | We also allocate guard pages with each mapping, so we can attempt to catch | ||
52 | any overflow that might happen. | ||
53 | |||
54 | |||
55 | Graphics Problems? | ||
56 | ------------------ | ||
57 | If you encounter issues with graphics devices, you can try adding the | ||
58 | option intel_iommu=igfx_off to turn off DMA remapping for the integrated graphics engine. | ||
59 | |||
60 | Some exceptions to IOVA | ||
61 | ----------------------- | ||
62 | Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff). | ||
63 | The same is true for peer to peer transactions. Hence we reserve the | ||
64 | address from PCI MMIO ranges so they are not allocated for IOVA addresses. | ||
65 | |||
66 | Boot Message Sample | ||
67 | ------------------- | ||
68 | |||
69 | Something like this gets printed indicating presence of DMAR tables | ||
70 | in ACPI. | ||
71 | |||
72 | ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0 | ||
73 | |||
74 | When DMAR is being processed and initialized by ACPI, prints DMAR locations | ||
75 | and any RMRR's processed. | ||
76 | |||
77 | ACPI DMAR:Host address width 36 | ||
78 | ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000 | ||
79 | ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000 | ||
80 | ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000 | ||
81 | ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff | ||
82 | ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff | ||
83 | |||
84 | When DMAR is enabled for use, you will notice.. | ||
85 | |||
86 | PCI-DMA: Using DMAR IOMMU | ||
87 | |||
88 | TBD | ||
89 | ---- | ||
90 | |||
91 | - For compatibility testing, could use unity map domain for all devices, just | ||
92 | provide a 1-1 for all useful memory under a single domain for all devices. | ||
93 | - API for paravirt ops for abstracting functionality for VMM folks. | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6accd360da73..8157417724a8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -772,6 +772,16 @@ and is between 256 and 4096 characters. It is defined in the file | |||
772 | 772 | ||
773 | inttest= [IA64] | 773 | inttest= [IA64] |
774 | 774 | ||
775 | intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option | ||
776 | off | ||
777 | Disable intel iommu driver. | ||
778 | igfx_off [Default Off] | ||
779 | By default, gfx is mapped as normal device. If a gfx | ||
780 | device has a dedicated DMAR unit, the DMAR unit is | ||
781 | bypassed by not enabling DMAR with this option. In | ||
782 | this case, gfx device will use physical address for | ||
783 | DMA. | ||
784 | |||
775 | io7= [HW] IO7 for Marvel based alpha systems | 785 | io7= [HW] IO7 for Marvel based alpha systems |
776 | See comment before marvel_specify_io7 in | 786 | See comment before marvel_specify_io7 in |
777 | arch/alpha/kernel/core_marvel.c. | 787 | arch/alpha/kernel/core_marvel.c. |
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index afaf9f12c032..393e2725a6e3 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/dmar.h> | ||
10 | #include <asm/io.h> | 11 | #include <asm/io.h> |
11 | #include <asm/iommu.h> | 12 | #include <asm/iommu.h> |
12 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
@@ -305,6 +306,8 @@ void __init pci_iommu_alloc(void) | |||
305 | detect_calgary(); | 306 | detect_calgary(); |
306 | #endif | 307 | #endif |
307 | 308 | ||
309 | detect_intel_iommu(); | ||
310 | |||
308 | #ifdef CONFIG_SWIOTLB | 311 | #ifdef CONFIG_SWIOTLB |
309 | pci_swiotlb_init(); | 312 | pci_swiotlb_init(); |
310 | #endif | 313 | #endif |
@@ -316,6 +319,8 @@ static int __init pci_iommu_init(void) | |||
316 | calgary_iommu_init(); | 319 | calgary_iommu_init(); |
317 | #endif | 320 | #endif |
318 | 321 | ||
322 | intel_iommu_init(); | ||
323 | |||
319 | #ifdef CONFIG_IOMMU | 324 | #ifdef CONFIG_IOMMU |
320 | gart_iommu_init(); | 325 | gart_iommu_init(); |
321 | #endif | 326 | #endif |
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 836ab2f250d1..555055650733 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile | |||
@@ -21,7 +21,7 @@ obj-$(CONFIG_PCI_MSI) += msi.o | |||
21 | obj-$(CONFIG_HT_IRQ) += htirq.o | 21 | obj-$(CONFIG_HT_IRQ) += htirq.o |
22 | 22 | ||
23 | # Build Intel IOMMU support | 23 | # Build Intel IOMMU support |
24 | obj-$(CONFIG_DMAR) += dmar.o | 24 | obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o |
25 | 25 | ||
26 | # | 26 | # |
27 | # Some architectures use the generic PCI setup functions | 27 | # Some architectures use the generic PCI setup functions |
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c new file mode 100644 index 000000000000..93ed771b3254 --- /dev/null +++ b/drivers/pci/intel-iommu.c | |||
@@ -0,0 +1,1957 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
19 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/init.h> | ||
23 | #include <linux/bitmap.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/irq.h> | ||
26 | #include <linux/interrupt.h> | ||
27 | #include <linux/sysdev.h> | ||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/pci.h> | ||
30 | #include <linux/dmar.h> | ||
31 | #include <linux/dma-mapping.h> | ||
32 | #include <linux/mempool.h> | ||
33 | #include "iova.h" | ||
34 | #include "intel-iommu.h" | ||
35 | #include <asm/proto.h> /* force_iommu in this header in x86-64*/ | ||
36 | #include <asm/cacheflush.h> | ||
37 | #include <asm/iommu.h> | ||
38 | #include "pci.h" | ||
39 | |||
/*
 * PCI class-code predicates.  'pdev' is any expression yielding a pointer to
 * an object with a 'class' member; it is parenthesized so that arguments
 * such as '&dev' or 'cond ? a : b' expand correctly.
 *
 * IS_GFX_DEVICE: true when class >> 16 equals PCI_BASE_CLASS_DISPLAY.
 * IS_ISA_DEVICE: true when class >> 8 equals PCI_CLASS_BRIDGE_ISA.
 */
#define IS_GFX_DEVICE(pdev) ((((pdev)->class) >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((((pdev)->class) >> 8) == PCI_CLASS_BRIDGE_ISA)
42 | |||
/* Interrupt address window; per the driver documentation it is not translated */
#define IOAPIC_RANGE_START (0xfee00000)
#define IOAPIC_RANGE_END (0xfeefffff)
/* NOTE(review): presumably the lowest IOVA handed out - confirm at the users */
#define IOVA_START_ADDR (0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */

/*
 * Highest address representable with a guest address width of 'gaw' bits.
 * The argument is parenthesized so that compound expressions (e.g. 'a | b'
 * or 'c ? a : b') are evaluated before the shift.
 */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << (gaw)) - 1)
52 | |||
53 | static void domain_remove_dev_info(struct dmar_domain *domain); | ||
54 | |||
55 | static int dmar_disabled; | ||
56 | static int __initdata dmar_map_gfx = 1; | ||
57 | |||
58 | #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) | ||
59 | static DEFINE_SPINLOCK(device_domain_lock); | ||
60 | static LIST_HEAD(device_domain_list); | ||
61 | |||
62 | static int __init intel_iommu_setup(char *str) | ||
63 | { | ||
64 | if (!str) | ||
65 | return -EINVAL; | ||
66 | while (*str) { | ||
67 | if (!strncmp(str, "off", 3)) { | ||
68 | dmar_disabled = 1; | ||
69 | printk(KERN_INFO"Intel-IOMMU: disabled\n"); | ||
70 | } else if (!strncmp(str, "igfx_off", 8)) { | ||
71 | dmar_map_gfx = 0; | ||
72 | printk(KERN_INFO | ||
73 | "Intel-IOMMU: disable GFX device mapping\n"); | ||
74 | } | ||
75 | |||
76 | str += strcspn(str, ","); | ||
77 | while (*str == ',') | ||
78 | str++; | ||
79 | } | ||
80 | return 0; | ||
81 | } | ||
82 | __setup("intel_iommu=", intel_iommu_setup); | ||
83 | |||
84 | static struct kmem_cache *iommu_domain_cache; | ||
85 | static struct kmem_cache *iommu_devinfo_cache; | ||
86 | static struct kmem_cache *iommu_iova_cache; | ||
87 | |||
88 | static inline void *alloc_pgtable_page(void) | ||
89 | { | ||
90 | return (void *)get_zeroed_page(GFP_ATOMIC); | ||
91 | } | ||
92 | |||
/* Release a page previously obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page = (unsigned long)vaddr;

	free_page(page);
}
97 | |||
98 | static inline void *alloc_domain_mem(void) | ||
99 | { | ||
100 | return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); | ||
101 | } | ||
102 | |||
103 | static inline void free_domain_mem(void *vaddr) | ||
104 | { | ||
105 | kmem_cache_free(iommu_domain_cache, vaddr); | ||
106 | } | ||
107 | |||
108 | static inline void * alloc_devinfo_mem(void) | ||
109 | { | ||
110 | return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); | ||
111 | } | ||
112 | |||
113 | static inline void free_devinfo_mem(void *vaddr) | ||
114 | { | ||
115 | kmem_cache_free(iommu_devinfo_cache, vaddr); | ||
116 | } | ||
117 | |||
118 | struct iova *alloc_iova_mem(void) | ||
119 | { | ||
120 | return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); | ||
121 | } | ||
122 | |||
123 | void free_iova_mem(struct iova *iova) | ||
124 | { | ||
125 | kmem_cache_free(iommu_iova_cache, iova); | ||
126 | } | ||
127 | |||
128 | static inline void __iommu_flush_cache( | ||
129 | struct intel_iommu *iommu, void *addr, int size) | ||
130 | { | ||
131 | if (!ecap_coherent(iommu->ecap)) | ||
132 | clflush_cache_range(addr, size); | ||
133 | } | ||
134 | |||
135 | /* Gets context entry for a given bus and devfn */ | ||
136 | static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, | ||
137 | u8 bus, u8 devfn) | ||
138 | { | ||
139 | struct root_entry *root; | ||
140 | struct context_entry *context; | ||
141 | unsigned long phy_addr; | ||
142 | unsigned long flags; | ||
143 | |||
144 | spin_lock_irqsave(&iommu->lock, flags); | ||
145 | root = &iommu->root_entry[bus]; | ||
146 | context = get_context_addr_from_root(root); | ||
147 | if (!context) { | ||
148 | context = (struct context_entry *)alloc_pgtable_page(); | ||
149 | if (!context) { | ||
150 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
151 | return NULL; | ||
152 | } | ||
153 | __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K); | ||
154 | phy_addr = virt_to_phys((void *)context); | ||
155 | set_root_value(root, phy_addr); | ||
156 | set_root_present(root); | ||
157 | __iommu_flush_cache(iommu, root, sizeof(*root)); | ||
158 | } | ||
159 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
160 | return &context[devfn]; | ||
161 | } | ||
162 | |||
163 | static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) | ||
164 | { | ||
165 | struct root_entry *root; | ||
166 | struct context_entry *context; | ||
167 | int ret; | ||
168 | unsigned long flags; | ||
169 | |||
170 | spin_lock_irqsave(&iommu->lock, flags); | ||
171 | root = &iommu->root_entry[bus]; | ||
172 | context = get_context_addr_from_root(root); | ||
173 | if (!context) { | ||
174 | ret = 0; | ||
175 | goto out; | ||
176 | } | ||
177 | ret = context_present(context[devfn]); | ||
178 | out: | ||
179 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
180 | return ret; | ||
181 | } | ||
182 | |||
183 | static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) | ||
184 | { | ||
185 | struct root_entry *root; | ||
186 | struct context_entry *context; | ||
187 | unsigned long flags; | ||
188 | |||
189 | spin_lock_irqsave(&iommu->lock, flags); | ||
190 | root = &iommu->root_entry[bus]; | ||
191 | context = get_context_addr_from_root(root); | ||
192 | if (context) { | ||
193 | context_clear_entry(context[devfn]); | ||
194 | __iommu_flush_cache(iommu, &context[devfn], \ | ||
195 | sizeof(*context)); | ||
196 | } | ||
197 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
198 | } | ||
199 | |||
200 | static void free_context_table(struct intel_iommu *iommu) | ||
201 | { | ||
202 | struct root_entry *root; | ||
203 | int i; | ||
204 | unsigned long flags; | ||
205 | struct context_entry *context; | ||
206 | |||
207 | spin_lock_irqsave(&iommu->lock, flags); | ||
208 | if (!iommu->root_entry) { | ||
209 | goto out; | ||
210 | } | ||
211 | for (i = 0; i < ROOT_ENTRY_NR; i++) { | ||
212 | root = &iommu->root_entry[i]; | ||
213 | context = get_context_addr_from_root(root); | ||
214 | if (context) | ||
215 | free_pgtable_page(context); | ||
216 | } | ||
217 | free_pgtable_page(iommu->root_entry); | ||
218 | iommu->root_entry = NULL; | ||
219 | out: | ||
220 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
221 | } | ||
222 | |||
/* page table handling */

/* Address bits resolved by one page-table level, and the matching index mask */
#define LEVEL_STRIDE (9)
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)

/* Number of page-table levels for an adjusted guest address width index. */
static inline int agaw_to_level(int agaw)
{
	return 2 + agaw;
}

/* Address width in bits for an agaw index: 30 bits plus one stride per step. */
static inline int agaw_to_width(int agaw)
{
	const int extra_bits = agaw * LEVEL_STRIDE;

	return 30 + extra_bits;
}

/* Inverse of agaw_to_width(); integer division drops any remainder. */
static inline int width_to_agaw(int width)
{
	const int above_base = width - 30;

	return above_base / LEVEL_STRIDE;
}

/* Bit position where the table index for 'level' starts (level 1 -> bit 12). */
static inline unsigned int level_to_offset_bits(int level)
{
	const int index_bits = (level - 1) * LEVEL_STRIDE;

	return 12 + index_bits;
}
247 | |||
248 | static inline int address_level_offset(u64 addr, int level) | ||
249 | { | ||
250 | return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); | ||
251 | } | ||
252 | |||
253 | static inline u64 level_mask(int level) | ||
254 | { | ||
255 | return ((u64)-1 << level_to_offset_bits(level)); | ||
256 | } | ||
257 | |||
258 | static inline u64 level_size(int level) | ||
259 | { | ||
260 | return ((u64)1 << level_to_offset_bits(level)); | ||
261 | } | ||
262 | |||
263 | static inline u64 align_to_level(u64 addr, int level) | ||
264 | { | ||
265 | return ((addr + level_size(level) - 1) & level_mask(level)); | ||
266 | } | ||
267 | |||
268 | static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) | ||
269 | { | ||
270 | int addr_width = agaw_to_width(domain->agaw); | ||
271 | struct dma_pte *parent, *pte = NULL; | ||
272 | int level = agaw_to_level(domain->agaw); | ||
273 | int offset; | ||
274 | unsigned long flags; | ||
275 | |||
276 | BUG_ON(!domain->pgd); | ||
277 | |||
278 | addr &= (((u64)1) << addr_width) - 1; | ||
279 | parent = domain->pgd; | ||
280 | |||
281 | spin_lock_irqsave(&domain->mapping_lock, flags); | ||
282 | while (level > 0) { | ||
283 | void *tmp_page; | ||
284 | |||
285 | offset = address_level_offset(addr, level); | ||
286 | pte = &parent[offset]; | ||
287 | if (level == 1) | ||
288 | break; | ||
289 | |||
290 | if (!dma_pte_present(*pte)) { | ||
291 | tmp_page = alloc_pgtable_page(); | ||
292 | |||
293 | if (!tmp_page) { | ||
294 | spin_unlock_irqrestore(&domain->mapping_lock, | ||
295 | flags); | ||
296 | return NULL; | ||
297 | } | ||
298 | __iommu_flush_cache(domain->iommu, tmp_page, | ||
299 | PAGE_SIZE_4K); | ||
300 | dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); | ||
301 | /* | ||
302 | * high level table always sets r/w, last level page | ||
303 | * table control read/write | ||
304 | */ | ||
305 | dma_set_pte_readable(*pte); | ||
306 | dma_set_pte_writable(*pte); | ||
307 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
308 | } | ||
309 | parent = phys_to_virt(dma_pte_addr(*pte)); | ||
310 | level--; | ||
311 | } | ||
312 | |||
313 | spin_unlock_irqrestore(&domain->mapping_lock, flags); | ||
314 | return pte; | ||
315 | } | ||
316 | |||
317 | /* return address's pte at specific level */ | ||
318 | static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, | ||
319 | int level) | ||
320 | { | ||
321 | struct dma_pte *parent, *pte = NULL; | ||
322 | int total = agaw_to_level(domain->agaw); | ||
323 | int offset; | ||
324 | |||
325 | parent = domain->pgd; | ||
326 | while (level <= total) { | ||
327 | offset = address_level_offset(addr, total); | ||
328 | pte = &parent[offset]; | ||
329 | if (level == total) | ||
330 | return pte; | ||
331 | |||
332 | if (!dma_pte_present(*pte)) | ||
333 | break; | ||
334 | parent = phys_to_virt(dma_pte_addr(*pte)); | ||
335 | total--; | ||
336 | } | ||
337 | return NULL; | ||
338 | } | ||
339 | |||
340 | /* clear one page's page table */ | ||
341 | static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) | ||
342 | { | ||
343 | struct dma_pte *pte = NULL; | ||
344 | |||
345 | /* get last level pte */ | ||
346 | pte = dma_addr_level_pte(domain, addr, 1); | ||
347 | |||
348 | if (pte) { | ||
349 | dma_clear_pte(*pte); | ||
350 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
351 | } | ||
352 | } | ||
353 | |||
354 | /* clear last level pte, a tlb flush should be followed */ | ||
355 | static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) | ||
356 | { | ||
357 | int addr_width = agaw_to_width(domain->agaw); | ||
358 | |||
359 | start &= (((u64)1) << addr_width) - 1; | ||
360 | end &= (((u64)1) << addr_width) - 1; | ||
361 | /* in case it's partial page */ | ||
362 | start = PAGE_ALIGN_4K(start); | ||
363 | end &= PAGE_MASK_4K; | ||
364 | |||
365 | /* we don't need lock here, nobody else touches the iova range */ | ||
366 | while (start < end) { | ||
367 | dma_pte_clear_one(domain, start); | ||
368 | start += PAGE_SIZE_4K; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* free page table pages. last level pte should already be cleared */ | ||
373 | static void dma_pte_free_pagetable(struct dmar_domain *domain, | ||
374 | u64 start, u64 end) | ||
375 | { | ||
376 | int addr_width = agaw_to_width(domain->agaw); | ||
377 | struct dma_pte *pte; | ||
378 | int total = agaw_to_level(domain->agaw); | ||
379 | int level; | ||
380 | u64 tmp; | ||
381 | |||
382 | start &= (((u64)1) << addr_width) - 1; | ||
383 | end &= (((u64)1) << addr_width) - 1; | ||
384 | |||
385 | /* we don't need lock here, nobody else touches the iova range */ | ||
386 | level = 2; | ||
387 | while (level <= total) { | ||
388 | tmp = align_to_level(start, level); | ||
389 | if (tmp >= end || (tmp + level_size(level) > end)) | ||
390 | return; | ||
391 | |||
392 | while (tmp < end) { | ||
393 | pte = dma_addr_level_pte(domain, tmp, level); | ||
394 | if (pte) { | ||
395 | free_pgtable_page( | ||
396 | phys_to_virt(dma_pte_addr(*pte))); | ||
397 | dma_clear_pte(*pte); | ||
398 | __iommu_flush_cache(domain->iommu, | ||
399 | pte, sizeof(*pte)); | ||
400 | } | ||
401 | tmp += level_size(level); | ||
402 | } | ||
403 | level++; | ||
404 | } | ||
405 | /* free pgd */ | ||
406 | if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { | ||
407 | free_pgtable_page(domain->pgd); | ||
408 | domain->pgd = NULL; | ||
409 | } | ||
410 | } | ||
411 | |||
412 | /* iommu handling */ | ||
413 | static int iommu_alloc_root_entry(struct intel_iommu *iommu) | ||
414 | { | ||
415 | struct root_entry *root; | ||
416 | unsigned long flags; | ||
417 | |||
418 | root = (struct root_entry *)alloc_pgtable_page(); | ||
419 | if (!root) | ||
420 | return -ENOMEM; | ||
421 | |||
422 | __iommu_flush_cache(iommu, root, PAGE_SIZE_4K); | ||
423 | |||
424 | spin_lock_irqsave(&iommu->lock, flags); | ||
425 | iommu->root_entry = root; | ||
426 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
/*
 * Poll an IOMMU register until a condition holds.
 *
 * @iommu:  pointer whose ->reg member is the register base
 * @offset: register offset added to ->reg
 * @op:     accessor called as op(address); its result is stored in @sts
 * @cond:   expression (normally written in terms of @sts) that ends the wait
 * @sts:    lvalue receiving each value read
 *
 * Panics if @cond is still false after DMAR_OPERATION_TIMEOUT jiffies.
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * (safe in un-braced if/else); @iommu, @offset and @sts are parenthesized.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do {\
	unsigned long start_time = jiffies;\
	while (1) {\
		(sts) = op((iommu)->reg + (offset));\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
} while (0)
443 | |||
444 | static void iommu_set_root_entry(struct intel_iommu *iommu) | ||
445 | { | ||
446 | void *addr; | ||
447 | u32 cmd, sts; | ||
448 | unsigned long flag; | ||
449 | |||
450 | addr = iommu->root_entry; | ||
451 | |||
452 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
453 | dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); | ||
454 | |||
455 | cmd = iommu->gcmd | DMA_GCMD_SRTP; | ||
456 | writel(cmd, iommu->reg + DMAR_GCMD_REG); | ||
457 | |||
458 | /* Make sure hardware complete it */ | ||
459 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
460 | readl, (sts & DMA_GSTS_RTPS), sts); | ||
461 | |||
462 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
463 | } | ||
464 | |||
465 | static void iommu_flush_write_buffer(struct intel_iommu *iommu) | ||
466 | { | ||
467 | u32 val; | ||
468 | unsigned long flag; | ||
469 | |||
470 | if (!cap_rwbf(iommu->cap)) | ||
471 | return; | ||
472 | val = iommu->gcmd | DMA_GCMD_WBF; | ||
473 | |||
474 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
475 | writel(val, iommu->reg + DMAR_GCMD_REG); | ||
476 | |||
477 | /* Make sure hardware complete it */ | ||
478 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
479 | readl, (!(val & DMA_GSTS_WBFS)), val); | ||
480 | |||
481 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
482 | } | ||
483 | |||
484 | /* return value determine if we need a write buffer flush */ | ||
485 | static int __iommu_flush_context(struct intel_iommu *iommu, | ||
486 | u16 did, u16 source_id, u8 function_mask, u64 type, | ||
487 | int non_present_entry_flush) | ||
488 | { | ||
489 | u64 val = 0; | ||
490 | unsigned long flag; | ||
491 | |||
492 | /* | ||
493 | * In the non-present entry flush case, if hardware doesn't cache | ||
494 | * non-present entry we do nothing and if hardware cache non-present | ||
495 | * entry, we flush entries of domain 0 (the domain id is used to cache | ||
496 | * any non-present entries) | ||
497 | */ | ||
498 | if (non_present_entry_flush) { | ||
499 | if (!cap_caching_mode(iommu->cap)) | ||
500 | return 1; | ||
501 | else | ||
502 | did = 0; | ||
503 | } | ||
504 | |||
505 | switch (type) { | ||
506 | case DMA_CCMD_GLOBAL_INVL: | ||
507 | val = DMA_CCMD_GLOBAL_INVL; | ||
508 | break; | ||
509 | case DMA_CCMD_DOMAIN_INVL: | ||
510 | val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); | ||
511 | break; | ||
512 | case DMA_CCMD_DEVICE_INVL: | ||
513 | val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) | ||
514 | | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); | ||
515 | break; | ||
516 | default: | ||
517 | BUG(); | ||
518 | } | ||
519 | val |= DMA_CCMD_ICC; | ||
520 | |||
521 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
522 | dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); | ||
523 | |||
524 | /* Make sure hardware complete it */ | ||
525 | IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, | ||
526 | dmar_readq, (!(val & DMA_CCMD_ICC)), val); | ||
527 | |||
528 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
529 | |||
530 | /* flush context entry will implictly flush write buffer */ | ||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static int inline iommu_flush_context_global(struct intel_iommu *iommu, | ||
535 | int non_present_entry_flush) | ||
536 | { | ||
537 | return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, | ||
538 | non_present_entry_flush); | ||
539 | } | ||
540 | |||
541 | static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did, | ||
542 | int non_present_entry_flush) | ||
543 | { | ||
544 | return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, | ||
545 | non_present_entry_flush); | ||
546 | } | ||
547 | |||
548 | static int inline iommu_flush_context_device(struct intel_iommu *iommu, | ||
549 | u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) | ||
550 | { | ||
551 | return __iommu_flush_context(iommu, did, source_id, function_mask, | ||
552 | DMA_CCMD_DEVICE_INVL, non_present_entry_flush); | ||
553 | } | ||
554 | |||
555 | /* return value determine if we need a write buffer flush */ | ||
556 | static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, | ||
557 | u64 addr, unsigned int size_order, u64 type, | ||
558 | int non_present_entry_flush) | ||
559 | { | ||
560 | int tlb_offset = ecap_iotlb_offset(iommu->ecap); | ||
561 | u64 val = 0, val_iva = 0; | ||
562 | unsigned long flag; | ||
563 | |||
564 | /* | ||
565 | * In the non-present entry flush case, if hardware doesn't cache | ||
566 | * non-present entry we do nothing and if hardware cache non-present | ||
567 | * entry, we flush entries of domain 0 (the domain id is used to cache | ||
568 | * any non-present entries) | ||
569 | */ | ||
570 | if (non_present_entry_flush) { | ||
571 | if (!cap_caching_mode(iommu->cap)) | ||
572 | return 1; | ||
573 | else | ||
574 | did = 0; | ||
575 | } | ||
576 | |||
577 | switch (type) { | ||
578 | case DMA_TLB_GLOBAL_FLUSH: | ||
579 | /* global flush doesn't need set IVA_REG */ | ||
580 | val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; | ||
581 | break; | ||
582 | case DMA_TLB_DSI_FLUSH: | ||
583 | val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); | ||
584 | break; | ||
585 | case DMA_TLB_PSI_FLUSH: | ||
586 | val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); | ||
587 | /* Note: always flush non-leaf currently */ | ||
588 | val_iva = size_order | addr; | ||
589 | break; | ||
590 | default: | ||
591 | BUG(); | ||
592 | } | ||
593 | /* Note: set drain read/write */ | ||
594 | #if 0 | ||
595 | /* | ||
596 | * This is probably to be super secure.. Looks like we can | ||
597 | * ignore it without any impact. | ||
598 | */ | ||
599 | if (cap_read_drain(iommu->cap)) | ||
600 | val |= DMA_TLB_READ_DRAIN; | ||
601 | #endif | ||
602 | if (cap_write_drain(iommu->cap)) | ||
603 | val |= DMA_TLB_WRITE_DRAIN; | ||
604 | |||
605 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
606 | /* Note: Only uses first TLB reg currently */ | ||
607 | if (val_iva) | ||
608 | dmar_writeq(iommu->reg + tlb_offset, val_iva); | ||
609 | dmar_writeq(iommu->reg + tlb_offset + 8, val); | ||
610 | |||
611 | /* Make sure hardware complete it */ | ||
612 | IOMMU_WAIT_OP(iommu, tlb_offset + 8, | ||
613 | dmar_readq, (!(val & DMA_TLB_IVT)), val); | ||
614 | |||
615 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
616 | |||
617 | /* check IOTLB invalidation granularity */ | ||
618 | if (DMA_TLB_IAIG(val) == 0) | ||
619 | printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); | ||
620 | if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) | ||
621 | pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", | ||
622 | DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); | ||
623 | /* flush context entry will implictly flush write buffer */ | ||
624 | return 0; | ||
625 | } | ||
626 | |||
627 | static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu, | ||
628 | int non_present_entry_flush) | ||
629 | { | ||
630 | return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, | ||
631 | non_present_entry_flush); | ||
632 | } | ||
633 | |||
634 | static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, | ||
635 | int non_present_entry_flush) | ||
636 | { | ||
637 | return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, | ||
638 | non_present_entry_flush); | ||
639 | } | ||
640 | |||
/*
 * Count how many times @base and the last element of the range
 * [@base, @base + @size - 1] must both be shifted right by one bit
 * before they become equal.
 *
 * A single-element range (@size == 1) yields 0.
 */
static int iommu_get_alignment(u64 base, unsigned int size)
{
	u64 last = base + size - 1;
	int shift;

	for (shift = 0; base != last; shift++) {
		base >>= 1;
		last >>= 1;
	}
	return shift;
}
654 | |||
655 | static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, | ||
656 | u64 addr, unsigned int pages, int non_present_entry_flush) | ||
657 | { | ||
658 | unsigned int align; | ||
659 | |||
660 | BUG_ON(addr & (~PAGE_MASK_4K)); | ||
661 | BUG_ON(pages == 0); | ||
662 | |||
663 | /* Fallback to domain selective flush if no PSI support */ | ||
664 | if (!cap_pgsel_inv(iommu->cap)) | ||
665 | return iommu_flush_iotlb_dsi(iommu, did, | ||
666 | non_present_entry_flush); | ||
667 | |||
668 | /* | ||
669 | * PSI requires page size to be 2 ^ x, and the base address is naturally | ||
670 | * aligned to the size | ||
671 | */ | ||
672 | align = iommu_get_alignment(addr >> PAGE_SHIFT_4K, pages); | ||
673 | /* Fallback to domain selective flush if size is too big */ | ||
674 | if (align > cap_max_amask_val(iommu->cap)) | ||
675 | return iommu_flush_iotlb_dsi(iommu, did, | ||
676 | non_present_entry_flush); | ||
677 | |||
678 | addr >>= PAGE_SHIFT_4K + align; | ||
679 | addr <<= PAGE_SHIFT_4K + align; | ||
680 | |||
681 | return __iommu_flush_iotlb(iommu, did, addr, align, | ||
682 | DMA_TLB_PSI_FLUSH, non_present_entry_flush); | ||
683 | } | ||
684 | |||
685 | static int iommu_enable_translation(struct intel_iommu *iommu) | ||
686 | { | ||
687 | u32 sts; | ||
688 | unsigned long flags; | ||
689 | |||
690 | spin_lock_irqsave(&iommu->register_lock, flags); | ||
691 | writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); | ||
692 | |||
693 | /* Make sure hardware complete it */ | ||
694 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
695 | readl, (sts & DMA_GSTS_TES), sts); | ||
696 | |||
697 | iommu->gcmd |= DMA_GCMD_TE; | ||
698 | spin_unlock_irqrestore(&iommu->register_lock, flags); | ||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | static int iommu_disable_translation(struct intel_iommu *iommu) | ||
703 | { | ||
704 | u32 sts; | ||
705 | unsigned long flag; | ||
706 | |||
707 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
708 | iommu->gcmd &= ~DMA_GCMD_TE; | ||
709 | writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); | ||
710 | |||
711 | /* Make sure hardware complete it */ | ||
712 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
713 | readl, (!(sts & DMA_GSTS_TES)), sts); | ||
714 | |||
715 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
716 | return 0; | ||
717 | } | ||
718 | |||
719 | static int iommu_init_domains(struct intel_iommu *iommu) | ||
720 | { | ||
721 | unsigned long ndomains; | ||
722 | unsigned long nlongs; | ||
723 | |||
724 | ndomains = cap_ndoms(iommu->cap); | ||
725 | pr_debug("Number of Domains supportd <%ld>\n", ndomains); | ||
726 | nlongs = BITS_TO_LONGS(ndomains); | ||
727 | |||
728 | /* TBD: there might be 64K domains, | ||
729 | * consider other allocation for future chip | ||
730 | */ | ||
731 | iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); | ||
732 | if (!iommu->domain_ids) { | ||
733 | printk(KERN_ERR "Allocating domain id array failed\n"); | ||
734 | return -ENOMEM; | ||
735 | } | ||
736 | iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), | ||
737 | GFP_KERNEL); | ||
738 | if (!iommu->domains) { | ||
739 | printk(KERN_ERR "Allocating domain array failed\n"); | ||
740 | kfree(iommu->domain_ids); | ||
741 | return -ENOMEM; | ||
742 | } | ||
743 | |||
744 | /* | ||
745 | * if Caching mode is set, then invalid translations are tagged | ||
746 | * with domainid 0. Hence we need to pre-allocate it. | ||
747 | */ | ||
748 | if (cap_caching_mode(iommu->cap)) | ||
749 | set_bit(0, iommu->domain_ids); | ||
750 | return 0; | ||
751 | } | ||
752 | |||
753 | static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) | ||
754 | { | ||
755 | struct intel_iommu *iommu; | ||
756 | int ret; | ||
757 | int map_size; | ||
758 | u32 ver; | ||
759 | |||
760 | iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); | ||
761 | if (!iommu) | ||
762 | return NULL; | ||
763 | iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); | ||
764 | if (!iommu->reg) { | ||
765 | printk(KERN_ERR "IOMMU: can't map the region\n"); | ||
766 | goto error; | ||
767 | } | ||
768 | iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); | ||
769 | iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); | ||
770 | |||
771 | /* the registers might be more than one page */ | ||
772 | map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), | ||
773 | cap_max_fault_reg_offset(iommu->cap)); | ||
774 | map_size = PAGE_ALIGN_4K(map_size); | ||
775 | if (map_size > PAGE_SIZE_4K) { | ||
776 | iounmap(iommu->reg); | ||
777 | iommu->reg = ioremap(drhd->reg_base_addr, map_size); | ||
778 | if (!iommu->reg) { | ||
779 | printk(KERN_ERR "IOMMU: can't map the region\n"); | ||
780 | goto error; | ||
781 | } | ||
782 | } | ||
783 | |||
784 | ver = readl(iommu->reg + DMAR_VER_REG); | ||
785 | pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", | ||
786 | drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), | ||
787 | iommu->cap, iommu->ecap); | ||
788 | ret = iommu_init_domains(iommu); | ||
789 | if (ret) | ||
790 | goto error_unmap; | ||
791 | spin_lock_init(&iommu->lock); | ||
792 | spin_lock_init(&iommu->register_lock); | ||
793 | |||
794 | drhd->iommu = iommu; | ||
795 | return iommu; | ||
796 | error_unmap: | ||
797 | iounmap(iommu->reg); | ||
798 | iommu->reg = 0; | ||
799 | error: | ||
800 | kfree(iommu); | ||
801 | return NULL; | ||
802 | } | ||
803 | |||
804 | static void domain_exit(struct dmar_domain *domain); | ||
805 | static void free_iommu(struct intel_iommu *iommu) | ||
806 | { | ||
807 | struct dmar_domain *domain; | ||
808 | int i; | ||
809 | |||
810 | if (!iommu) | ||
811 | return; | ||
812 | |||
813 | i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); | ||
814 | for (; i < cap_ndoms(iommu->cap); ) { | ||
815 | domain = iommu->domains[i]; | ||
816 | clear_bit(i, iommu->domain_ids); | ||
817 | domain_exit(domain); | ||
818 | i = find_next_bit(iommu->domain_ids, | ||
819 | cap_ndoms(iommu->cap), i+1); | ||
820 | } | ||
821 | |||
822 | if (iommu->gcmd & DMA_GCMD_TE) | ||
823 | iommu_disable_translation(iommu); | ||
824 | |||
825 | if (iommu->irq) { | ||
826 | set_irq_data(iommu->irq, NULL); | ||
827 | /* This will mask the irq */ | ||
828 | free_irq(iommu->irq, iommu); | ||
829 | destroy_irq(iommu->irq); | ||
830 | } | ||
831 | |||
832 | kfree(iommu->domains); | ||
833 | kfree(iommu->domain_ids); | ||
834 | |||
835 | /* free context mapping */ | ||
836 | free_context_table(iommu); | ||
837 | |||
838 | if (iommu->reg) | ||
839 | iounmap(iommu->reg); | ||
840 | kfree(iommu); | ||
841 | } | ||
842 | |||
843 | static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) | ||
844 | { | ||
845 | unsigned long num; | ||
846 | unsigned long ndomains; | ||
847 | struct dmar_domain *domain; | ||
848 | unsigned long flags; | ||
849 | |||
850 | domain = alloc_domain_mem(); | ||
851 | if (!domain) | ||
852 | return NULL; | ||
853 | |||
854 | ndomains = cap_ndoms(iommu->cap); | ||
855 | |||
856 | spin_lock_irqsave(&iommu->lock, flags); | ||
857 | num = find_first_zero_bit(iommu->domain_ids, ndomains); | ||
858 | if (num >= ndomains) { | ||
859 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
860 | free_domain_mem(domain); | ||
861 | printk(KERN_ERR "IOMMU: no free domain ids\n"); | ||
862 | return NULL; | ||
863 | } | ||
864 | |||
865 | set_bit(num, iommu->domain_ids); | ||
866 | domain->id = num; | ||
867 | domain->iommu = iommu; | ||
868 | iommu->domains[num] = domain; | ||
869 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
870 | |||
871 | return domain; | ||
872 | } | ||
873 | |||
874 | static void iommu_free_domain(struct dmar_domain *domain) | ||
875 | { | ||
876 | unsigned long flags; | ||
877 | |||
878 | spin_lock_irqsave(&domain->iommu->lock, flags); | ||
879 | clear_bit(domain->id, domain->iommu->domain_ids); | ||
880 | spin_unlock_irqrestore(&domain->iommu->lock, flags); | ||
881 | } | ||
882 | |||
883 | static struct iova_domain reserved_iova_list; | ||
884 | |||
885 | static void dmar_init_reserved_ranges(void) | ||
886 | { | ||
887 | struct pci_dev *pdev = NULL; | ||
888 | struct iova *iova; | ||
889 | int i; | ||
890 | u64 addr, size; | ||
891 | |||
892 | init_iova_domain(&reserved_iova_list); | ||
893 | |||
894 | /* IOAPIC ranges shouldn't be accessed by DMA */ | ||
895 | iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), | ||
896 | IOVA_PFN(IOAPIC_RANGE_END)); | ||
897 | if (!iova) | ||
898 | printk(KERN_ERR "Reserve IOAPIC range failed\n"); | ||
899 | |||
900 | /* Reserve all PCI MMIO to avoid peer-to-peer access */ | ||
901 | for_each_pci_dev(pdev) { | ||
902 | struct resource *r; | ||
903 | |||
904 | for (i = 0; i < PCI_NUM_RESOURCES; i++) { | ||
905 | r = &pdev->resource[i]; | ||
906 | if (!r->flags || !(r->flags & IORESOURCE_MEM)) | ||
907 | continue; | ||
908 | addr = r->start; | ||
909 | addr &= PAGE_MASK_4K; | ||
910 | size = r->end - addr; | ||
911 | size = PAGE_ALIGN_4K(size); | ||
912 | iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), | ||
913 | IOVA_PFN(size + addr) - 1); | ||
914 | if (!iova) | ||
915 | printk(KERN_ERR "Reserve iova failed\n"); | ||
916 | } | ||
917 | } | ||
918 | |||
919 | } | ||
920 | |||
921 | static void domain_reserve_special_ranges(struct dmar_domain *domain) | ||
922 | { | ||
923 | copy_reserved_iova(&reserved_iova_list, &domain->iovad); | ||
924 | } | ||
925 | |||
/*
 * Round a guest address width up to the next value of the form
 * 12 + 9 * k, capped at 64.
 *
 * A width that already has that form is returned unchanged.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
939 | |||
940 | static int domain_init(struct dmar_domain *domain, int guest_width) | ||
941 | { | ||
942 | struct intel_iommu *iommu; | ||
943 | int adjust_width, agaw; | ||
944 | unsigned long sagaw; | ||
945 | |||
946 | init_iova_domain(&domain->iovad); | ||
947 | spin_lock_init(&domain->mapping_lock); | ||
948 | |||
949 | domain_reserve_special_ranges(domain); | ||
950 | |||
951 | /* calculate AGAW */ | ||
952 | iommu = domain->iommu; | ||
953 | if (guest_width > cap_mgaw(iommu->cap)) | ||
954 | guest_width = cap_mgaw(iommu->cap); | ||
955 | domain->gaw = guest_width; | ||
956 | adjust_width = guestwidth_to_adjustwidth(guest_width); | ||
957 | agaw = width_to_agaw(adjust_width); | ||
958 | sagaw = cap_sagaw(iommu->cap); | ||
959 | if (!test_bit(agaw, &sagaw)) { | ||
960 | /* hardware doesn't support it, choose a bigger one */ | ||
961 | pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); | ||
962 | agaw = find_next_bit(&sagaw, 5, agaw); | ||
963 | if (agaw >= 5) | ||
964 | return -ENODEV; | ||
965 | } | ||
966 | domain->agaw = agaw; | ||
967 | INIT_LIST_HEAD(&domain->devices); | ||
968 | |||
969 | /* always allocate the top pgd */ | ||
970 | domain->pgd = (struct dma_pte *)alloc_pgtable_page(); | ||
971 | if (!domain->pgd) | ||
972 | return -ENOMEM; | ||
973 | __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K); | ||
974 | return 0; | ||
975 | } | ||
976 | |||
977 | static void domain_exit(struct dmar_domain *domain) | ||
978 | { | ||
979 | u64 end; | ||
980 | |||
981 | /* Domain 0 is reserved, so dont process it */ | ||
982 | if (!domain) | ||
983 | return; | ||
984 | |||
985 | domain_remove_dev_info(domain); | ||
986 | /* destroy iovas */ | ||
987 | put_iova_domain(&domain->iovad); | ||
988 | end = DOMAIN_MAX_ADDR(domain->gaw); | ||
989 | end = end & (~PAGE_MASK_4K); | ||
990 | |||
991 | /* clear ptes */ | ||
992 | dma_pte_clear_range(domain, 0, end); | ||
993 | |||
994 | /* free page tables */ | ||
995 | dma_pte_free_pagetable(domain, 0, end); | ||
996 | |||
997 | iommu_free_domain(domain); | ||
998 | free_domain_mem(domain); | ||
999 | } | ||
1000 | |||
1001 | static int domain_context_mapping_one(struct dmar_domain *domain, | ||
1002 | u8 bus, u8 devfn) | ||
1003 | { | ||
1004 | struct context_entry *context; | ||
1005 | struct intel_iommu *iommu = domain->iommu; | ||
1006 | unsigned long flags; | ||
1007 | |||
1008 | pr_debug("Set context mapping for %02x:%02x.%d\n", | ||
1009 | bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); | ||
1010 | BUG_ON(!domain->pgd); | ||
1011 | context = device_to_context_entry(iommu, bus, devfn); | ||
1012 | if (!context) | ||
1013 | return -ENOMEM; | ||
1014 | spin_lock_irqsave(&iommu->lock, flags); | ||
1015 | if (context_present(*context)) { | ||
1016 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1017 | return 0; | ||
1018 | } | ||
1019 | |||
1020 | context_set_domain_id(*context, domain->id); | ||
1021 | context_set_address_width(*context, domain->agaw); | ||
1022 | context_set_address_root(*context, virt_to_phys(domain->pgd)); | ||
1023 | context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); | ||
1024 | context_set_fault_enable(*context); | ||
1025 | context_set_present(*context); | ||
1026 | __iommu_flush_cache(iommu, context, sizeof(*context)); | ||
1027 | |||
1028 | /* it's a non-present to present mapping */ | ||
1029 | if (iommu_flush_context_device(iommu, domain->id, | ||
1030 | (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) | ||
1031 | iommu_flush_write_buffer(iommu); | ||
1032 | else | ||
1033 | iommu_flush_iotlb_dsi(iommu, 0, 0); | ||
1034 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1035 | return 0; | ||
1036 | } | ||
1037 | |||
1038 | static int | ||
1039 | domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) | ||
1040 | { | ||
1041 | int ret; | ||
1042 | struct pci_dev *tmp, *parent; | ||
1043 | |||
1044 | ret = domain_context_mapping_one(domain, pdev->bus->number, | ||
1045 | pdev->devfn); | ||
1046 | if (ret) | ||
1047 | return ret; | ||
1048 | |||
1049 | /* dependent device mapping */ | ||
1050 | tmp = pci_find_upstream_pcie_bridge(pdev); | ||
1051 | if (!tmp) | ||
1052 | return 0; | ||
1053 | /* Secondary interface's bus number and devfn 0 */ | ||
1054 | parent = pdev->bus->self; | ||
1055 | while (parent != tmp) { | ||
1056 | ret = domain_context_mapping_one(domain, parent->bus->number, | ||
1057 | parent->devfn); | ||
1058 | if (ret) | ||
1059 | return ret; | ||
1060 | parent = parent->bus->self; | ||
1061 | } | ||
1062 | if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ | ||
1063 | return domain_context_mapping_one(domain, | ||
1064 | tmp->subordinate->number, 0); | ||
1065 | else /* this is a legacy PCI bridge */ | ||
1066 | return domain_context_mapping_one(domain, | ||
1067 | tmp->bus->number, tmp->devfn); | ||
1068 | } | ||
1069 | |||
1070 | static int domain_context_mapped(struct dmar_domain *domain, | ||
1071 | struct pci_dev *pdev) | ||
1072 | { | ||
1073 | int ret; | ||
1074 | struct pci_dev *tmp, *parent; | ||
1075 | |||
1076 | ret = device_context_mapped(domain->iommu, | ||
1077 | pdev->bus->number, pdev->devfn); | ||
1078 | if (!ret) | ||
1079 | return ret; | ||
1080 | /* dependent device mapping */ | ||
1081 | tmp = pci_find_upstream_pcie_bridge(pdev); | ||
1082 | if (!tmp) | ||
1083 | return ret; | ||
1084 | /* Secondary interface's bus number and devfn 0 */ | ||
1085 | parent = pdev->bus->self; | ||
1086 | while (parent != tmp) { | ||
1087 | ret = device_context_mapped(domain->iommu, parent->bus->number, | ||
1088 | parent->devfn); | ||
1089 | if (!ret) | ||
1090 | return ret; | ||
1091 | parent = parent->bus->self; | ||
1092 | } | ||
1093 | if (tmp->is_pcie) | ||
1094 | return device_context_mapped(domain->iommu, | ||
1095 | tmp->subordinate->number, 0); | ||
1096 | else | ||
1097 | return device_context_mapped(domain->iommu, | ||
1098 | tmp->bus->number, tmp->devfn); | ||
1099 | } | ||
1100 | |||
1101 | static int | ||
1102 | domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, | ||
1103 | u64 hpa, size_t size, int prot) | ||
1104 | { | ||
1105 | u64 start_pfn, end_pfn; | ||
1106 | struct dma_pte *pte; | ||
1107 | int index; | ||
1108 | |||
1109 | if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) | ||
1110 | return -EINVAL; | ||
1111 | iova &= PAGE_MASK_4K; | ||
1112 | start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K; | ||
1113 | end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K; | ||
1114 | index = 0; | ||
1115 | while (start_pfn < end_pfn) { | ||
1116 | pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); | ||
1117 | if (!pte) | ||
1118 | return -ENOMEM; | ||
1119 | /* We don't need lock here, nobody else | ||
1120 | * touches the iova range | ||
1121 | */ | ||
1122 | BUG_ON(dma_pte_addr(*pte)); | ||
1123 | dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); | ||
1124 | dma_set_pte_prot(*pte, prot); | ||
1125 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
1126 | start_pfn++; | ||
1127 | index++; | ||
1128 | } | ||
1129 | return 0; | ||
1130 | } | ||
1131 | |||
1132 | static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) | ||
1133 | { | ||
1134 | clear_context_table(domain->iommu, bus, devfn); | ||
1135 | iommu_flush_context_global(domain->iommu, 0); | ||
1136 | iommu_flush_iotlb_global(domain->iommu, 0); | ||
1137 | } | ||
1138 | |||
1139 | static void domain_remove_dev_info(struct dmar_domain *domain) | ||
1140 | { | ||
1141 | struct device_domain_info *info; | ||
1142 | unsigned long flags; | ||
1143 | |||
1144 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1145 | while (!list_empty(&domain->devices)) { | ||
1146 | info = list_entry(domain->devices.next, | ||
1147 | struct device_domain_info, link); | ||
1148 | list_del(&info->link); | ||
1149 | list_del(&info->global); | ||
1150 | if (info->dev) | ||
1151 | info->dev->sysdata = NULL; | ||
1152 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1153 | |||
1154 | detach_domain_for_dev(info->domain, info->bus, info->devfn); | ||
1155 | free_devinfo_mem(info); | ||
1156 | |||
1157 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1158 | } | ||
1159 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1160 | } | ||
1161 | |||
1162 | /* | ||
1163 | * find_domain | ||
1164 | * Note: we use struct pci_dev->sysdata stores the info | ||
1165 | */ | ||
1166 | struct dmar_domain * | ||
1167 | find_domain(struct pci_dev *pdev) | ||
1168 | { | ||
1169 | struct device_domain_info *info; | ||
1170 | |||
1171 | /* No lock here, assumes no domain exit in normal case */ | ||
1172 | info = pdev->sysdata; | ||
1173 | if (info) | ||
1174 | return info->domain; | ||
1175 | return NULL; | ||
1176 | } | ||
1177 | |||
1178 | static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, | ||
1179 | struct pci_dev *dev) | ||
1180 | { | ||
1181 | int index; | ||
1182 | |||
1183 | while (dev) { | ||
1184 | for (index = 0; index < cnt; index ++) | ||
1185 | if (dev == devices[index]) | ||
1186 | return 1; | ||
1187 | |||
1188 | /* Check our parent */ | ||
1189 | dev = dev->bus->self; | ||
1190 | } | ||
1191 | |||
1192 | return 0; | ||
1193 | } | ||
1194 | |||
1195 | static struct dmar_drhd_unit * | ||
1196 | dmar_find_matched_drhd_unit(struct pci_dev *dev) | ||
1197 | { | ||
1198 | struct dmar_drhd_unit *drhd = NULL; | ||
1199 | |||
1200 | list_for_each_entry(drhd, &dmar_drhd_units, list) { | ||
1201 | if (drhd->include_all || dmar_pci_device_match(drhd->devices, | ||
1202 | drhd->devices_cnt, dev)) | ||
1203 | return drhd; | ||
1204 | } | ||
1205 | |||
1206 | return NULL; | ||
1207 | } | ||
1208 | |||
1209 | /* domain is initialized */ | ||
1210 | static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) | ||
1211 | { | ||
1212 | struct dmar_domain *domain, *found = NULL; | ||
1213 | struct intel_iommu *iommu; | ||
1214 | struct dmar_drhd_unit *drhd; | ||
1215 | struct device_domain_info *info, *tmp; | ||
1216 | struct pci_dev *dev_tmp; | ||
1217 | unsigned long flags; | ||
1218 | int bus = 0, devfn = 0; | ||
1219 | |||
1220 | domain = find_domain(pdev); | ||
1221 | if (domain) | ||
1222 | return domain; | ||
1223 | |||
1224 | dev_tmp = pci_find_upstream_pcie_bridge(pdev); | ||
1225 | if (dev_tmp) { | ||
1226 | if (dev_tmp->is_pcie) { | ||
1227 | bus = dev_tmp->subordinate->number; | ||
1228 | devfn = 0; | ||
1229 | } else { | ||
1230 | bus = dev_tmp->bus->number; | ||
1231 | devfn = dev_tmp->devfn; | ||
1232 | } | ||
1233 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1234 | list_for_each_entry(info, &device_domain_list, global) { | ||
1235 | if (info->bus == bus && info->devfn == devfn) { | ||
1236 | found = info->domain; | ||
1237 | break; | ||
1238 | } | ||
1239 | } | ||
1240 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1241 | /* pcie-pci bridge already has a domain, uses it */ | ||
1242 | if (found) { | ||
1243 | domain = found; | ||
1244 | goto found_domain; | ||
1245 | } | ||
1246 | } | ||
1247 | |||
1248 | /* Allocate new domain for the device */ | ||
1249 | drhd = dmar_find_matched_drhd_unit(pdev); | ||
1250 | if (!drhd) { | ||
1251 | printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", | ||
1252 | pci_name(pdev)); | ||
1253 | return NULL; | ||
1254 | } | ||
1255 | iommu = drhd->iommu; | ||
1256 | |||
1257 | domain = iommu_alloc_domain(iommu); | ||
1258 | if (!domain) | ||
1259 | goto error; | ||
1260 | |||
1261 | if (domain_init(domain, gaw)) { | ||
1262 | domain_exit(domain); | ||
1263 | goto error; | ||
1264 | } | ||
1265 | |||
1266 | /* register pcie-to-pci device */ | ||
1267 | if (dev_tmp) { | ||
1268 | info = alloc_devinfo_mem(); | ||
1269 | if (!info) { | ||
1270 | domain_exit(domain); | ||
1271 | goto error; | ||
1272 | } | ||
1273 | info->bus = bus; | ||
1274 | info->devfn = devfn; | ||
1275 | info->dev = NULL; | ||
1276 | info->domain = domain; | ||
1277 | /* This domain is shared by devices under p2p bridge */ | ||
1278 | domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; | ||
1279 | |||
1280 | /* pcie-to-pci bridge already has a domain, uses it */ | ||
1281 | found = NULL; | ||
1282 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1283 | list_for_each_entry(tmp, &device_domain_list, global) { | ||
1284 | if (tmp->bus == bus && tmp->devfn == devfn) { | ||
1285 | found = tmp->domain; | ||
1286 | break; | ||
1287 | } | ||
1288 | } | ||
1289 | if (found) { | ||
1290 | free_devinfo_mem(info); | ||
1291 | domain_exit(domain); | ||
1292 | domain = found; | ||
1293 | } else { | ||
1294 | list_add(&info->link, &domain->devices); | ||
1295 | list_add(&info->global, &device_domain_list); | ||
1296 | } | ||
1297 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1298 | } | ||
1299 | |||
1300 | found_domain: | ||
1301 | info = alloc_devinfo_mem(); | ||
1302 | if (!info) | ||
1303 | goto error; | ||
1304 | info->bus = pdev->bus->number; | ||
1305 | info->devfn = pdev->devfn; | ||
1306 | info->dev = pdev; | ||
1307 | info->domain = domain; | ||
1308 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1309 | /* somebody is fast */ | ||
1310 | found = find_domain(pdev); | ||
1311 | if (found != NULL) { | ||
1312 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1313 | if (found != domain) { | ||
1314 | domain_exit(domain); | ||
1315 | domain = found; | ||
1316 | } | ||
1317 | free_devinfo_mem(info); | ||
1318 | return domain; | ||
1319 | } | ||
1320 | list_add(&info->link, &domain->devices); | ||
1321 | list_add(&info->global, &device_domain_list); | ||
1322 | pdev->sysdata = info; | ||
1323 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1324 | return domain; | ||
1325 | error: | ||
1326 | /* recheck it here, maybe others set it */ | ||
1327 | return find_domain(pdev); | ||
1328 | } | ||
1329 | |||
1330 | static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) | ||
1331 | { | ||
1332 | struct dmar_domain *domain; | ||
1333 | unsigned long size; | ||
1334 | u64 base; | ||
1335 | int ret; | ||
1336 | |||
1337 | printk(KERN_INFO | ||
1338 | "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", | ||
1339 | pci_name(pdev), start, end); | ||
1340 | /* page table init */ | ||
1341 | domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); | ||
1342 | if (!domain) | ||
1343 | return -ENOMEM; | ||
1344 | |||
1345 | /* The address might not be aligned */ | ||
1346 | base = start & PAGE_MASK_4K; | ||
1347 | size = end - base; | ||
1348 | size = PAGE_ALIGN_4K(size); | ||
1349 | if (!reserve_iova(&domain->iovad, IOVA_PFN(base), | ||
1350 | IOVA_PFN(base + size) - 1)) { | ||
1351 | printk(KERN_ERR "IOMMU: reserve iova failed\n"); | ||
1352 | ret = -ENOMEM; | ||
1353 | goto error; | ||
1354 | } | ||
1355 | |||
1356 | pr_debug("Mapping reserved region %lx@%llx for %s\n", | ||
1357 | size, base, pci_name(pdev)); | ||
1358 | /* | ||
1359 | * RMRR range might have overlap with physical memory range, | ||
1360 | * clear it first | ||
1361 | */ | ||
1362 | dma_pte_clear_range(domain, base, base + size); | ||
1363 | |||
1364 | ret = domain_page_mapping(domain, base, base, size, | ||
1365 | DMA_PTE_READ|DMA_PTE_WRITE); | ||
1366 | if (ret) | ||
1367 | goto error; | ||
1368 | |||
1369 | /* context entry init */ | ||
1370 | ret = domain_context_mapping(domain, pdev); | ||
1371 | if (!ret) | ||
1372 | return 0; | ||
1373 | error: | ||
1374 | domain_exit(domain); | ||
1375 | return ret; | ||
1376 | |||
1377 | } | ||
1378 | |||
1379 | static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, | ||
1380 | struct pci_dev *pdev) | ||
1381 | { | ||
1382 | if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) | ||
1383 | return 0; | ||
1384 | return iommu_prepare_identity_map(pdev, rmrr->base_address, | ||
1385 | rmrr->end_address + 1); | ||
1386 | } | ||
1387 | |||
1388 | int __init init_dmars(void) | ||
1389 | { | ||
1390 | struct dmar_drhd_unit *drhd; | ||
1391 | struct dmar_rmrr_unit *rmrr; | ||
1392 | struct pci_dev *pdev; | ||
1393 | struct intel_iommu *iommu; | ||
1394 | int ret, unit = 0; | ||
1395 | |||
1396 | /* | ||
1397 | * for each drhd | ||
1398 | * allocate root | ||
1399 | * initialize and program root entry to not present | ||
1400 | * endfor | ||
1401 | */ | ||
1402 | for_each_drhd_unit(drhd) { | ||
1403 | if (drhd->ignored) | ||
1404 | continue; | ||
1405 | iommu = alloc_iommu(drhd); | ||
1406 | if (!iommu) { | ||
1407 | ret = -ENOMEM; | ||
1408 | goto error; | ||
1409 | } | ||
1410 | |||
1411 | /* | ||
1412 | * TBD: | ||
1413 | * we could share the same root & context tables | ||
1414 | * amoung all IOMMU's. Need to Split it later. | ||
1415 | */ | ||
1416 | ret = iommu_alloc_root_entry(iommu); | ||
1417 | if (ret) { | ||
1418 | printk(KERN_ERR "IOMMU: allocate root entry failed\n"); | ||
1419 | goto error; | ||
1420 | } | ||
1421 | } | ||
1422 | |||
1423 | /* | ||
1424 | * For each rmrr | ||
1425 | * for each dev attached to rmrr | ||
1426 | * do | ||
1427 | * locate drhd for dev, alloc domain for dev | ||
1428 | * allocate free domain | ||
1429 | * allocate page table entries for rmrr | ||
1430 | * if context not allocated for bus | ||
1431 | * allocate and init context | ||
1432 | * set present in root table for this bus | ||
1433 | * init context with domain, translation etc | ||
1434 | * endfor | ||
1435 | * endfor | ||
1436 | */ | ||
1437 | for_each_rmrr_units(rmrr) { | ||
1438 | int i; | ||
1439 | for (i = 0; i < rmrr->devices_cnt; i++) { | ||
1440 | pdev = rmrr->devices[i]; | ||
1441 | /* some BIOS lists non-exist devices in DMAR table */ | ||
1442 | if (!pdev) | ||
1443 | continue; | ||
1444 | ret = iommu_prepare_rmrr_dev(rmrr, pdev); | ||
1445 | if (ret) | ||
1446 | printk(KERN_ERR | ||
1447 | "IOMMU: mapping reserved region failed\n"); | ||
1448 | } | ||
1449 | } | ||
1450 | |||
1451 | /* | ||
1452 | * for each drhd | ||
1453 | * enable fault log | ||
1454 | * global invalidate context cache | ||
1455 | * global invalidate iotlb | ||
1456 | * enable translation | ||
1457 | */ | ||
1458 | for_each_drhd_unit(drhd) { | ||
1459 | if (drhd->ignored) | ||
1460 | continue; | ||
1461 | iommu = drhd->iommu; | ||
1462 | sprintf (iommu->name, "dmar%d", unit++); | ||
1463 | |||
1464 | iommu_flush_write_buffer(iommu); | ||
1465 | |||
1466 | iommu_set_root_entry(iommu); | ||
1467 | |||
1468 | iommu_flush_context_global(iommu, 0); | ||
1469 | iommu_flush_iotlb_global(iommu, 0); | ||
1470 | |||
1471 | ret = iommu_enable_translation(iommu); | ||
1472 | if (ret) | ||
1473 | goto error; | ||
1474 | } | ||
1475 | |||
1476 | return 0; | ||
1477 | error: | ||
1478 | for_each_drhd_unit(drhd) { | ||
1479 | if (drhd->ignored) | ||
1480 | continue; | ||
1481 | iommu = drhd->iommu; | ||
1482 | free_iommu(iommu); | ||
1483 | } | ||
1484 | return ret; | ||
1485 | } | ||
1486 | |||
1487 | static inline u64 aligned_size(u64 host_addr, size_t size) | ||
1488 | { | ||
1489 | u64 addr; | ||
1490 | addr = (host_addr & (~PAGE_MASK_4K)) + size; | ||
1491 | return PAGE_ALIGN_4K(addr); | ||
1492 | } | ||
1493 | |||
1494 | struct iova * | ||
1495 | iommu_alloc_iova(struct dmar_domain *domain, void *host_addr, size_t size, | ||
1496 | u64 start, u64 end) | ||
1497 | { | ||
1498 | u64 start_addr; | ||
1499 | struct iova *piova; | ||
1500 | |||
1501 | /* Make sure it's in range */ | ||
1502 | if ((start > DOMAIN_MAX_ADDR(domain->gaw)) || end < start) | ||
1503 | return NULL; | ||
1504 | |||
1505 | end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); | ||
1506 | start_addr = PAGE_ALIGN_4K(start); | ||
1507 | size = aligned_size((u64)host_addr, size); | ||
1508 | if (!size || (start_addr + size > end)) | ||
1509 | return NULL; | ||
1510 | |||
1511 | piova = alloc_iova(&domain->iovad, | ||
1512 | size >> PAGE_SHIFT_4K, IOVA_PFN(end)); | ||
1513 | |||
1514 | return piova; | ||
1515 | } | ||
1516 | |||
1517 | static dma_addr_t __intel_map_single(struct device *dev, void *addr, | ||
1518 | size_t size, int dir, u64 *flush_addr, unsigned int *flush_size) | ||
1519 | { | ||
1520 | struct dmar_domain *domain; | ||
1521 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1522 | int ret; | ||
1523 | int prot = 0; | ||
1524 | struct iova *iova = NULL; | ||
1525 | u64 start_addr; | ||
1526 | |||
1527 | addr = (void *)virt_to_phys(addr); | ||
1528 | |||
1529 | domain = get_domain_for_dev(pdev, | ||
1530 | DEFAULT_DOMAIN_ADDRESS_WIDTH); | ||
1531 | if (!domain) { | ||
1532 | printk(KERN_ERR | ||
1533 | "Allocating domain for %s failed", pci_name(pdev)); | ||
1534 | return 0; | ||
1535 | } | ||
1536 | |||
1537 | start_addr = IOVA_START_ADDR; | ||
1538 | |||
1539 | if (pdev->dma_mask <= DMA_32BIT_MASK) { | ||
1540 | iova = iommu_alloc_iova(domain, addr, size, start_addr, | ||
1541 | pdev->dma_mask); | ||
1542 | } else { | ||
1543 | /* | ||
1544 | * First try to allocate an io virtual address in | ||
1545 | * DMA_32BIT_MASK and if that fails then try allocating | ||
1546 | * from higer range | ||
1547 | */ | ||
1548 | iova = iommu_alloc_iova(domain, addr, size, start_addr, | ||
1549 | DMA_32BIT_MASK); | ||
1550 | if (!iova) | ||
1551 | iova = iommu_alloc_iova(domain, addr, size, start_addr, | ||
1552 | pdev->dma_mask); | ||
1553 | } | ||
1554 | |||
1555 | if (!iova) { | ||
1556 | printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); | ||
1557 | return 0; | ||
1558 | } | ||
1559 | |||
1560 | /* make sure context mapping is ok */ | ||
1561 | if (unlikely(!domain_context_mapped(domain, pdev))) { | ||
1562 | ret = domain_context_mapping(domain, pdev); | ||
1563 | if (ret) | ||
1564 | goto error; | ||
1565 | } | ||
1566 | |||
1567 | /* | ||
1568 | * Check if DMAR supports zero-length reads on write only | ||
1569 | * mappings.. | ||
1570 | */ | ||
1571 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ | ||
1572 | !cap_zlr(domain->iommu->cap)) | ||
1573 | prot |= DMA_PTE_READ; | ||
1574 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) | ||
1575 | prot |= DMA_PTE_WRITE; | ||
1576 | /* | ||
1577 | * addr - (addr + size) might be partial page, we should map the whole | ||
1578 | * page. Note: if two part of one page are separately mapped, we | ||
1579 | * might have two guest_addr mapping to the same host addr, but this | ||
1580 | * is not a big problem | ||
1581 | */ | ||
1582 | ret = domain_page_mapping(domain, iova->pfn_lo << PAGE_SHIFT_4K, | ||
1583 | ((u64)addr) & PAGE_MASK_4K, | ||
1584 | (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, prot); | ||
1585 | if (ret) | ||
1586 | goto error; | ||
1587 | |||
1588 | pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", | ||
1589 | pci_name(pdev), size, (u64)addr, | ||
1590 | (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, | ||
1591 | (u64)(iova->pfn_lo << PAGE_SHIFT_4K), dir); | ||
1592 | |||
1593 | *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
1594 | *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K; | ||
1595 | return (iova->pfn_lo << PAGE_SHIFT_4K) + ((u64)addr & (~PAGE_MASK_4K)); | ||
1596 | error: | ||
1597 | __free_iova(&domain->iovad, iova); | ||
1598 | printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", | ||
1599 | pci_name(pdev), size, (u64)addr, dir); | ||
1600 | return 0; | ||
1601 | } | ||
1602 | |||
1603 | static dma_addr_t intel_map_single(struct device *hwdev, void *addr, | ||
1604 | size_t size, int dir) | ||
1605 | { | ||
1606 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
1607 | dma_addr_t ret; | ||
1608 | struct dmar_domain *domain; | ||
1609 | u64 flush_addr; | ||
1610 | unsigned int flush_size; | ||
1611 | |||
1612 | BUG_ON(dir == DMA_NONE); | ||
1613 | if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) | ||
1614 | return virt_to_bus(addr); | ||
1615 | |||
1616 | ret = __intel_map_single(hwdev, addr, size, | ||
1617 | dir, &flush_addr, &flush_size); | ||
1618 | if (ret) { | ||
1619 | domain = find_domain(pdev); | ||
1620 | /* it's a non-present to present mapping */ | ||
1621 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, | ||
1622 | flush_addr, flush_size >> PAGE_SHIFT_4K, 1)) | ||
1623 | iommu_flush_write_buffer(domain->iommu); | ||
1624 | } | ||
1625 | return ret; | ||
1626 | } | ||
1627 | |||
1628 | static void __intel_unmap_single(struct device *dev, dma_addr_t dev_addr, | ||
1629 | size_t size, int dir, u64 *flush_addr, unsigned int *flush_size) | ||
1630 | { | ||
1631 | struct dmar_domain *domain; | ||
1632 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1633 | struct iova *iova; | ||
1634 | |||
1635 | domain = find_domain(pdev); | ||
1636 | BUG_ON(!domain); | ||
1637 | |||
1638 | iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); | ||
1639 | if (!iova) { | ||
1640 | *flush_size = 0; | ||
1641 | return; | ||
1642 | } | ||
1643 | pr_debug("Device %s unmapping: %lx@%llx\n", | ||
1644 | pci_name(pdev), | ||
1645 | (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, | ||
1646 | (u64)(iova->pfn_lo << PAGE_SHIFT_4K)); | ||
1647 | |||
1648 | *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
1649 | *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K; | ||
1650 | /* clear the whole page, not just dev_addr - (dev_addr + size) */ | ||
1651 | dma_pte_clear_range(domain, *flush_addr, *flush_addr + *flush_size); | ||
1652 | /* free page tables */ | ||
1653 | dma_pte_free_pagetable(domain, *flush_addr, *flush_addr + *flush_size); | ||
1654 | /* free iova */ | ||
1655 | __free_iova(&domain->iovad, iova); | ||
1656 | } | ||
1657 | |||
1658 | static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, | ||
1659 | size_t size, int dir) | ||
1660 | { | ||
1661 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1662 | struct dmar_domain *domain; | ||
1663 | u64 flush_addr; | ||
1664 | unsigned int flush_size; | ||
1665 | |||
1666 | if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) | ||
1667 | return; | ||
1668 | |||
1669 | domain = find_domain(pdev); | ||
1670 | __intel_unmap_single(dev, dev_addr, size, | ||
1671 | dir, &flush_addr, &flush_size); | ||
1672 | if (flush_size == 0) | ||
1673 | return; | ||
1674 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, flush_addr, | ||
1675 | flush_size >> PAGE_SHIFT_4K, 0)) | ||
1676 | iommu_flush_write_buffer(domain->iommu); | ||
1677 | } | ||
1678 | |||
1679 | static void * intel_alloc_coherent(struct device *hwdev, size_t size, | ||
1680 | dma_addr_t *dma_handle, gfp_t flags) | ||
1681 | { | ||
1682 | void *vaddr; | ||
1683 | int order; | ||
1684 | |||
1685 | size = PAGE_ALIGN_4K(size); | ||
1686 | order = get_order(size); | ||
1687 | flags &= ~(GFP_DMA | GFP_DMA32); | ||
1688 | |||
1689 | vaddr = (void *)__get_free_pages(flags, order); | ||
1690 | if (!vaddr) | ||
1691 | return NULL; | ||
1692 | memset(vaddr, 0, size); | ||
1693 | |||
1694 | *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL); | ||
1695 | if (*dma_handle) | ||
1696 | return vaddr; | ||
1697 | free_pages((unsigned long)vaddr, order); | ||
1698 | return NULL; | ||
1699 | } | ||
1700 | |||
1701 | static void intel_free_coherent(struct device *hwdev, size_t size, | ||
1702 | void *vaddr, dma_addr_t dma_handle) | ||
1703 | { | ||
1704 | int order; | ||
1705 | |||
1706 | size = PAGE_ALIGN_4K(size); | ||
1707 | order = get_order(size); | ||
1708 | |||
1709 | intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); | ||
1710 | free_pages((unsigned long)vaddr, order); | ||
1711 | } | ||
1712 | |||
1713 | static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg, | ||
1714 | int nelems, int dir) | ||
1715 | { | ||
1716 | int i; | ||
1717 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
1718 | struct dmar_domain *domain; | ||
1719 | u64 flush_addr; | ||
1720 | unsigned int flush_size; | ||
1721 | |||
1722 | if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) | ||
1723 | return; | ||
1724 | |||
1725 | domain = find_domain(pdev); | ||
1726 | for (i = 0; i < nelems; i++, sg++) | ||
1727 | __intel_unmap_single(hwdev, sg->dma_address, | ||
1728 | sg->dma_length, dir, &flush_addr, &flush_size); | ||
1729 | |||
1730 | if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 0)) | ||
1731 | iommu_flush_write_buffer(domain->iommu); | ||
1732 | } | ||
1733 | |||
1734 | #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) | ||
1735 | static int intel_nontranslate_map_sg(struct device *hddev, | ||
1736 | struct scatterlist *sg, int nelems, int dir) | ||
1737 | { | ||
1738 | int i; | ||
1739 | |||
1740 | for (i = 0; i < nelems; i++) { | ||
1741 | struct scatterlist *s = &sg[i]; | ||
1742 | BUG_ON(!s->page); | ||
1743 | s->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(s)); | ||
1744 | s->dma_length = s->length; | ||
1745 | } | ||
1746 | return nelems; | ||
1747 | } | ||
1748 | |||
1749 | static int intel_map_sg(struct device *hwdev, struct scatterlist *sg, | ||
1750 | int nelems, int dir) | ||
1751 | { | ||
1752 | void *addr; | ||
1753 | int i; | ||
1754 | dma_addr_t dma_handle; | ||
1755 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
1756 | struct dmar_domain *domain; | ||
1757 | u64 flush_addr; | ||
1758 | unsigned int flush_size; | ||
1759 | |||
1760 | BUG_ON(dir == DMA_NONE); | ||
1761 | if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) | ||
1762 | return intel_nontranslate_map_sg(hwdev, sg, nelems, dir); | ||
1763 | |||
1764 | for (i = 0; i < nelems; i++, sg++) { | ||
1765 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
1766 | dma_handle = __intel_map_single(hwdev, addr, | ||
1767 | sg->length, dir, &flush_addr, &flush_size); | ||
1768 | if (!dma_handle) { | ||
1769 | intel_unmap_sg(hwdev, sg - i, i, dir); | ||
1770 | sg[0].dma_length = 0; | ||
1771 | return 0; | ||
1772 | } | ||
1773 | sg->dma_address = dma_handle; | ||
1774 | sg->dma_length = sg->length; | ||
1775 | } | ||
1776 | |||
1777 | domain = find_domain(pdev); | ||
1778 | |||
1779 | /* it's a non-present to present mapping */ | ||
1780 | if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 1)) | ||
1781 | iommu_flush_write_buffer(domain->iommu); | ||
1782 | return nelems; | ||
1783 | } | ||
1784 | |||
1785 | static struct dma_mapping_ops intel_dma_ops = { | ||
1786 | .alloc_coherent = intel_alloc_coherent, | ||
1787 | .free_coherent = intel_free_coherent, | ||
1788 | .map_single = intel_map_single, | ||
1789 | .unmap_single = intel_unmap_single, | ||
1790 | .map_sg = intel_map_sg, | ||
1791 | .unmap_sg = intel_unmap_sg, | ||
1792 | }; | ||
1793 | |||
1794 | static inline int iommu_domain_cache_init(void) | ||
1795 | { | ||
1796 | int ret = 0; | ||
1797 | |||
1798 | iommu_domain_cache = kmem_cache_create("iommu_domain", | ||
1799 | sizeof(struct dmar_domain), | ||
1800 | 0, | ||
1801 | SLAB_HWCACHE_ALIGN, | ||
1802 | |||
1803 | NULL); | ||
1804 | if (!iommu_domain_cache) { | ||
1805 | printk(KERN_ERR "Couldn't create iommu_domain cache\n"); | ||
1806 | ret = -ENOMEM; | ||
1807 | } | ||
1808 | |||
1809 | return ret; | ||
1810 | } | ||
1811 | |||
1812 | static inline int iommu_devinfo_cache_init(void) | ||
1813 | { | ||
1814 | int ret = 0; | ||
1815 | |||
1816 | iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", | ||
1817 | sizeof(struct device_domain_info), | ||
1818 | 0, | ||
1819 | SLAB_HWCACHE_ALIGN, | ||
1820 | |||
1821 | NULL); | ||
1822 | if (!iommu_devinfo_cache) { | ||
1823 | printk(KERN_ERR "Couldn't create devinfo cache\n"); | ||
1824 | ret = -ENOMEM; | ||
1825 | } | ||
1826 | |||
1827 | return ret; | ||
1828 | } | ||
1829 | |||
1830 | static inline int iommu_iova_cache_init(void) | ||
1831 | { | ||
1832 | int ret = 0; | ||
1833 | |||
1834 | iommu_iova_cache = kmem_cache_create("iommu_iova", | ||
1835 | sizeof(struct iova), | ||
1836 | 0, | ||
1837 | SLAB_HWCACHE_ALIGN, | ||
1838 | |||
1839 | NULL); | ||
1840 | if (!iommu_iova_cache) { | ||
1841 | printk(KERN_ERR "Couldn't create iova cache\n"); | ||
1842 | ret = -ENOMEM; | ||
1843 | } | ||
1844 | |||
1845 | return ret; | ||
1846 | } | ||
1847 | |||
1848 | static int __init iommu_init_mempool(void) | ||
1849 | { | ||
1850 | int ret; | ||
1851 | ret = iommu_iova_cache_init(); | ||
1852 | if (ret) | ||
1853 | return ret; | ||
1854 | |||
1855 | ret = iommu_domain_cache_init(); | ||
1856 | if (ret) | ||
1857 | goto domain_error; | ||
1858 | |||
1859 | ret = iommu_devinfo_cache_init(); | ||
1860 | if (!ret) | ||
1861 | return ret; | ||
1862 | |||
1863 | kmem_cache_destroy(iommu_domain_cache); | ||
1864 | domain_error: | ||
1865 | kmem_cache_destroy(iommu_iova_cache); | ||
1866 | |||
1867 | return -ENOMEM; | ||
1868 | } | ||
1869 | |||
1870 | static void __init iommu_exit_mempool(void) | ||
1871 | { | ||
1872 | kmem_cache_destroy(iommu_devinfo_cache); | ||
1873 | kmem_cache_destroy(iommu_domain_cache); | ||
1874 | kmem_cache_destroy(iommu_iova_cache); | ||
1875 | |||
1876 | } | ||
1877 | |||
1878 | void __init detect_intel_iommu(void) | ||
1879 | { | ||
1880 | if (swiotlb || no_iommu || iommu_detected || dmar_disabled) | ||
1881 | return; | ||
1882 | if (early_dmar_detect()) { | ||
1883 | iommu_detected = 1; | ||
1884 | } | ||
1885 | } | ||
1886 | |||
1887 | static void __init init_no_remapping_devices(void) | ||
1888 | { | ||
1889 | struct dmar_drhd_unit *drhd; | ||
1890 | |||
1891 | for_each_drhd_unit(drhd) { | ||
1892 | if (!drhd->include_all) { | ||
1893 | int i; | ||
1894 | for (i = 0; i < drhd->devices_cnt; i++) | ||
1895 | if (drhd->devices[i] != NULL) | ||
1896 | break; | ||
1897 | /* ignore DMAR unit if no pci devices exist */ | ||
1898 | if (i == drhd->devices_cnt) | ||
1899 | drhd->ignored = 1; | ||
1900 | } | ||
1901 | } | ||
1902 | |||
1903 | if (dmar_map_gfx) | ||
1904 | return; | ||
1905 | |||
1906 | for_each_drhd_unit(drhd) { | ||
1907 | int i; | ||
1908 | if (drhd->ignored || drhd->include_all) | ||
1909 | continue; | ||
1910 | |||
1911 | for (i = 0; i < drhd->devices_cnt; i++) | ||
1912 | if (drhd->devices[i] && | ||
1913 | !IS_GFX_DEVICE(drhd->devices[i])) | ||
1914 | break; | ||
1915 | |||
1916 | if (i < drhd->devices_cnt) | ||
1917 | continue; | ||
1918 | |||
1919 | /* bypass IOMMU if it is just for gfx devices */ | ||
1920 | drhd->ignored = 1; | ||
1921 | for (i = 0; i < drhd->devices_cnt; i++) { | ||
1922 | if (!drhd->devices[i]) | ||
1923 | continue; | ||
1924 | drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO; | ||
1925 | } | ||
1926 | } | ||
1927 | } | ||
1928 | |||
1929 | int __init intel_iommu_init(void) | ||
1930 | { | ||
1931 | int ret = 0; | ||
1932 | |||
1933 | if (no_iommu || swiotlb || dmar_disabled) | ||
1934 | return -ENODEV; | ||
1935 | |||
1936 | if (dmar_table_init()) | ||
1937 | return -ENODEV; | ||
1938 | |||
1939 | iommu_init_mempool(); | ||
1940 | dmar_init_reserved_ranges(); | ||
1941 | |||
1942 | init_no_remapping_devices(); | ||
1943 | |||
1944 | ret = init_dmars(); | ||
1945 | if (ret) { | ||
1946 | printk(KERN_ERR "IOMMU: dmar init failed\n"); | ||
1947 | put_iova_domain(&reserved_iova_list); | ||
1948 | iommu_exit_mempool(); | ||
1949 | return ret; | ||
1950 | } | ||
1951 | printk(KERN_INFO | ||
1952 | "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); | ||
1953 | |||
1954 | force_iommu = 1; | ||
1955 | dma_ops = &intel_dma_ops; | ||
1956 | return 0; | ||
1957 | } | ||
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h new file mode 100644 index 000000000000..71dda6b56ffa --- /dev/null +++ b/drivers/pci/intel-iommu.h | |||
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
18 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
19 | */ | ||
20 | |||
21 | #ifndef _INTEL_IOMMU_H_ | ||
22 | #define _INTEL_IOMMU_H_ | ||
23 | |||
24 | #include <linux/types.h> | ||
25 | #include <linux/msi.h> | ||
26 | #include "iova.h" | ||
27 | #include <linux/io.h> | ||
28 | |||
29 | /* | ||
30 | * Intel IOMMU register specification per version 1.0 public spec. | ||
31 | */ | ||
32 | |||
/* Byte offsets of the IOMMU registers */
#define DMAR_VER_REG		0x00	/* Arch version supported by this IOMMU */
#define DMAR_CAP_REG		0x08	/* Hardware supported capabilities */
#define DMAR_ECAP_REG		0x10	/* Extended capabilities supported */
#define DMAR_GCMD_REG		0x18	/* Global command register */
#define DMAR_GSTS_REG		0x1c	/* Global status register */
#define DMAR_RTADDR_REG		0x20	/* Root entry table */
#define DMAR_CCMD_REG		0x28	/* Context command reg */
#define DMAR_FSTS_REG		0x34	/* Fault Status register */
#define DMAR_FECTL_REG		0x38	/* Fault control register */
#define DMAR_FEDATA_REG		0x3c	/* Fault event interrupt data register */
#define DMAR_FEADDR_REG		0x40	/* Fault event interrupt addr register */
#define DMAR_FEUADDR_REG	0x44	/* Upper address register */
#define DMAR_AFLOG_REG		0x58	/* Advanced Fault control */
#define DMAR_PMEN_REG		0x64	/* Enable Protected Memory Region */
#define DMAR_PLMBASE_REG	0x68	/* PMRR low base addr */
#define DMAR_PLMLIMIT_REG	0x6c	/* PMRR low limit */
#define DMAR_PHMBASE_REG	0x70	/* PMRR high base addr */
#define DMAR_PHMLIMIT_REG	0x78	/* PMRR high limit */

#define OFFSET_STRIDE		(9)
53 | /* | ||
54 | #define dmar_readl(dmar, reg) readl(dmar + reg) | ||
55 | #define dmar_readq(dmar, reg) ({ \ | ||
56 | u32 lo, hi; \ | ||
57 | lo = readl(dmar + reg); \ | ||
58 | hi = readl(dmar + reg + 4); \ | ||
59 | (((u64) hi) << 32) + lo; }) | ||
60 | */ | ||
61 | static inline u64 dmar_readq(void *addr) | ||
62 | { | ||
63 | u32 lo, hi; | ||
64 | lo = readl(addr); | ||
65 | hi = readl(addr + 4); | ||
66 | return (((u64) hi) << 32) + lo; | ||
67 | } | ||
68 | |||
69 | static inline void dmar_writeq(void __iomem *addr, u64 val) | ||
70 | { | ||
71 | writel((u32)val, addr); | ||
72 | writel((u32)(val >> 32), addr + 4); | ||
73 | } | ||
74 | |||
/* Major/minor fields of the version register value */
#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
#define DMAR_VER_MINOR(v)		((v) & 0x0f)

/*
 * Decoding Capability Register
 */
#define cap_read_drain(c)	(((c) >> 55) & 1)
#define cap_write_drain(c)	(((c) >> 54) & 1)
#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
#define cap_pgsel_inv(c)	(((c) >> 39) & 1)

#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
/*
 * find_first_bit() needs the address of an unsigned long, and the value of
 * cap_super_page_val() is not an lvalue, so copy it into a temporary first.
 */
#define cap_super_offset(c) \
	({ \
		unsigned long __sp_val = cap_super_page_val(c); \
		(find_first_bit(&__sp_val, 4) * OFFSET_STRIDE) + 21; \
	})

#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
#define cap_max_fault_reg_offset(c) \
	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)

#define cap_zlr(c)		(((c) >> 22) & 1)
#define cap_isoch(c)		(((c) >> 23) & 1)
#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
#define cap_caching_mode(c)	(((c) >> 7) & 1)
#define cap_phmr(c)		(((c) >> 6) & 1)
#define cap_plmr(c)		(((c) >> 5) & 1)
#define cap_rwbf(c)		(((c) >> 4) & 1)
#define cap_afl(c)		(((c) >> 3) & 1)
#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))

/*
 * Extended Capability Register
 */
#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
#define ecap_iotlb_offset(e)	((((e) >> 8) & 0x3ff) * 16)
#define ecap_max_iotlb_offset(e) \
	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
#define ecap_coherent(e)	((e) & 0x1)
114 | |||
115 | |||
/* IOTLB_REG */
#define DMA_TLB_GLOBAL_FLUSH	(((u64)1) << 60)
#define DMA_TLB_DSI_FLUSH	(((u64)2) << 60)
#define DMA_TLB_PSI_FLUSH	(((u64)3) << 60)
/* argument parenthesized so that compound expressions expand correctly */
#define DMA_TLB_IIRG(type)	(((type) >> 60) & 7)
#define DMA_TLB_IAIG(val)	(((val) >> 57) & 7)
#define DMA_TLB_READ_DRAIN	(((u64)1) << 49)
#define DMA_TLB_WRITE_DRAIN	(((u64)1) << 48)
#define DMA_TLB_DID(id)		(((u64)((id) & 0xffff)) << 32)
#define DMA_TLB_IVT		(((u64)1) << 63)
#define DMA_TLB_IH_NONLEAF	(((u64)1) << 6)
#define DMA_TLB_MAX_SIZE	(0x3f)
128 | |||
/* GCMD_REG: global command bits */
#define DMA_GCMD_TE		((u32)1 << 31)
#define DMA_GCMD_SRTP		((u32)1 << 30)
#define DMA_GCMD_SFL		((u32)1 << 29)
#define DMA_GCMD_EAFL		((u32)1 << 28)
#define DMA_GCMD_WBF		((u32)1 << 27)

/* GSTS_REG: global status bits, same positions as the command bits */
#define DMA_GSTS_TES		((u32)1 << 31)
#define DMA_GSTS_RTPS		((u32)1 << 30)
#define DMA_GSTS_FLS		((u32)1 << 29)
#define DMA_GSTS_AFLS		((u32)1 << 28)
#define DMA_GSTS_WBFS		((u32)1 << 27)

/* CCMD_REG: context command fields */
#define DMA_CCMD_ICC		((u64)1 << 63)
#define DMA_CCMD_GLOBAL_INVL	((u64)1 << 61)
#define DMA_CCMD_DOMAIN_INVL	((u64)2 << 61)
#define DMA_CCMD_DEVICE_INVL	((u64)3 << 61)
#define DMA_CCMD_FM(m)		((u64)((m) & 0x3) << 32)
#define DMA_CCMD_MASK_NOBIT	0
#define DMA_CCMD_MASK_1BIT	1
#define DMA_CCMD_MASK_2BIT	2
#define DMA_CCMD_MASK_3BIT	3
#define DMA_CCMD_SID(s)		((u64)((s) & 0xffff) << 16)
#define DMA_CCMD_DID(d)		((u64)((d) & 0xffff))

/* FECTL_REG */
#define DMA_FECTL_IM		((u32)1 << 31)

/* FSTS_REG */
#define DMA_FSTS_PPF		((u32)2)
#define DMA_FSTS_PFO		((u32)1)
#define dma_fsts_fault_record_index(s)	(((s) >> 8) & 0xff)
163 | |||
/* FRCD_REG, 32 bits access */
#define DMA_FRCD_F (((u32)1) << 31)
/*
 * Arguments are parenthesized so that compound expressions such as
 * dma_frcd_fault_reason(a | b) are masked as a whole.
 */
#define dma_frcd_type(d) (((d) >> 30) & 1)
#define dma_frcd_fault_reason(c) ((c) & 0xff)
#define dma_frcd_source_id(c) ((c) & 0xffff)
#define dma_frcd_page_addr(d) ((d) & (((u64)-1) << 12)) /* low 64 bit */
170 | |||
171 | /* | ||
172 | * 0: Present | ||
173 | * 1-11: Reserved | ||
174 | * 12-63: Context Ptr (12 - (haw-1)) | ||
175 | * 64-127: Reserved | ||
176 | */ | ||
177 | struct root_entry { | ||
178 | u64 val; | ||
179 | u64 rsvd1; | ||
180 | }; | ||
181 | #define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) | ||
182 | static inline bool root_present(struct root_entry *root) | ||
183 | { | ||
184 | return (root->val & 1); | ||
185 | } | ||
186 | static inline void set_root_present(struct root_entry *root) | ||
187 | { | ||
188 | root->val |= 1; | ||
189 | } | ||
190 | static inline void set_root_value(struct root_entry *root, unsigned long value) | ||
191 | { | ||
192 | root->val |= value & PAGE_MASK_4K; | ||
193 | } | ||
194 | |||
195 | struct context_entry; | ||
196 | static inline struct context_entry * | ||
197 | get_context_addr_from_root(struct root_entry *root) | ||
198 | { | ||
199 | return (struct context_entry *) | ||
200 | (root_present(root)?phys_to_virt( | ||
201 | root->val & PAGE_MASK_4K): | ||
202 | NULL); | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * low 64 bits: | ||
207 | * 0: present | ||
208 | * 1: fault processing disable | ||
209 | * 2-3: translation type | ||
210 | * 12-63: address space root | ||
211 | * high 64 bits: | ||
212 | * 0-2: address width | ||
213 | * 3-6: aval | ||
214 | * 8-23: domain id | ||
215 | */ | ||
216 | struct context_entry { | ||
217 | u64 lo; | ||
218 | u64 hi; | ||
219 | }; | ||
220 | #define context_present(c) ((c).lo & 1) | ||
221 | #define context_fault_disable(c) (((c).lo >> 1) & 1) | ||
222 | #define context_translation_type(c) (((c).lo >> 2) & 3) | ||
223 | #define context_address_root(c) ((c).lo & PAGE_MASK_4K) | ||
224 | #define context_address_width(c) ((c).hi & 7) | ||
225 | #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) | ||
226 | |||
227 | #define context_set_present(c) do {(c).lo |= 1;} while (0) | ||
228 | #define context_set_fault_enable(c) \ | ||
229 | do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) | ||
230 | #define context_set_translation_type(c, val) \ | ||
231 | do { \ | ||
232 | (c).lo &= (((u64)-1) << 4) | 3; \ | ||
233 | (c).lo |= ((val) & 3) << 2; \ | ||
234 | } while (0) | ||
235 | #define CONTEXT_TT_MULTI_LEVEL 0 | ||
236 | #define context_set_address_root(c, val) \ | ||
237 | do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) | ||
238 | #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) | ||
239 | #define context_set_domain_id(c, val) \ | ||
240 | do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) | ||
241 | #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) | ||
242 | |||
243 | /* | ||
244 | * 0: readable | ||
245 | * 1: writable | ||
246 | * 2-6: reserved | ||
247 | * 7: super page | ||
248 | * 8-11: available | ||
249 | * 12-63: Host physical address | ||
250 | */ | ||
251 | struct dma_pte { | ||
252 | u64 val; | ||
253 | }; | ||
254 | #define dma_clear_pte(p) do {(p).val = 0;} while (0) | ||
255 | |||
256 | #define DMA_PTE_READ (1) | ||
257 | #define DMA_PTE_WRITE (2) | ||
258 | |||
259 | #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) | ||
260 | #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) | ||
261 | #define dma_set_pte_prot(p, prot) \ | ||
262 | do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) | ||
263 | #define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) | ||
264 | #define dma_set_pte_addr(p, addr) do {\ | ||
265 | (p).val |= ((addr) & PAGE_MASK_4K); } while (0) | ||
266 | #define dma_pte_present(p) (((p).val & 3) != 0) | ||
267 | |||
268 | struct intel_iommu; | ||
269 | |||
270 | struct dmar_domain { | ||
271 | int id; /* domain id */ | ||
272 | struct intel_iommu *iommu; /* back pointer to owning iommu */ | ||
273 | |||
274 | struct list_head devices; /* all devices' list */ | ||
275 | struct iova_domain iovad; /* iova's that belong to this domain */ | ||
276 | |||
277 | struct dma_pte *pgd; /* virtual address */ | ||
278 | spinlock_t mapping_lock; /* page table lock */ | ||
279 | int gaw; /* max guest address width */ | ||
280 | |||
281 | /* adjusted guest address width, 0 is level 2 30-bit */ | ||
282 | int agaw; | ||
283 | |||
284 | #define DOMAIN_FLAG_MULTIPLE_DEVICES 1 | ||
285 | int flags; | ||
286 | }; | ||
287 | |||
288 | /* PCI domain-device relationship */ | ||
289 | struct device_domain_info { | ||
290 | struct list_head link; /* link to domain siblings */ | ||
291 | struct list_head global; /* link to global list */ | ||
292 | u8 bus; /* PCI bus numer */ | ||
293 | u8 devfn; /* PCI devfn number */ | ||
294 | struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ | ||
295 | struct dmar_domain *domain; /* pointer to domain */ | ||
296 | }; | ||
297 | |||
298 | extern int init_dmars(void); | ||
299 | |||
300 | struct intel_iommu { | ||
301 | void __iomem *reg; /* Pointer to hardware regs, virtual addr */ | ||
302 | u64 cap; | ||
303 | u64 ecap; | ||
304 | unsigned long *domain_ids; /* bitmap of domains */ | ||
305 | struct dmar_domain **domains; /* ptr to domains */ | ||
306 | int seg; | ||
307 | u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ | ||
308 | spinlock_t lock; /* protect context, domain ids */ | ||
309 | spinlock_t register_lock; /* protect register handling */ | ||
310 | struct root_entry *root_entry; /* virtual address */ | ||
311 | |||
312 | unsigned int irq; | ||
313 | unsigned char name[7]; /* Device Name */ | ||
314 | struct msi_msg saved_msg; | ||
315 | struct sys_device sysdev; | ||
316 | }; | ||
317 | |||
318 | #endif | ||
diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8d3e0e38ca4d..7d683dc8ed1e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h | |||
@@ -23,7 +23,14 @@ | |||
23 | 23 | ||
24 | #include <linux/acpi.h> | 24 | #include <linux/acpi.h> |
25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
26 | #include <linux/msi.h> | ||
26 | 27 | ||
28 | #ifdef CONFIG_DMAR | ||
29 | struct intel_iommu; | ||
30 | |||
31 | /* Intel IOMMU detection and initialization functions */ | ||
32 | extern void detect_intel_iommu(void); | ||
33 | extern int intel_iommu_init(void); | ||
27 | 34 | ||
28 | extern int dmar_table_init(void); | 35 | extern int dmar_table_init(void); |
29 | extern int early_dmar_detect(void); | 36 | extern int early_dmar_detect(void); |
@@ -49,4 +56,19 @@ struct dmar_rmrr_unit { | |||
49 | int devices_cnt; /* target device count */ | 56 | int devices_cnt; /* target device count */ |
50 | }; | 57 | }; |
51 | 58 | ||
59 | #define for_each_drhd_unit(drhd) \ | ||
60 | list_for_each_entry(drhd, &dmar_drhd_units, list) | ||
61 | #define for_each_rmrr_units(rmrr) \ | ||
62 | list_for_each_entry(rmrr, &dmar_rmrr_units, list) | ||
63 | #else | ||
/* Stubs used when CONFIG_DMAR is not set */

/* Detection does nothing. */
static inline void detect_intel_iommu(void)
{
}

/* Initialisation always reports that there is no device. */
static inline int intel_iommu_init(void)
{
	return -ENODEV;
}
71 | } | ||
72 | |||
73 | #endif /* !CONFIG_DMAR */ | ||
52 | #endif /* __DMAR_H__ */ | 74 | #endif /* __DMAR_H__ */ |