author     Tejun Heo <tj@kernel.org>    2011-11-28 12:46:22 -0500
committer  Tejun Heo <tj@kernel.org>    2011-11-28 12:46:22 -0500
commit     d4bbf7e7759afc172e2bfbc5c416324590049cdd
tree       7eab5ee5481cd3dcf1162329fec827177640018a
parent     a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2
parent     401d0069cb344f401bc9d264c31db55876ff78c0
Merge branch 'master' into x86/memblock
Conflicts & resolutions:

* arch/x86/xen/setup.c
  dc91c728fd "xen: allow extra memory to be in multiple regions"
  24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."

  Conflicted on the xen_add_extra_mem() updates. The resolution is trivial
  as the latter just wants to replace memblock_x86_reserve_range() with
  memblock_reserve().

* drivers/pci/intel-iommu.c
  166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
  5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."

  Conflicted because the former moved the file under drivers/iommu/.
  Resolved by applying the changes from the latter to the moved file.

* mm/Kconfig
  6661672053a "memblock: add NO_BOOTMEM config symbol"
  c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"

  Conflicted trivially: both added config options. Letting each add its
  own option resolves the conflict.

* mm/memblock.c
  d1f0ece6cdc "mm/memblock.c: small function definition fixes"
  ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"

  Conflicted: the former updates a function removed by the latter. The
  resolution is trivial.

Signed-off-by: Tejun Heo <tj@kernel.org>
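A rough sketch of the memblock API change behind the first conflict above (signatures recalled from that era and assumed here, for illustration only): the x86-specific helper took a start/end pair plus a label, while the generic memblock_reserve() takes a base and a size.

    /* hypothetical range, purely for illustration */
    u64 base = 0x1000000, size = 0x200000;

    /* before the merge: x86-specific helper, range given as start/end */
    memblock_x86_reserve_range(base, base + size, "EXTRA MEM");

    /* after the merge: generic helper, range given as base/size */
    memblock_reserve(base, size);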
Diffstat (limited to 'drivers/iommu')
-rw-r--r--   drivers/iommu/Kconfig                134
-rw-r--r--   drivers/iommu/Makefile                 9
-rw-r--r--   drivers/iommu/amd_iommu.c           2826
-rw-r--r--   drivers/iommu/amd_iommu_init.c      1574
-rw-r--r--   drivers/iommu/amd_iommu_proto.h       54
-rw-r--r--   drivers/iommu/amd_iommu_types.h      585
-rw-r--r--   drivers/iommu/dmar.c                1311
-rw-r--r--   drivers/iommu/intel-iommu.c         4173
-rw-r--r--   drivers/iommu/intr_remapping.c       834
-rw-r--r--   drivers/iommu/intr_remapping.h        17
-rw-r--r--   drivers/iommu/iommu.c                188
-rw-r--r--   drivers/iommu/iova.c                 435
-rw-r--r--   drivers/iommu/msm_iommu.c            738
-rw-r--r--   drivers/iommu/msm_iommu_dev.c        422
-rw-r--r--   drivers/iommu/omap-iommu-debug.c     419
-rw-r--r--   drivers/iommu/omap-iommu.c          1245
-rw-r--r--   drivers/iommu/omap-iovmm.c           743
17 files changed, 15707 insertions, 0 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
new file mode 100644
index 000000000000..5414253b185a
--- /dev/null
+++ b/drivers/iommu/Kconfig
@@ -0,0 +1,134 @@
1# IOMMU_API always gets selected by whoever wants it.
2config IOMMU_API
3 bool
4
5menuconfig IOMMU_SUPPORT
6 bool "IOMMU Hardware Support"
7 default y
8 ---help---
9 Say Y here if you want to compile device drivers for IO Memory
10 Management Units into the kernel. These devices usually allow
11 remapping of DMA requests and/or interrupts from other devices on the
12 system.
13
14if IOMMU_SUPPORT
15
16# MSM IOMMU support
17config MSM_IOMMU
18 bool "MSM IOMMU Support"
19 depends on ARCH_MSM8X60 || ARCH_MSM8960
20 select IOMMU_API
21 help
22 Support for the IOMMUs found on certain Qualcomm SOCs.
23 These IOMMUs allow virtualization of the address space used by most
24 cores within the multimedia subsystem.
25
26 If unsure, say N here.
27
28config IOMMU_PGTABLES_L2
29 def_bool y
30 depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n
31
32# AMD IOMMU support
33config AMD_IOMMU
34 bool "AMD IOMMU support"
35 select SWIOTLB
36 select PCI_MSI
37 select PCI_IOV
38 select IOMMU_API
39 depends on X86_64 && PCI && ACPI
40 ---help---
41 With this option you can enable support for AMD IOMMU hardware in
42 your system. An IOMMU is a hardware component which provides
43 remapping of DMA memory accesses from devices. With an AMD IOMMU you
44 can isolate the DMA memory of different devices and protect the
45 system from misbehaving device drivers or hardware.
46
47 You can find out if your system has an AMD IOMMU by looking in
48 your BIOS for an option to enable it, or by checking whether you have
49 an IVRS ACPI table.
50
51config AMD_IOMMU_STATS
52 bool "Export AMD IOMMU statistics to debugfs"
53 depends on AMD_IOMMU
54 select DEBUG_FS
55 ---help---
56 This option enables code in the AMD IOMMU driver to collect various
57 statistics about what's happening in the driver and export that
58 information to userspace via debugfs.
59 If unsure, say N.
60
61# Intel IOMMU support
62config DMAR_TABLE
63 bool
64
65config INTEL_IOMMU
66 bool "Support for Intel IOMMU using DMA Remapping Devices"
67 depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
68 select IOMMU_API
69 select DMAR_TABLE
70 help
71 DMA remapping (DMAR) device support enables independent address
72 translations for Direct Memory Access (DMA) from devices.
73 These DMA remapping devices are reported via ACPI tables
74 and include the PCI device scope covered by each DMA
75 remapping device.
76
77config INTEL_IOMMU_DEFAULT_ON
78 def_bool y
79 prompt "Enable Intel DMA Remapping Devices by default"
80 depends on INTEL_IOMMU
81 help
82 Selecting this option will enable a DMAR device at boot time if
83 one is found. If this option is not selected, DMAR support can
84 be enabled by passing intel_iommu=on to the kernel.
85
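A usage note, not part of the Kconfig help text above: with this option left disabled, DMAR can still be enabled at boot by appending the parameter to the kernel command line, e.g. in a hypothetical bootloader entry:

    linux /boot/vmlinuz root=/dev/sda1 ro intel_iommu=on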
86config INTEL_IOMMU_BROKEN_GFX_WA
87 bool "Workaround broken graphics drivers (going away soon)"
88 depends on INTEL_IOMMU && BROKEN && X86
89 ---help---
90 Current graphics drivers tend to use physical addresses
91 for DMA and avoid using the DMA API. Setting this config
92 option permits the IOMMU driver to set a unity map for
93 all the OS-visible memory. Hence the driver can continue
94 to use physical addresses for DMA, at least until this
95 option is removed in the 2.6.32 kernel.
96
97config INTEL_IOMMU_FLOPPY_WA
98 def_bool y
99 depends on INTEL_IOMMU && X86
100 ---help---
101 Floppy disk drivers are known to bypass DMA API calls
102 thereby failing to work when an IOMMU is enabled. This
103 workaround will set up a 1:1 mapping for the first
104 16MiB to make floppy (an ISA device) work.
105
106config IRQ_REMAP
107 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
108 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
109 select DMAR_TABLE
110 ---help---
111 Supports interrupt remapping for IO-APIC and MSI devices.
112 To use x2apic mode on CPUs which support x2APIC enhancements, or
113 to support platforms with CPUs having > 8 bit APIC IDs, say Y.
114
115# OMAP IOMMU support
116config OMAP_IOMMU
117 bool "OMAP IOMMU Support"
118 depends on ARCH_OMAP
119 select IOMMU_API
120
121config OMAP_IOVMM
122 tristate "OMAP IO Virtual Memory Manager Support"
123 depends on OMAP_IOMMU
124
125config OMAP_IOMMU_DEBUG
126 tristate "Export OMAP IOMMU/IOVMM internals in DebugFS"
127 depends on OMAP_IOVMM && DEBUG_FS
128 help
129 Select this to see extensive information about
130 the internal state of OMAP IOMMU/IOVMM in debugfs.
131
132 Say N unless you know you need this.
133
134endif # IOMMU_SUPPORT
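For orientation, a sketch of how these symbols could land in a resulting .config on an x86-64 system with only the AMD driver enabled (illustrative only; the exact set depends on the rest of the configuration, and IOMMU_API is selected automatically by AMD_IOMMU):

    CONFIG_IOMMU_SUPPORT=y
    CONFIG_IOMMU_API=y
    CONFIG_AMD_IOMMU=y
    # CONFIG_AMD_IOMMU_STATS is not set
    # CONFIG_INTEL_IOMMU is not set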
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
new file mode 100644
index 000000000000..2f4448794bc7
--- /dev/null
+++ b/drivers/iommu/Makefile
@@ -0,0 +1,9 @@
1obj-$(CONFIG_IOMMU_API) += iommu.o
2obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
3obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
4obj-$(CONFIG_DMAR_TABLE) += dmar.o
5obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
6obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
7obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
8obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o
9obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
new file mode 100644
index 000000000000..4ee277a8521a
--- /dev/null
+++ b/drivers/iommu/amd_iommu.c
@@ -0,0 +1,2826 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/pci-ats.h>
22#include <linux/bitmap.h>
23#include <linux/slab.h>
24#include <linux/debugfs.h>
25#include <linux/scatterlist.h>
26#include <linux/dma-mapping.h>
27#include <linux/iommu-helper.h>
28#include <linux/iommu.h>
29#include <linux/delay.h>
30#include <linux/amd-iommu.h>
31#include <asm/msidef.h>
32#include <asm/proto.h>
33#include <asm/iommu.h>
34#include <asm/gart.h>
35#include <asm/dma.h>
36
37#include "amd_iommu_proto.h"
38#include "amd_iommu_types.h"
39
40#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
41
42#define LOOP_TIMEOUT 100000
43
44static DEFINE_RWLOCK(amd_iommu_devtable_lock);
45
46/* A list of preallocated protection domains */
47static LIST_HEAD(iommu_pd_list);
48static DEFINE_SPINLOCK(iommu_pd_list_lock);
49
50/* List of all available dev_data structures */
51static LIST_HEAD(dev_data_list);
52static DEFINE_SPINLOCK(dev_data_list_lock);
53
54/*
55 * Domain for untranslated devices - only allocated
56 * if iommu=pt passed on kernel cmd line.
57 */
58static struct protection_domain *pt_domain;
59
60static struct iommu_ops amd_iommu_ops;
61
62/*
63 * general struct to manage commands sent to an IOMMU
64 */
65struct iommu_cmd {
66 u32 data[4];
67};
68
69static void update_domain(struct protection_domain *domain);
70
71/****************************************************************************
72 *
73 * Helper functions
74 *
75 ****************************************************************************/
76
77static struct iommu_dev_data *alloc_dev_data(u16 devid)
78{
79 struct iommu_dev_data *dev_data;
80 unsigned long flags;
81
82 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
83 if (!dev_data)
84 return NULL;
85
86 dev_data->devid = devid;
87 atomic_set(&dev_data->bind, 0);
88
89 spin_lock_irqsave(&dev_data_list_lock, flags);
90 list_add_tail(&dev_data->dev_data_list, &dev_data_list);
91 spin_unlock_irqrestore(&dev_data_list_lock, flags);
92
93 return dev_data;
94}
95
96static void free_dev_data(struct iommu_dev_data *dev_data)
97{
98 unsigned long flags;
99
100 spin_lock_irqsave(&dev_data_list_lock, flags);
101 list_del(&dev_data->dev_data_list);
102 spin_unlock_irqrestore(&dev_data_list_lock, flags);
103
104 kfree(dev_data);
105}
106
107static struct iommu_dev_data *search_dev_data(u16 devid)
108{
109 struct iommu_dev_data *dev_data;
110 unsigned long flags;
111
112 spin_lock_irqsave(&dev_data_list_lock, flags);
113 list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
114 if (dev_data->devid == devid)
115 goto out_unlock;
116 }
117
118 dev_data = NULL;
119
120out_unlock:
121 spin_unlock_irqrestore(&dev_data_list_lock, flags);
122
123 return dev_data;
124}
125
126static struct iommu_dev_data *find_dev_data(u16 devid)
127{
128 struct iommu_dev_data *dev_data;
129
130 dev_data = search_dev_data(devid);
131
132 if (dev_data == NULL)
133 dev_data = alloc_dev_data(devid);
134
135 return dev_data;
136}
137
138static inline u16 get_device_id(struct device *dev)
139{
140 struct pci_dev *pdev = to_pci_dev(dev);
141
142 return calc_devid(pdev->bus->number, pdev->devfn);
143}
144
145static struct iommu_dev_data *get_dev_data(struct device *dev)
146{
147 return dev->archdata.iommu;
148}
149
150/*
151 * In this function the list of preallocated protection domains is traversed to
152 * find the domain for a specific device
153 */
154static struct dma_ops_domain *find_protection_domain(u16 devid)
155{
156 struct dma_ops_domain *entry, *ret = NULL;
157 unsigned long flags;
158 u16 alias = amd_iommu_alias_table[devid];
159
160 if (list_empty(&iommu_pd_list))
161 return NULL;
162
163 spin_lock_irqsave(&iommu_pd_list_lock, flags);
164
165 list_for_each_entry(entry, &iommu_pd_list, list) {
166 if (entry->target_dev == devid ||
167 entry->target_dev == alias) {
168 ret = entry;
169 break;
170 }
171 }
172
173 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
174
175 return ret;
176}
177
178/*
179 * This function checks if the driver got a valid device from the caller to
180 * avoid dereferencing invalid pointers.
181 */
182static bool check_device(struct device *dev)
183{
184 u16 devid;
185
186 if (!dev || !dev->dma_mask)
187 return false;
188
189 /* No device or no PCI device */
190 if (dev->bus != &pci_bus_type)
191 return false;
192
193 devid = get_device_id(dev);
194
195 /* Out of our scope? */
196 if (devid > amd_iommu_last_bdf)
197 return false;
198
199 if (amd_iommu_rlookup_table[devid] == NULL)
200 return false;
201
202 return true;
203}
204
205static int iommu_init_device(struct device *dev)
206{
207 struct iommu_dev_data *dev_data;
208 u16 alias;
209
210 if (dev->archdata.iommu)
211 return 0;
212
213 dev_data = find_dev_data(get_device_id(dev));
214 if (!dev_data)
215 return -ENOMEM;
216
217 alias = amd_iommu_alias_table[dev_data->devid];
218 if (alias != dev_data->devid) {
219 struct iommu_dev_data *alias_data;
220
221 alias_data = find_dev_data(alias);
222 if (alias_data == NULL) {
223 pr_err("AMD-Vi: Warning: Unhandled device %s\n",
224 dev_name(dev));
225 free_dev_data(dev_data);
226 return -ENOTSUPP;
227 }
228 dev_data->alias_data = alias_data;
229 }
230
231 dev->archdata.iommu = dev_data;
232
233 return 0;
234}
235
236static void iommu_ignore_device(struct device *dev)
237{
238 u16 devid, alias;
239
240 devid = get_device_id(dev);
241 alias = amd_iommu_alias_table[devid];
242
243 memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
244 memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
245
246 amd_iommu_rlookup_table[devid] = NULL;
247 amd_iommu_rlookup_table[alias] = NULL;
248}
249
250static void iommu_uninit_device(struct device *dev)
251{
252 /*
253 * Nothing to do here - we keep dev_data around for unplugged devices
254 * and reuse it when the device is re-plugged - not doing so would
255 * introduce a ton of races.
256 */
257}
258
259void __init amd_iommu_uninit_devices(void)
260{
261 struct iommu_dev_data *dev_data, *n;
262 struct pci_dev *pdev = NULL;
263
264 for_each_pci_dev(pdev) {
265
266 if (!check_device(&pdev->dev))
267 continue;
268
269 iommu_uninit_device(&pdev->dev);
270 }
271
272 /* Free all of our dev_data structures */
273 list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list)
274 free_dev_data(dev_data);
275}
276
277int __init amd_iommu_init_devices(void)
278{
279 struct pci_dev *pdev = NULL;
280 int ret = 0;
281
282 for_each_pci_dev(pdev) {
283
284 if (!check_device(&pdev->dev))
285 continue;
286
287 ret = iommu_init_device(&pdev->dev);
288 if (ret == -ENOTSUPP)
289 iommu_ignore_device(&pdev->dev);
290 else if (ret)
291 goto out_free;
292 }
293
294 return 0;
295
296out_free:
297
298 amd_iommu_uninit_devices();
299
300 return ret;
301}
302#ifdef CONFIG_AMD_IOMMU_STATS
303
304/*
305 * Initialization code for statistics collection
306 */
307
308DECLARE_STATS_COUNTER(compl_wait);
309DECLARE_STATS_COUNTER(cnt_map_single);
310DECLARE_STATS_COUNTER(cnt_unmap_single);
311DECLARE_STATS_COUNTER(cnt_map_sg);
312DECLARE_STATS_COUNTER(cnt_unmap_sg);
313DECLARE_STATS_COUNTER(cnt_alloc_coherent);
314DECLARE_STATS_COUNTER(cnt_free_coherent);
315DECLARE_STATS_COUNTER(cross_page);
316DECLARE_STATS_COUNTER(domain_flush_single);
317DECLARE_STATS_COUNTER(domain_flush_all);
318DECLARE_STATS_COUNTER(alloced_io_mem);
319DECLARE_STATS_COUNTER(total_map_requests);
320
321static struct dentry *stats_dir;
322static struct dentry *de_fflush;
323
324static void amd_iommu_stats_add(struct __iommu_counter *cnt)
325{
326 if (stats_dir == NULL)
327 return;
328
329 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
330 &cnt->value);
331}
332
333static void amd_iommu_stats_init(void)
334{
335 stats_dir = debugfs_create_dir("amd-iommu", NULL);
336 if (stats_dir == NULL)
337 return;
338
339 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
340 (u32 *)&amd_iommu_unmap_flush);
341
342 amd_iommu_stats_add(&compl_wait);
343 amd_iommu_stats_add(&cnt_map_single);
344 amd_iommu_stats_add(&cnt_unmap_single);
345 amd_iommu_stats_add(&cnt_map_sg);
346 amd_iommu_stats_add(&cnt_unmap_sg);
347 amd_iommu_stats_add(&cnt_alloc_coherent);
348 amd_iommu_stats_add(&cnt_free_coherent);
349 amd_iommu_stats_add(&cross_page);
350 amd_iommu_stats_add(&domain_flush_single);
351 amd_iommu_stats_add(&domain_flush_all);
352 amd_iommu_stats_add(&alloced_io_mem);
353 amd_iommu_stats_add(&total_map_requests);
354}
355
356#endif
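/*
 * Illustrative aside, not part of the original file: the counters declared
 * above are expected to be bumped elsewhere in the driver via a matching
 * helper from amd_iommu_types.h (assumed interface), e.g.
 *
 *	INC_STATS_COUNTER(cnt_map_single);
 *
 * which compiles away to nothing when CONFIG_AMD_IOMMU_STATS is disabled.
 */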
357
358/****************************************************************************
359 *
360 * Interrupt handling functions
361 *
362 ****************************************************************************/
363
364static void dump_dte_entry(u16 devid)
365{
366 int i;
367
368 for (i = 0; i < 8; ++i)
369 pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
370 amd_iommu_dev_table[devid].data[i]);
371}
372
373static void dump_command(unsigned long phys_addr)
374{
375 struct iommu_cmd *cmd = phys_to_virt(phys_addr);
376 int i;
377
378 for (i = 0; i < 4; ++i)
379 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
380}
381
382static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
383{
384 u32 *event = __evt;
385 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
386 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
387 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
388 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
389 u64 address = (u64)(((u64)event[3]) << 32) | event[2];
390
391 printk(KERN_ERR "AMD-Vi: Event logged [");
392
393 switch (type) {
394 case EVENT_TYPE_ILL_DEV:
395 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
396 "address=0x%016llx flags=0x%04x]\n",
397 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
398 address, flags);
399 dump_dte_entry(devid);
400 break;
401 case EVENT_TYPE_IO_FAULT:
402 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
403 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
404 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
405 domid, address, flags);
406 break;
407 case EVENT_TYPE_DEV_TAB_ERR:
408 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
409 "address=0x%016llx flags=0x%04x]\n",
410 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
411 address, flags);
412 break;
413 case EVENT_TYPE_PAGE_TAB_ERR:
414 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
415 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
416 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
417 domid, address, flags);
418 break;
419 case EVENT_TYPE_ILL_CMD:
420 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
421 dump_command(address);
422 break;
423 case EVENT_TYPE_CMD_HARD_ERR:
424 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
425 "flags=0x%04x]\n", address, flags);
426 break;
427 case EVENT_TYPE_IOTLB_INV_TO:
428 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
429 "address=0x%016llx]\n",
430 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
431 address);
432 break;
433 case EVENT_TYPE_INV_DEV_REQ:
434 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
435 "address=0x%016llx flags=0x%04x]\n",
436 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
437 address, flags);
438 break;
439 default:
440 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
441 }
442}
443
444static void iommu_poll_events(struct amd_iommu *iommu)
445{
446 u32 head, tail;
447 unsigned long flags;
448
449 spin_lock_irqsave(&iommu->lock, flags);
450
451 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
452 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
453
454 while (head != tail) {
455 iommu_print_event(iommu, iommu->evt_buf + head);
456 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
457 }
458
459 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
460
461 spin_unlock_irqrestore(&iommu->lock, flags);
462}
463
464irqreturn_t amd_iommu_int_thread(int irq, void *data)
465{
466 struct amd_iommu *iommu;
467
468 for_each_iommu(iommu)
469 iommu_poll_events(iommu);
470
471 return IRQ_HANDLED;
472}
473
474irqreturn_t amd_iommu_int_handler(int irq, void *data)
475{
476 return IRQ_WAKE_THREAD;
477}
478
479/****************************************************************************
480 *
481 * IOMMU command queuing functions
482 *
483 ****************************************************************************/
484
485static int wait_on_sem(volatile u64 *sem)
486{
487 int i = 0;
488
489 while (*sem == 0 && i < LOOP_TIMEOUT) {
490 udelay(1);
491 i += 1;
492 }
493
494 if (i == LOOP_TIMEOUT) {
495 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
496 return -EIO;
497 }
498
499 return 0;
500}
501
502static void copy_cmd_to_buffer(struct amd_iommu *iommu,
503 struct iommu_cmd *cmd,
504 u32 tail)
505{
506 u8 *target;
507
508 target = iommu->cmd_buf + tail;
509 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
510
511 /* Copy command to buffer */
512 memcpy(target, cmd, sizeof(*cmd));
513
514 /* Tell the IOMMU about it */
515 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
516}
517
518static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
519{
520 WARN_ON(address & 0x7ULL);
521
522 memset(cmd, 0, sizeof(*cmd));
523 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
524 cmd->data[1] = upper_32_bits(__pa(address));
525 cmd->data[2] = 1;
526 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
527}
528
529static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
530{
531 memset(cmd, 0, sizeof(*cmd));
532 cmd->data[0] = devid;
533 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
534}
535
536static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
537 size_t size, u16 domid, int pde)
538{
539 u64 pages;
540 int s;
541
542 pages = iommu_num_pages(address, size, PAGE_SIZE);
543 s = 0;
544
545 if (pages > 1) {
546 /*
547 * If we have to flush more than one page, flush all
548 * TLB entries for this domain
549 */
550 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
551 s = 1;
552 }
553
554 address &= PAGE_MASK;
555
556 memset(cmd, 0, sizeof(*cmd));
557 cmd->data[1] |= domid;
558 cmd->data[2] = lower_32_bits(address);
559 cmd->data[3] = upper_32_bits(address);
560 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
561 if (s) /* size bit - we flush more than one 4kb page */
562 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
563 if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
564 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
565}
566
567static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
568 u64 address, size_t size)
569{
570 u64 pages;
571 int s;
572
573 pages = iommu_num_pages(address, size, PAGE_SIZE);
574 s = 0;
575
576 if (pages > 1) {
577 /*
578 * If we have to flush more than one page, flush all
579 * TLB entries for this domain
580 */
581 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
582 s = 1;
583 }
584
585 address &= PAGE_MASK;
586
587 memset(cmd, 0, sizeof(*cmd));
588 cmd->data[0] = devid;
589 cmd->data[0] |= (qdep & 0xff) << 24;
590 cmd->data[1] = devid;
591 cmd->data[2] = lower_32_bits(address);
592 cmd->data[3] = upper_32_bits(address);
593 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
594 if (s)
595 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
596}
597
598static void build_inv_all(struct iommu_cmd *cmd)
599{
600 memset(cmd, 0, sizeof(*cmd));
601 CMD_SET_TYPE(cmd, CMD_INV_ALL);
602}
603
604/*
605 * Writes the command to the IOMMU's command buffer and informs the
606 * hardware about the new command.
607 */
608static int iommu_queue_command_sync(struct amd_iommu *iommu,
609 struct iommu_cmd *cmd,
610 bool sync)
611{
612 u32 left, tail, head, next_tail;
613 unsigned long flags;
614
615 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
616
617again:
618 spin_lock_irqsave(&iommu->lock, flags);
619
620 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
621 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
622 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
623 left = (head - next_tail) % iommu->cmd_buf_size;
624
625 if (left <= 2) {
626 struct iommu_cmd sync_cmd;
627 volatile u64 sem = 0;
628 int ret;
629
630 build_completion_wait(&sync_cmd, (u64)&sem);
631 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
632
633 spin_unlock_irqrestore(&iommu->lock, flags);
634
635 if ((ret = wait_on_sem(&sem)) != 0)
636 return ret;
637
638 goto again;
639 }
640
641 copy_cmd_to_buffer(iommu, cmd, tail);
642
643 /* We need to sync now to make sure all commands are processed */
644 iommu->need_sync = sync;
645
646 spin_unlock_irqrestore(&iommu->lock, flags);
647
648 return 0;
649}
650
651static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
652{
653 return iommu_queue_command_sync(iommu, cmd, true);
654}
655
656/*
657 * This function queues a completion wait command into the command
658 * buffer of an IOMMU
659 */
660static int iommu_completion_wait(struct amd_iommu *iommu)
661{
662 struct iommu_cmd cmd;
663 volatile u64 sem = 0;
664 int ret;
665
666 if (!iommu->need_sync)
667 return 0;
668
669 build_completion_wait(&cmd, (u64)&sem);
670
671 ret = iommu_queue_command_sync(iommu, &cmd, false);
672 if (ret)
673 return ret;
674
675 return wait_on_sem(&sem);
676}
677
678static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
679{
680 struct iommu_cmd cmd;
681
682 build_inv_dte(&cmd, devid);
683
684 return iommu_queue_command(iommu, &cmd);
685}
686
687static void iommu_flush_dte_all(struct amd_iommu *iommu)
688{
689 u32 devid;
690
691 for (devid = 0; devid <= 0xffff; ++devid)
692 iommu_flush_dte(iommu, devid);
693
694 iommu_completion_wait(iommu);
695}
696
697/*
698 * This function uses heavy locking and may disable irqs for some time. But
699 * this is no issue because it is only called during resume.
700 */
701static void iommu_flush_tlb_all(struct amd_iommu *iommu)
702{
703 u32 dom_id;
704
705 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
706 struct iommu_cmd cmd;
707 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
708 dom_id, 1);
709 iommu_queue_command(iommu, &cmd);
710 }
711
712 iommu_completion_wait(iommu);
713}
714
715static void iommu_flush_all(struct amd_iommu *iommu)
716{
717 struct iommu_cmd cmd;
718
719 build_inv_all(&cmd);
720
721 iommu_queue_command(iommu, &cmd);
722 iommu_completion_wait(iommu);
723}
724
725void iommu_flush_all_caches(struct amd_iommu *iommu)
726{
727 if (iommu_feature(iommu, FEATURE_IA)) {
728 iommu_flush_all(iommu);
729 } else {
730 iommu_flush_dte_all(iommu);
731 iommu_flush_tlb_all(iommu);
732 }
733}
734
735/*
736 * Command send function for flushing on-device TLB
737 */
738static int device_flush_iotlb(struct iommu_dev_data *dev_data,
739 u64 address, size_t size)
740{
741 struct amd_iommu *iommu;
742 struct iommu_cmd cmd;
743 int qdep;
744
745 qdep = dev_data->ats.qdep;
746 iommu = amd_iommu_rlookup_table[dev_data->devid];
747
748 build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);
749
750 return iommu_queue_command(iommu, &cmd);
751}
752
753/*
754 * Command send function for invalidating a device table entry
755 */
756static int device_flush_dte(struct iommu_dev_data *dev_data)
757{
758 struct amd_iommu *iommu;
759 int ret;
760
761 iommu = amd_iommu_rlookup_table[dev_data->devid];
762
763 ret = iommu_flush_dte(iommu, dev_data->devid);
764 if (ret)
765 return ret;
766
767 if (dev_data->ats.enabled)
768 ret = device_flush_iotlb(dev_data, 0, ~0UL);
769
770 return ret;
771}
772
773/*
774 * TLB invalidation function which is called from the mapping functions.
775 * It invalidates a single PTE if the range to flush is within a single
776 * page. Otherwise it flushes the whole TLB of the IOMMU.
777 */
778static void __domain_flush_pages(struct protection_domain *domain,
779 u64 address, size_t size, int pde)
780{
781 struct iommu_dev_data *dev_data;
782 struct iommu_cmd cmd;
783 int ret = 0, i;
784
785 build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
786
787 for (i = 0; i < amd_iommus_present; ++i) {
788 if (!domain->dev_iommu[i])
789 continue;
790
791 /*
792 * Devices of this domain are behind this IOMMU
793 * We need a TLB flush
794 */
795 ret |= iommu_queue_command(amd_iommus[i], &cmd);
796 }
797
798 list_for_each_entry(dev_data, &domain->dev_list, list) {
799
800 if (!dev_data->ats.enabled)
801 continue;
802
803 ret |= device_flush_iotlb(dev_data, address, size);
804 }
805
806 WARN_ON(ret);
807}
808
809static void domain_flush_pages(struct protection_domain *domain,
810 u64 address, size_t size)
811{
812 __domain_flush_pages(domain, address, size, 0);
813}
814
815/* Flush the whole IO/TLB for a given protection domain */
816static void domain_flush_tlb(struct protection_domain *domain)
817{
818 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
819}
820
821/* Flush the whole IO/TLB for a given protection domain - including PDE */
822static void domain_flush_tlb_pde(struct protection_domain *domain)
823{
824 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
825}
826
827static void domain_flush_complete(struct protection_domain *domain)
828{
829 int i;
830
831 for (i = 0; i < amd_iommus_present; ++i) {
832 if (!domain->dev_iommu[i])
833 continue;
834
835 /*
836 * Devices of this domain are behind this IOMMU
837 * We need to wait for completion of all commands.
838 */
839 iommu_completion_wait(amd_iommus[i]);
840 }
841}
842
843
844/*
845 * This function flushes the DTEs for all devices in the domain
846 */
847static void domain_flush_devices(struct protection_domain *domain)
848{
849 struct iommu_dev_data *dev_data;
850
851 list_for_each_entry(dev_data, &domain->dev_list, list)
852 device_flush_dte(dev_data);
853}
854
855/****************************************************************************
856 *
857 * The functions below are used to create the page table mappings for
858 * unity mapped regions.
859 *
860 ****************************************************************************/
861
862/*
863 * This function is used to add another level to an IO page table. Adding
864 * another level increases the size of the address space by 9 bits, up to a
865 * maximum of 64 bits.
866 */
867static bool increase_address_space(struct protection_domain *domain,
868 gfp_t gfp)
869{
870 u64 *pte;
871
872 if (domain->mode == PAGE_MODE_6_LEVEL)
873 /* address space already 64 bit large */
874 return false;
875
876 pte = (void *)get_zeroed_page(gfp);
877 if (!pte)
878 return false;
879
880 *pte = PM_LEVEL_PDE(domain->mode,
881 virt_to_phys(domain->pt_root));
882 domain->pt_root = pte;
883 domain->mode += 1;
884 domain->updated = true;
885
886 return true;
887}
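/*
 * Illustrative arithmetic, not part of the original file: each page-table
 * level resolves 9 address bits on top of the 12-bit page offset, so a
 * domain in mode n covers roughly 12 + 9*n bits of IO virtual address
 * space:
 *
 *	mode 2: 12 + 9*2 = 30 bits
 *	mode 3: 12 + 9*3 = 39 bits
 *	mode 6: 12 + 9*6 = 66 bits, capped at 64
 *
 * which is why increase_address_space() refuses to grow past
 * PAGE_MODE_6_LEVEL.
 */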
888
889static u64 *alloc_pte(struct protection_domain *domain,
890 unsigned long address,
891 unsigned long page_size,
892 u64 **pte_page,
893 gfp_t gfp)
894{
895 int level, end_lvl;
896 u64 *pte, *page;
897
898 BUG_ON(!is_power_of_2(page_size));
899
900 while (address > PM_LEVEL_SIZE(domain->mode))
901 increase_address_space(domain, gfp);
902
903 level = domain->mode - 1;
904 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
905 address = PAGE_SIZE_ALIGN(address, page_size);
906 end_lvl = PAGE_SIZE_LEVEL(page_size);
907
908 while (level > end_lvl) {
909 if (!IOMMU_PTE_PRESENT(*pte)) {
910 page = (u64 *)get_zeroed_page(gfp);
911 if (!page)
912 return NULL;
913 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
914 }
915
916 /* No level skipping support yet */
917 if (PM_PTE_LEVEL(*pte) != level)
918 return NULL;
919
920 level -= 1;
921
922 pte = IOMMU_PTE_PAGE(*pte);
923
924 if (pte_page && level == end_lvl)
925 *pte_page = pte;
926
927 pte = &pte[PM_LEVEL_INDEX(level, address)];
928 }
929
930 return pte;
931}
932
933/*
934 * This function checks if there is a PTE for a given dma address. If
935 * there is one, it returns the pointer to it.
936 */
937static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
938{
939 int level;
940 u64 *pte;
941
942 if (address > PM_LEVEL_SIZE(domain->mode))
943 return NULL;
944
945 level = domain->mode - 1;
946 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
947
948 while (level > 0) {
949
950 /* Not Present */
951 if (!IOMMU_PTE_PRESENT(*pte))
952 return NULL;
953
954 /* Large PTE */
955 if (PM_PTE_LEVEL(*pte) == 0x07) {
956 unsigned long pte_mask, __pte;
957
958 /*
959 * If we have a series of large PTEs, make
960 * sure to return a pointer to the first one.
961 */
962 pte_mask = PTE_PAGE_SIZE(*pte);
963 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
964 __pte = ((unsigned long)pte) & pte_mask;
965
966 return (u64 *)__pte;
967 }
968
969 /* No level skipping support yet */
970 if (PM_PTE_LEVEL(*pte) != level)
971 return NULL;
972
973 level -= 1;
974
975 /* Walk to the next level */
976 pte = IOMMU_PTE_PAGE(*pte);
977 pte = &pte[PM_LEVEL_INDEX(level, address)];
978 }
979
980 return pte;
981}
982
983/*
984 * Generic mapping function. It maps a physical address into a DMA
985 * address space. It allocates the page table pages if necessary.
986 * In the future it can be extended to a generic mapping function
987 * supporting all features of AMD IOMMU page tables like level skipping
988 * and full 64 bit address spaces.
989 */
990static int iommu_map_page(struct protection_domain *dom,
991 unsigned long bus_addr,
992 unsigned long phys_addr,
993 int prot,
994 unsigned long page_size)
995{
996 u64 __pte, *pte;
997 int i, count;
998
999 if (!(prot & IOMMU_PROT_MASK))
1000 return -EINVAL;
1001
1002 bus_addr = PAGE_ALIGN(bus_addr);
1003 phys_addr = PAGE_ALIGN(phys_addr);
1004 count = PAGE_SIZE_PTE_COUNT(page_size);
1005 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
1006
1007 for (i = 0; i < count; ++i)
1008 if (IOMMU_PTE_PRESENT(pte[i]))
1009 return -EBUSY;
1010
1011 if (page_size > PAGE_SIZE) {
1012 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
1013 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
1014 } else
1015 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
1016
1017 if (prot & IOMMU_PROT_IR)
1018 __pte |= IOMMU_PTE_IR;
1019 if (prot & IOMMU_PROT_IW)
1020 __pte |= IOMMU_PTE_IW;
1021
1022 for (i = 0; i < count; ++i)
1023 pte[i] = __pte;
1024
1025 update_domain(dom);
1026
1027 return 0;
1028}
1029
1030static unsigned long iommu_unmap_page(struct protection_domain *dom,
1031 unsigned long bus_addr,
1032 unsigned long page_size)
1033{
1034 unsigned long long unmap_size, unmapped;
1035 u64 *pte;
1036
1037 BUG_ON(!is_power_of_2(page_size));
1038
1039 unmapped = 0;
1040
1041 while (unmapped < page_size) {
1042
1043 pte = fetch_pte(dom, bus_addr);
1044
1045 if (!pte) {
1046 /*
1047 * No PTE for this address
1048 * move forward in 4kb steps
1049 */
1050 unmap_size = PAGE_SIZE;
1051 } else if (PM_PTE_LEVEL(*pte) == 0) {
1052 /* 4kb PTE found for this address */
1053 unmap_size = PAGE_SIZE;
1054 *pte = 0ULL;
1055 } else {
1056 int count, i;
1057
1058 /* Large PTE found which maps this address */
1059 unmap_size = PTE_PAGE_SIZE(*pte);
1060 count = PAGE_SIZE_PTE_COUNT(unmap_size);
1061 for (i = 0; i < count; i++)
1062 pte[i] = 0ULL;
1063 }
1064
1065 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
1066 unmapped += unmap_size;
1067 }
1068
1069 BUG_ON(!is_power_of_2(unmapped));
1070
1071 return unmapped;
1072}
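/*
 * Illustrative aside, not part of the original file: the calling convention
 * of the two helpers above.  page_size must be a power of two, prot must
 * contain at least one of IOMMU_PROT_IR/IOMMU_PROT_IW, and unmapping uses
 * the same bus address and size.  A hypothetical 4 KiB identity mapping
 * (dma_dom and addr are placeholders):
 *
 *	if (!iommu_map_page(&dma_dom->domain, addr, addr,
 *			    IOMMU_PROT_IR | IOMMU_PROT_IW, PAGE_SIZE))
 *		iommu_unmap_page(&dma_dom->domain, addr, PAGE_SIZE);
 *
 * dma_ops_unity_map() below does essentially this in a loop for the unity
 * mapped ranges.
 */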
1073
1074/*
1075 * This function checks if a specific unity mapping entry is needed for
1076 * this specific IOMMU.
1077 */
1078static int iommu_for_unity_map(struct amd_iommu *iommu,
1079 struct unity_map_entry *entry)
1080{
1081 u16 bdf, i;
1082
1083 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1084 bdf = amd_iommu_alias_table[i];
1085 if (amd_iommu_rlookup_table[bdf] == iommu)
1086 return 1;
1087 }
1088
1089 return 0;
1090}
1091
1092/*
1093 * This function actually applies the mapping to the page table of the
1094 * dma_ops domain.
1095 */
1096static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1097 struct unity_map_entry *e)
1098{
1099 u64 addr;
1100 int ret;
1101
1102 for (addr = e->address_start; addr < e->address_end;
1103 addr += PAGE_SIZE) {
1104 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1105 PAGE_SIZE);
1106 if (ret)
1107 return ret;
1108 /*
1109 * if unity mapping is in aperture range mark the page
1110 * as allocated in the aperture
1111 */
1112 if (addr < dma_dom->aperture_size)
1113 __set_bit(addr >> PAGE_SHIFT,
1114 dma_dom->aperture[0]->bitmap);
1115 }
1116
1117 return 0;
1118}
1119
1120/*
1121 * Init the unity mappings for a specific IOMMU in the system
1122 *
1123 * Basically iterates over all unity mapping entries and applies them to
1124 * the default DMA domain of that IOMMU if necessary.
1125 */
1126static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1127{
1128 struct unity_map_entry *entry;
1129 int ret;
1130
1131 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1132 if (!iommu_for_unity_map(iommu, entry))
1133 continue;
1134 ret = dma_ops_unity_map(iommu->default_dom, entry);
1135 if (ret)
1136 return ret;
1137 }
1138
1139 return 0;
1140}
1141
1142/*
1143 * Inits the unity mappings required for a specific device
1144 */
1145static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1146 u16 devid)
1147{
1148 struct unity_map_entry *e;
1149 int ret;
1150
1151 list_for_each_entry(e, &amd_iommu_unity_map, list) {
1152 if (!(devid >= e->devid_start && devid <= e->devid_end))
1153 continue;
1154 ret = dma_ops_unity_map(dma_dom, e);
1155 if (ret)
1156 return ret;
1157 }
1158
1159 return 0;
1160}
1161
1162/****************************************************************************
1163 *
1164 * The next functions belong to the address allocator for the dma_ops
1165 * interface functions. They work like the allocators in the other IOMMU
1166 * drivers. It's basically a bitmap which marks the allocated pages in
1167 * the aperture. Maybe it could be enhanced in the future to a more
1168 * efficient allocator.
1169 *
1170 ****************************************************************************/
1171
1172/*
1173 * The address allocator core functions.
1174 *
1175 * called with domain->lock held
1176 */
1177
1178/*
1179 * Used to reserve address ranges in the aperture (e.g. for exclusion
1180 * ranges).
1181 */
1182static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1183 unsigned long start_page,
1184 unsigned int pages)
1185{
1186 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1187
1188 if (start_page + pages > last_page)
1189 pages = last_page - start_page;
1190
1191 for (i = start_page; i < start_page + pages; ++i) {
1192 int index = i / APERTURE_RANGE_PAGES;
1193 int page = i % APERTURE_RANGE_PAGES;
1194 __set_bit(page, dom->aperture[index]->bitmap);
1195 }
1196}
1197
1198/*
1199 * This function is used to add a new aperture range to an existing
1200 * aperture in case of dma_ops domain allocation or address allocation
1201 * failure.
1202 */
1203static int alloc_new_range(struct dma_ops_domain *dma_dom,
1204 bool populate, gfp_t gfp)
1205{
1206 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
1207 struct amd_iommu *iommu;
1208 unsigned long i, old_size;
1209
1210#ifdef CONFIG_IOMMU_STRESS
1211 populate = false;
1212#endif
1213
1214 if (index >= APERTURE_MAX_RANGES)
1215 return -ENOMEM;
1216
1217 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
1218 if (!dma_dom->aperture[index])
1219 return -ENOMEM;
1220
1221 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
1222 if (!dma_dom->aperture[index]->bitmap)
1223 goto out_free;
1224
1225 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
1226
1227 if (populate) {
1228 unsigned long address = dma_dom->aperture_size;
1229 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
1230 u64 *pte, *pte_page;
1231
1232 for (i = 0; i < num_ptes; ++i) {
1233 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1234 &pte_page, gfp);
1235 if (!pte)
1236 goto out_free;
1237
1238 dma_dom->aperture[index]->pte_pages[i] = pte_page;
1239
1240 address += APERTURE_RANGE_SIZE / 64;
1241 }
1242 }
1243
1244 old_size = dma_dom->aperture_size;
1245 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
1246
1247 /* Reserve address range used for MSI messages */
1248 if (old_size < MSI_ADDR_BASE_LO &&
1249 dma_dom->aperture_size > MSI_ADDR_BASE_LO) {
1250 unsigned long spage;
1251 int pages;
1252
1253 pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE);
1254 spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT;
1255
1256 dma_ops_reserve_addresses(dma_dom, spage, pages);
1257 }
1258
1259 /* Initialize the exclusion range if necessary */
1260 for_each_iommu(iommu) {
1261 if (iommu->exclusion_start &&
1262 iommu->exclusion_start >= dma_dom->aperture[index]->offset
1263 && iommu->exclusion_start < dma_dom->aperture_size) {
1264 unsigned long startpage;
1265 int pages = iommu_num_pages(iommu->exclusion_start,
1266 iommu->exclusion_length,
1267 PAGE_SIZE);
1268 startpage = iommu->exclusion_start >> PAGE_SHIFT;
1269 dma_ops_reserve_addresses(dma_dom, startpage, pages);
1270 }
1271 }
1272
1273 /*
1274 * Check for areas already mapped as present in the new aperture
1275 * range and mark those pages as reserved in the allocator. Such
1276 * mappings may already exist as a result of requested unity
1277 * mappings for devices.
1278 */
1279 for (i = dma_dom->aperture[index]->offset;
1280 i < dma_dom->aperture_size;
1281 i += PAGE_SIZE) {
1282 u64 *pte = fetch_pte(&dma_dom->domain, i);
1283 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1284 continue;
1285
1286 dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
1287 }
1288
1289 update_domain(&dma_dom->domain);
1290
1291 return 0;
1292
1293out_free:
1294 update_domain(&dma_dom->domain);
1295
1296 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
1297
1298 kfree(dma_dom->aperture[index]);
1299 dma_dom->aperture[index] = NULL;
1300
1301 return -ENOMEM;
1302}
1303
1304static unsigned long dma_ops_area_alloc(struct device *dev,
1305 struct dma_ops_domain *dom,
1306 unsigned int pages,
1307 unsigned long align_mask,
1308 u64 dma_mask,
1309 unsigned long start)
1310{
1311 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1312 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1313 int i = start >> APERTURE_RANGE_SHIFT;
1314 unsigned long boundary_size;
1315 unsigned long address = -1;
1316 unsigned long limit;
1317
1318 next_bit >>= PAGE_SHIFT;
1319
1320 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1321 PAGE_SIZE) >> PAGE_SHIFT;
1322
1323 for (;i < max_index; ++i) {
1324 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
1325
1326 if (dom->aperture[i]->offset >= dma_mask)
1327 break;
1328
1329 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
1330 dma_mask >> PAGE_SHIFT);
1331
1332 address = iommu_area_alloc(dom->aperture[i]->bitmap,
1333 limit, next_bit, pages, 0,
1334 boundary_size, align_mask);
1335 if (address != -1) {
1336 address = dom->aperture[i]->offset +
1337 (address << PAGE_SHIFT);
1338 dom->next_address = address + (pages << PAGE_SHIFT);
1339 break;
1340 }
1341
1342 next_bit = 0;
1343 }
1344
1345 return address;
1346}
1347
1348static unsigned long dma_ops_alloc_addresses(struct device *dev,
1349 struct dma_ops_domain *dom,
1350 unsigned int pages,
1351 unsigned long align_mask,
1352 u64 dma_mask)
1353{
1354 unsigned long address;
1355
1356#ifdef CONFIG_IOMMU_STRESS
1357 dom->next_address = 0;
1358 dom->need_flush = true;
1359#endif
1360
1361 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1362 dma_mask, dom->next_address);
1363
1364 if (address == -1) {
1365 dom->next_address = 0;
1366 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1367 dma_mask, 0);
1368 dom->need_flush = true;
1369 }
1370
1371 if (unlikely(address == -1))
1372 address = DMA_ERROR_CODE;
1373
1374 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1375
1376 return address;
1377}
1378
1379/*
1380 * The address free function.
1381 *
1382 * called with domain->lock held
1383 */
1384static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1385 unsigned long address,
1386 unsigned int pages)
1387{
1388 unsigned i = address >> APERTURE_RANGE_SHIFT;
1389 struct aperture_range *range = dom->aperture[i];
1390
1391 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1392
1393#ifdef CONFIG_IOMMU_STRESS
1394 if (i < 4)
1395 return;
1396#endif
1397
1398 if (address >= dom->next_address)
1399 dom->need_flush = true;
1400
1401 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
1402
1403 bitmap_clear(range->bitmap, address, pages);
1404
1405}
1406
1407/****************************************************************************
1408 *
1409 * The next functions belong to the domain allocation. A domain is
1410 * allocated for every IOMMU as the default domain. If device isolation
1411 * is enabled, every device gets its own domain. The most important thing
1412 * about domains is the page table mapping the DMA address space they
1413 * contain.
1414 *
1415 ****************************************************************************/
1416
1417/*
1418 * This function adds a protection domain to the global protection domain list
1419 */
1420static void add_domain_to_list(struct protection_domain *domain)
1421{
1422 unsigned long flags;
1423
1424 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1425 list_add(&domain->list, &amd_iommu_pd_list);
1426 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1427}
1428
1429/*
1430 * This function removes a protection domain from the global
1431 * protection domain list
1432 */
1433static void del_domain_from_list(struct protection_domain *domain)
1434{
1435 unsigned long flags;
1436
1437 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1438 list_del(&domain->list);
1439 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1440}
1441
1442static u16 domain_id_alloc(void)
1443{
1444 unsigned long flags;
1445 int id;
1446
1447 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1448 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1449 BUG_ON(id == 0);
1450 if (id > 0 && id < MAX_DOMAIN_ID)
1451 __set_bit(id, amd_iommu_pd_alloc_bitmap);
1452 else
1453 id = 0;
1454 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1455
1456 return id;
1457}
1458
1459static void domain_id_free(int id)
1460{
1461 unsigned long flags;
1462
1463 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1464 if (id > 0 && id < MAX_DOMAIN_ID)
1465 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1466 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1467}
1468
1469static void free_pagetable(struct protection_domain *domain)
1470{
1471 int i, j;
1472 u64 *p1, *p2, *p3;
1473
1474 p1 = domain->pt_root;
1475
1476 if (!p1)
1477 return;
1478
1479 for (i = 0; i < 512; ++i) {
1480 if (!IOMMU_PTE_PRESENT(p1[i]))
1481 continue;
1482
1483 p2 = IOMMU_PTE_PAGE(p1[i]);
1484 for (j = 0; j < 512; ++j) {
1485 if (!IOMMU_PTE_PRESENT(p2[j]))
1486 continue;
1487 p3 = IOMMU_PTE_PAGE(p2[j]);
1488 free_page((unsigned long)p3);
1489 }
1490
1491 free_page((unsigned long)p2);
1492 }
1493
1494 free_page((unsigned long)p1);
1495
1496 domain->pt_root = NULL;
1497}
1498
1499/*
1500 * Free a domain, only used if something went wrong in the
1501 * allocation path and we need to free an already allocated page table
1502 */
1503static void dma_ops_domain_free(struct dma_ops_domain *dom)
1504{
1505 int i;
1506
1507 if (!dom)
1508 return;
1509
1510 del_domain_from_list(&dom->domain);
1511
1512 free_pagetable(&dom->domain);
1513
1514 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1515 if (!dom->aperture[i])
1516 continue;
1517 free_page((unsigned long)dom->aperture[i]->bitmap);
1518 kfree(dom->aperture[i]);
1519 }
1520
1521 kfree(dom);
1522}
1523
1524/*
1525 * Allocates a new protection domain usable for the dma_ops functions.
1526 * It also initializes the page table and the address allocator data
1527 * structures required for the dma_ops interface
1528 */
1529static struct dma_ops_domain *dma_ops_domain_alloc(void)
1530{
1531 struct dma_ops_domain *dma_dom;
1532
1533 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1534 if (!dma_dom)
1535 return NULL;
1536
1537 spin_lock_init(&dma_dom->domain.lock);
1538
1539 dma_dom->domain.id = domain_id_alloc();
1540 if (dma_dom->domain.id == 0)
1541 goto free_dma_dom;
1542 INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1543 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1544 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1545 dma_dom->domain.flags = PD_DMA_OPS_MASK;
1546 dma_dom->domain.priv = dma_dom;
1547 if (!dma_dom->domain.pt_root)
1548 goto free_dma_dom;
1549
1550 dma_dom->need_flush = false;
1551 dma_dom->target_dev = 0xffff;
1552
1553 add_domain_to_list(&dma_dom->domain);
1554
1555 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1556 goto free_dma_dom;
1557
1558 /*
1559 * mark the first page as allocated so we never return 0 as
1560 * a valid dma-address; that lets us use 0 as an error value
1561 */
1562 dma_dom->aperture[0]->bitmap[0] = 1;
1563 dma_dom->next_address = 0;
1564
1565
1566 return dma_dom;
1567
1568free_dma_dom:
1569 dma_ops_domain_free(dma_dom);
1570
1571 return NULL;
1572}
1573
1574/*
1575 * little helper function to check whether a given protection domain is a
1576 * dma_ops domain
1577 */
1578static bool dma_ops_domain(struct protection_domain *domain)
1579{
1580 return domain->flags & PD_DMA_OPS_MASK;
1581}
1582
1583static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1584{
1585 u64 pte_root = virt_to_phys(domain->pt_root);
1586 u32 flags = 0;
1587
1588 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1589 << DEV_ENTRY_MODE_SHIFT;
1590 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1591
1592 if (ats)
1593 flags |= DTE_FLAG_IOTLB;
1594
1595 amd_iommu_dev_table[devid].data[3] |= flags;
1596 amd_iommu_dev_table[devid].data[2] = domain->id;
1597 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1598 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1599}
1600
1601static void clear_dte_entry(u16 devid)
1602{
1603 /* remove entry from the device table seen by the hardware */
1604 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1605 amd_iommu_dev_table[devid].data[1] = 0;
1606 amd_iommu_dev_table[devid].data[2] = 0;
1607
1608 amd_iommu_apply_erratum_63(devid);
1609}
1610
1611static void do_attach(struct iommu_dev_data *dev_data,
1612 struct protection_domain *domain)
1613{
1614 struct amd_iommu *iommu;
1615 bool ats;
1616
1617 iommu = amd_iommu_rlookup_table[dev_data->devid];
1618 ats = dev_data->ats.enabled;
1619
1620 /* Update data structures */
1621 dev_data->domain = domain;
1622 list_add(&dev_data->list, &domain->dev_list);
1623 set_dte_entry(dev_data->devid, domain, ats);
1624
1625 /* Do reference counting */
1626 domain->dev_iommu[iommu->index] += 1;
1627 domain->dev_cnt += 1;
1628
1629 /* Flush the DTE entry */
1630 device_flush_dte(dev_data);
1631}
1632
1633static void do_detach(struct iommu_dev_data *dev_data)
1634{
1635 struct amd_iommu *iommu;
1636
1637 iommu = amd_iommu_rlookup_table[dev_data->devid];
1638
1639 /* decrease reference counters */
1640 dev_data->domain->dev_iommu[iommu->index] -= 1;
1641 dev_data->domain->dev_cnt -= 1;
1642
1643 /* Update data structures */
1644 dev_data->domain = NULL;
1645 list_del(&dev_data->list);
1646 clear_dte_entry(dev_data->devid);
1647
1648 /* Flush the DTE entry */
1649 device_flush_dte(dev_data);
1650}
1651
1652/*
1653 * If a device is not yet associated with a domain, this function
1654 * assigns it to the given domain and makes it visible to the hardware
1655 */
1656static int __attach_device(struct iommu_dev_data *dev_data,
1657 struct protection_domain *domain)
1658{
1659 int ret;
1660
1661 /* lock domain */
1662 spin_lock(&domain->lock);
1663
1664 if (dev_data->alias_data != NULL) {
1665 struct iommu_dev_data *alias_data = dev_data->alias_data;
1666
1667 /* Some sanity checks */
1668 ret = -EBUSY;
1669 if (alias_data->domain != NULL &&
1670 alias_data->domain != domain)
1671 goto out_unlock;
1672
1673 if (dev_data->domain != NULL &&
1674 dev_data->domain != domain)
1675 goto out_unlock;
1676
1677 /* Do real assignment */
1678 if (alias_data->domain == NULL)
1679 do_attach(alias_data, domain);
1680
1681 atomic_inc(&alias_data->bind);
1682 }
1683
1684 if (dev_data->domain == NULL)
1685 do_attach(dev_data, domain);
1686
1687 atomic_inc(&dev_data->bind);
1688
1689 ret = 0;
1690
1691out_unlock:
1692
1693 /* ready */
1694 spin_unlock(&domain->lock);
1695
1696 return ret;
1697}
1698
1699/*
1700 * If a device is not yet associated with a domain, this function
1701 * assigns it to the given domain and makes it visible to the hardware
1702 */
1703static int attach_device(struct device *dev,
1704 struct protection_domain *domain)
1705{
1706 struct pci_dev *pdev = to_pci_dev(dev);
1707 struct iommu_dev_data *dev_data;
1708 unsigned long flags;
1709 int ret;
1710
1711 dev_data = get_dev_data(dev);
1712
1713 if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
1714 dev_data->ats.enabled = true;
1715 dev_data->ats.qdep = pci_ats_queue_depth(pdev);
1716 }
1717
1718 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1719 ret = __attach_device(dev_data, domain);
1720 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1721
1722 /*
1723 * We might boot into a crash-kernel here. The crashed kernel
1724 * left the caches in the IOMMU dirty. So we have to flush
1725 * here to evict all dirty stuff.
1726 */
1727 domain_flush_tlb_pde(domain);
1728
1729 return ret;
1730}
1731
1732/*
1733 * Removes a device from a protection domain (unlocked)
1734 */
1735static void __detach_device(struct iommu_dev_data *dev_data)
1736{
1737 struct protection_domain *domain;
1738 unsigned long flags;
1739
1740 BUG_ON(!dev_data->domain);
1741
1742 domain = dev_data->domain;
1743
1744 spin_lock_irqsave(&domain->lock, flags);
1745
1746 if (dev_data->alias_data != NULL) {
1747 struct iommu_dev_data *alias_data = dev_data->alias_data;
1748
1749 if (atomic_dec_and_test(&alias_data->bind))
1750 do_detach(alias_data);
1751 }
1752
1753 if (atomic_dec_and_test(&dev_data->bind))
1754 do_detach(dev_data);
1755
1756 spin_unlock_irqrestore(&domain->lock, flags);
1757
1758 /*
1759 * If we run in passthrough mode the device must be assigned to the
1760 * passthrough domain if it is detached from any other domain.
1761 * Make sure we can deassign from the pt_domain itself.
1762 */
1763 if (iommu_pass_through &&
1764 (dev_data->domain == NULL && domain != pt_domain))
1765 __attach_device(dev_data, pt_domain);
1766}
1767
1768/*
1769 * Removes a device from a protection domain (with devtable_lock held)
1770 */
1771static void detach_device(struct device *dev)
1772{
1773 struct iommu_dev_data *dev_data;
1774 unsigned long flags;
1775
1776 dev_data = get_dev_data(dev);
1777
1778 /* lock device table */
1779 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1780 __detach_device(dev_data);
1781 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1782
1783 if (dev_data->ats.enabled) {
1784 pci_disable_ats(to_pci_dev(dev));
1785 dev_data->ats.enabled = false;
1786 }
1787}
1788
1789/*
1790 * Find out the protection domain structure for a given PCI device. This
1791 * will give us the pointer to the page table root for example.
1792 */
1793static struct protection_domain *domain_for_device(struct device *dev)
1794{
1795 struct iommu_dev_data *dev_data;
1796 struct protection_domain *dom = NULL;
1797 unsigned long flags;
1798
1799 dev_data = get_dev_data(dev);
1800
1801 if (dev_data->domain)
1802 return dev_data->domain;
1803
1804 if (dev_data->alias_data != NULL) {
1805 struct iommu_dev_data *alias_data = dev_data->alias_data;
1806
1807 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1808 if (alias_data->domain != NULL) {
1809 __attach_device(dev_data, alias_data->domain);
1810 dom = alias_data->domain;
1811 }
1812 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1813 }
1814
1815 return dom;
1816}
1817
1818static int device_change_notifier(struct notifier_block *nb,
1819 unsigned long action, void *data)
1820{
1821 struct device *dev = data;
1822 u16 devid;
1823 struct protection_domain *domain;
1824 struct dma_ops_domain *dma_domain;
1825 struct amd_iommu *iommu;
1826 unsigned long flags;
1827
1828 if (!check_device(dev))
1829 return 0;
1830
1831 devid = get_device_id(dev);
1832 iommu = amd_iommu_rlookup_table[devid];
1833
1834 switch (action) {
1835 case BUS_NOTIFY_UNBOUND_DRIVER:
1836
1837 domain = domain_for_device(dev);
1838
1839 if (!domain)
1840 goto out;
1841 if (iommu_pass_through)
1842 break;
1843 detach_device(dev);
1844 break;
1845 case BUS_NOTIFY_ADD_DEVICE:
1846
1847 iommu_init_device(dev);
1848
1849 domain = domain_for_device(dev);
1850
1851 /* allocate a protection domain if a device is added */
1852 dma_domain = find_protection_domain(devid);
1853 if (dma_domain)
1854 goto out;
1855 dma_domain = dma_ops_domain_alloc();
1856 if (!dma_domain)
1857 goto out;
1858 dma_domain->target_dev = devid;
1859
1860 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1861 list_add_tail(&dma_domain->list, &iommu_pd_list);
1862 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1863
1864 break;
1865 case BUS_NOTIFY_DEL_DEVICE:
1866
1867 iommu_uninit_device(dev);
1868
1869 default:
1870 goto out;
1871 }
1872
1873 iommu_completion_wait(iommu);
1874
1875out:
1876 return 0;
1877}
1878
1879static struct notifier_block device_nb = {
1880 .notifier_call = device_change_notifier,
1881};
1882
1883void amd_iommu_init_notifier(void)
1884{
1885 bus_register_notifier(&pci_bus_type, &device_nb);
1886}
1887
1888/*****************************************************************************
1889 *
1890 * The next functions belong to the dma_ops mapping/unmapping code.
1891 *
1892 *****************************************************************************/
1893
1894/*
1895 * In the dma_ops path we only have the struct device. This function
1896 * finds the corresponding IOMMU, the protection domain and the
1897 * requestor id for a given device.
1898 * If the device is not yet associated with a domain this is also done
1899 * in this function.
1900 */
1901static struct protection_domain *get_domain(struct device *dev)
1902{
1903 struct protection_domain *domain;
1904 struct dma_ops_domain *dma_dom;
1905 u16 devid = get_device_id(dev);
1906
1907 if (!check_device(dev))
1908 return ERR_PTR(-EINVAL);
1909
1910 domain = domain_for_device(dev);
1911 if (domain != NULL && !dma_ops_domain(domain))
1912 return ERR_PTR(-EBUSY);
1913
1914 if (domain != NULL)
1915 return domain;
1916
1917	/* Device not bound yet - bind it */
1918 dma_dom = find_protection_domain(devid);
1919 if (!dma_dom)
1920 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1921 attach_device(dev, &dma_dom->domain);
1922 DUMP_printk("Using protection domain %d for device %s\n",
1923 dma_dom->domain.id, dev_name(dev));
1924
1925 return &dma_dom->domain;
1926}
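
/*
 * A minimal, stand-alone sketch of the PTE composition done in
 * dma_ops_domain_map() below: the page-aligned physical address is OR'ed
 * with the present (P) and FC bits plus the read/write permission bits
 * derived from the DMA direction. The bit positions used here are
 * illustrative assumptions; the real IOMMU_PTE_* values live in
 * amd_iommu_types.h.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PTE_P	(1ULL << 0)	/* assumed position of IOMMU_PTE_P  */
#define SKETCH_PTE_FC	(1ULL << 60)	/* assumed position of IOMMU_PTE_FC */
#define SKETCH_PTE_IR	(1ULL << 61)	/* assumed position of IOMMU_PTE_IR */
#define SKETCH_PTE_IW	(1ULL << 62)	/* assumed position of IOMMU_PTE_IW */

enum sketch_dir { SKETCH_TO_DEVICE, SKETCH_FROM_DEVICE, SKETCH_BIDIRECTIONAL };

static uint64_t sketch_make_pte(uint64_t paddr, enum sketch_dir dir)
{
	uint64_t pte = (paddr & ~0xfffULL) | SKETCH_PTE_P | SKETCH_PTE_FC;

	if (dir == SKETCH_TO_DEVICE)
		pte |= SKETCH_PTE_IR;			/* device only reads  */
	else if (dir == SKETCH_FROM_DEVICE)
		pte |= SKETCH_PTE_IW;			/* device only writes */
	else
		pte |= SKETCH_PTE_IR | SKETCH_PTE_IW;	/* both directions    */

	return pte;
}

int main(void)
{
	printf("bidirectional pte for 0x1234000: 0x%llx\n", (unsigned long long)
	       sketch_make_pte(0x1234000ULL, SKETCH_BIDIRECTIONAL));
	return 0;
}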
1927
1928static void update_device_table(struct protection_domain *domain)
1929{
1930 struct iommu_dev_data *dev_data;
1931
1932 list_for_each_entry(dev_data, &domain->dev_list, list)
1933 set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
1934}
1935
1936static void update_domain(struct protection_domain *domain)
1937{
1938 if (!domain->updated)
1939 return;
1940
1941 update_device_table(domain);
1942
1943 domain_flush_devices(domain);
1944 domain_flush_tlb_pde(domain);
1945
1946 domain->updated = false;
1947}
1948
1949/*
1950 * This function fetches the PTE for a given address in the aperture
1951 */
1952static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1953 unsigned long address)
1954{
1955 struct aperture_range *aperture;
1956 u64 *pte, *pte_page;
1957
1958 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1959 if (!aperture)
1960 return NULL;
1961
1962 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1963 if (!pte) {
1964 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1965 GFP_ATOMIC);
1966 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1967 } else
1968 pte += PM_LEVEL_INDEX(0, address);
1969
1970 update_domain(&dom->domain);
1971
1972 return pte;
1973}
1974
1975/*
1976 * This is the generic map function. It maps one 4kb page at paddr to
1977 * the given address in the DMA address space for the domain.
1978 */
1979static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1980 unsigned long address,
1981 phys_addr_t paddr,
1982 int direction)
1983{
1984 u64 *pte, __pte;
1985
1986 WARN_ON(address > dom->aperture_size);
1987
1988 paddr &= PAGE_MASK;
1989
1990 pte = dma_ops_get_pte(dom, address);
1991 if (!pte)
1992 return DMA_ERROR_CODE;
1993
1994 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1995
1996 if (direction == DMA_TO_DEVICE)
1997 __pte |= IOMMU_PTE_IR;
1998 else if (direction == DMA_FROM_DEVICE)
1999 __pte |= IOMMU_PTE_IW;
2000 else if (direction == DMA_BIDIRECTIONAL)
2001 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
2002
2003 WARN_ON(*pte);
2004
2005 *pte = __pte;
2006
2007 return (dma_addr_t)address;
2008}
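
/*
 * Note: the IOMMU_PTE_* bit values used by dma_ops_domain_map() above are
 * defined in amd_iommu_types.h; the small sketch earlier in this file uses
 * assumed positions purely for illustration.
 */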
2009
2010/*
2011 * The generic unmapping function for one page in the DMA address space.
2012 */
2013static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
2014 unsigned long address)
2015{
2016 struct aperture_range *aperture;
2017 u64 *pte;
2018
2019 if (address >= dom->aperture_size)
2020 return;
2021
2022 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
2023 if (!aperture)
2024 return;
2025
2026 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
2027 if (!pte)
2028 return;
2029
2030 pte += PM_LEVEL_INDEX(0, address);
2031
2032 WARN_ON(!*pte);
2033
2034 *pte = 0ULL;
2035}
2036
2037/*
2038 * This function contains common code for mapping a physically
2039 * contiguous memory region into DMA address space. It is used by all
2040 * mapping functions provided with this IOMMU driver.
2041 * Must be called with the domain lock held.
2042 */
2043static dma_addr_t __map_single(struct device *dev,
2044 struct dma_ops_domain *dma_dom,
2045 phys_addr_t paddr,
2046 size_t size,
2047 int dir,
2048 bool align,
2049 u64 dma_mask)
2050{
2051 dma_addr_t offset = paddr & ~PAGE_MASK;
2052 dma_addr_t address, start, ret;
2053 unsigned int pages;
2054 unsigned long align_mask = 0;
2055 int i;
2056
2057 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
2058 paddr &= PAGE_MASK;
2059
2060 INC_STATS_COUNTER(total_map_requests);
2061
2062 if (pages > 1)
2063 INC_STATS_COUNTER(cross_page);
2064
2065 if (align)
2066 align_mask = (1UL << get_order(size)) - 1;
2067
2068retry:
2069 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
2070 dma_mask);
2071 if (unlikely(address == DMA_ERROR_CODE)) {
2072 /*
2073 * setting next_address here will let the address
2074 * allocator only scan the newly allocated range in the
2075 * first run. This is a small optimization.
2076 */
2077 dma_dom->next_address = dma_dom->aperture_size;
2078
2079 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
2080 goto out;
2081
2082 /*
2083 * aperture was successfully enlarged by 128 MB, try
2084 * allocation again
2085 */
2086 goto retry;
2087 }
2088
2089 start = address;
2090 for (i = 0; i < pages; ++i) {
2091 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
2092 if (ret == DMA_ERROR_CODE)
2093 goto out_unmap;
2094
2095 paddr += PAGE_SIZE;
2096 start += PAGE_SIZE;
2097 }
2098 address += offset;
2099
2100 ADD_STATS_COUNTER(alloced_io_mem, size);
2101
2102 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
2103 domain_flush_tlb(&dma_dom->domain);
2104 dma_dom->need_flush = false;
2105 } else if (unlikely(amd_iommu_np_cache))
2106 domain_flush_pages(&dma_dom->domain, address, size);
2107
2108out:
2109 return address;
2110
2111out_unmap:
2112
2113 for (--i; i >= 0; --i) {
2114 start -= PAGE_SIZE;
2115 dma_ops_domain_unmap(dma_dom, start);
2116 }
2117
2118 dma_ops_free_addresses(dma_dom, address, pages);
2119
2120 return DMA_ERROR_CODE;
2121}
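
/*
 * Stand-alone sketch of the address arithmetic in __map_single() above,
 * assuming 4K pages: the sub-page offset of the physical address is kept,
 * the number of IOMMU pages spanned by [paddr, paddr + size) is computed
 * (as iommu_num_pages() does), and the offset is added back to the
 * allocated IO virtual address before it is returned. All values are
 * arbitrary examples.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096ULL

int main(void)
{
	uint64_t paddr  = 0x12345678ULL;	/* example physical address */
	uint64_t size   = 0x2100ULL;		/* example mapping length   */
	uint64_t iova   = 0x80000000ULL;	/* pretend allocator result */

	uint64_t offset = paddr & (SKETCH_PAGE_SIZE - 1);
	uint64_t pages  = (offset + size + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE;

	printf("offset=0x%llx pages=%llu dma_addr=0x%llx\n",
	       (unsigned long long)offset, (unsigned long long)pages,
	       (unsigned long long)(iova + offset));
	return 0;
}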
2122
2123/*
2124 * Does the reverse of the __map_single function. Must be called with
2125 * the domain lock held too
2126 */
2127static void __unmap_single(struct dma_ops_domain *dma_dom,
2128 dma_addr_t dma_addr,
2129 size_t size,
2130 int dir)
2131{
2132 dma_addr_t flush_addr;
2133 dma_addr_t i, start;
2134 unsigned int pages;
2135
2136 if ((dma_addr == DMA_ERROR_CODE) ||
2137 (dma_addr + size > dma_dom->aperture_size))
2138 return;
2139
2140 flush_addr = dma_addr;
2141 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
2142 dma_addr &= PAGE_MASK;
2143 start = dma_addr;
2144
2145 for (i = 0; i < pages; ++i) {
2146 dma_ops_domain_unmap(dma_dom, start);
2147 start += PAGE_SIZE;
2148 }
2149
2150 SUB_STATS_COUNTER(alloced_io_mem, size);
2151
2152 dma_ops_free_addresses(dma_dom, dma_addr, pages);
2153
2154 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
2155 domain_flush_pages(&dma_dom->domain, flush_addr, size);
2156 dma_dom->need_flush = false;
2157 }
2158}
2159
2160/*
2161 * The exported map_single function for dma_ops.
2162 */
2163static dma_addr_t map_page(struct device *dev, struct page *page,
2164 unsigned long offset, size_t size,
2165 enum dma_data_direction dir,
2166 struct dma_attrs *attrs)
2167{
2168 unsigned long flags;
2169 struct protection_domain *domain;
2170 dma_addr_t addr;
2171 u64 dma_mask;
2172 phys_addr_t paddr = page_to_phys(page) + offset;
2173
2174 INC_STATS_COUNTER(cnt_map_single);
2175
2176 domain = get_domain(dev);
2177 if (PTR_ERR(domain) == -EINVAL)
2178 return (dma_addr_t)paddr;
2179 else if (IS_ERR(domain))
2180 return DMA_ERROR_CODE;
2181
2182 dma_mask = *dev->dma_mask;
2183
2184 spin_lock_irqsave(&domain->lock, flags);
2185
2186 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
2187 dma_mask);
2188 if (addr == DMA_ERROR_CODE)
2189 goto out;
2190
2191 domain_flush_complete(domain);
2192
2193out:
2194 spin_unlock_irqrestore(&domain->lock, flags);
2195
2196 return addr;
2197}
2198
2199/*
2200 * The exported unmap_single function for dma_ops.
2201 */
2202static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2203 enum dma_data_direction dir, struct dma_attrs *attrs)
2204{
2205 unsigned long flags;
2206 struct protection_domain *domain;
2207
2208 INC_STATS_COUNTER(cnt_unmap_single);
2209
2210 domain = get_domain(dev);
2211 if (IS_ERR(domain))
2212 return;
2213
2214 spin_lock_irqsave(&domain->lock, flags);
2215
2216 __unmap_single(domain->priv, dma_addr, size, dir);
2217
2218 domain_flush_complete(domain);
2219
2220 spin_unlock_irqrestore(&domain->lock, flags);
2221}
2222
2223/*
2224 * This is a special map_sg function which is used when we have to map a
2225 * device which is not handled by an AMD IOMMU in the system.
2226 */
2227static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
2228 int nelems, int dir)
2229{
2230 struct scatterlist *s;
2231 int i;
2232
2233 for_each_sg(sglist, s, nelems, i) {
2234 s->dma_address = (dma_addr_t)sg_phys(s);
2235 s->dma_length = s->length;
2236 }
2237
2238 return nelems;
2239}
2240
2241/*
2242 * The exported map_sg function for dma_ops (handles scatter-gather
2243 * lists).
2244 */
2245static int map_sg(struct device *dev, struct scatterlist *sglist,
2246 int nelems, enum dma_data_direction dir,
2247 struct dma_attrs *attrs)
2248{
2249 unsigned long flags;
2250 struct protection_domain *domain;
2251 int i;
2252 struct scatterlist *s;
2253 phys_addr_t paddr;
2254 int mapped_elems = 0;
2255 u64 dma_mask;
2256
2257 INC_STATS_COUNTER(cnt_map_sg);
2258
2259 domain = get_domain(dev);
2260 if (PTR_ERR(domain) == -EINVAL)
2261 return map_sg_no_iommu(dev, sglist, nelems, dir);
2262 else if (IS_ERR(domain))
2263 return 0;
2264
2265 dma_mask = *dev->dma_mask;
2266
2267 spin_lock_irqsave(&domain->lock, flags);
2268
2269 for_each_sg(sglist, s, nelems, i) {
2270 paddr = sg_phys(s);
2271
2272 s->dma_address = __map_single(dev, domain->priv,
2273 paddr, s->length, dir, false,
2274 dma_mask);
2275
2276 if (s->dma_address) {
2277 s->dma_length = s->length;
2278 mapped_elems++;
2279 } else
2280 goto unmap;
2281 }
2282
2283 domain_flush_complete(domain);
2284
2285out:
2286 spin_unlock_irqrestore(&domain->lock, flags);
2287
2288 return mapped_elems;
2289unmap:
2290 for_each_sg(sglist, s, mapped_elems, i) {
2291 if (s->dma_address)
2292 __unmap_single(domain->priv, s->dma_address,
2293 s->dma_length, dir);
2294 s->dma_address = s->dma_length = 0;
2295 }
2296
2297 mapped_elems = 0;
2298
2299 goto out;
2300}
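
/*
 * The error handling in map_sg() above follows a common pattern: map the
 * scatterlist elements one by one and, if any element fails, unwind only
 * the elements that were already mapped and return 0. A stand-alone
 * sketch of that pattern; the element type and map helper are made up
 * for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct sketch_elem { bool mapped; };

static bool sketch_map_one(struct sketch_elem *e, int i)
{
	if (i == 3)			/* pretend the 4th element fails */
		return false;
	e->mapped = true;
	return true;
}

static int sketch_map_all(struct sketch_elem *elems, int nelems)
{
	int i, mapped = 0;

	for (i = 0; i < nelems; i++) {
		if (!sketch_map_one(&elems[i], i))
			goto unwind;
		mapped++;
	}
	return mapped;

unwind:
	for (i = 0; i < mapped; i++)	/* undo only what succeeded */
		elems[i].mapped = false;
	return 0;
}

int main(void)
{
	struct sketch_elem elems[5] = { { false } };

	printf("mapped elements: %d\n", sketch_map_all(elems, 5));
	return 0;
}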
2301
2302/*
2303 * The exported unmap_sg function for dma_ops (handles scatter-gather
2304 * lists).
2305 */
2306static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2307 int nelems, enum dma_data_direction dir,
2308 struct dma_attrs *attrs)
2309{
2310 unsigned long flags;
2311 struct protection_domain *domain;
2312 struct scatterlist *s;
2313 int i;
2314
2315 INC_STATS_COUNTER(cnt_unmap_sg);
2316
2317 domain = get_domain(dev);
2318 if (IS_ERR(domain))
2319 return;
2320
2321 spin_lock_irqsave(&domain->lock, flags);
2322
2323 for_each_sg(sglist, s, nelems, i) {
2324 __unmap_single(domain->priv, s->dma_address,
2325 s->dma_length, dir);
2326 s->dma_address = s->dma_length = 0;
2327 }
2328
2329 domain_flush_complete(domain);
2330
2331 spin_unlock_irqrestore(&domain->lock, flags);
2332}
2333
2334/*
2335 * The exported alloc_coherent function for dma_ops.
2336 */
2337static void *alloc_coherent(struct device *dev, size_t size,
2338 dma_addr_t *dma_addr, gfp_t flag)
2339{
2340 unsigned long flags;
2341 void *virt_addr;
2342 struct protection_domain *domain;
2343 phys_addr_t paddr;
2344 u64 dma_mask = dev->coherent_dma_mask;
2345
2346 INC_STATS_COUNTER(cnt_alloc_coherent);
2347
2348 domain = get_domain(dev);
2349 if (PTR_ERR(domain) == -EINVAL) {
2350 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2351 *dma_addr = __pa(virt_addr);
2352 return virt_addr;
2353 } else if (IS_ERR(domain))
2354 return NULL;
2355
2356 dma_mask = dev->coherent_dma_mask;
2357 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2358 flag |= __GFP_ZERO;
2359
2360 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2361 if (!virt_addr)
2362 return NULL;
2363
2364 paddr = virt_to_phys(virt_addr);
2365
2366 if (!dma_mask)
2367 dma_mask = *dev->dma_mask;
2368
2369 spin_lock_irqsave(&domain->lock, flags);
2370
2371 *dma_addr = __map_single(dev, domain->priv, paddr,
2372 size, DMA_BIDIRECTIONAL, true, dma_mask);
2373
2374 if (*dma_addr == DMA_ERROR_CODE) {
2375 spin_unlock_irqrestore(&domain->lock, flags);
2376 goto out_free;
2377 }
2378
2379 domain_flush_complete(domain);
2380
2381 spin_unlock_irqrestore(&domain->lock, flags);
2382
2383 return virt_addr;
2384
2385out_free:
2386
2387 free_pages((unsigned long)virt_addr, get_order(size));
2388
2389 return NULL;
2390}
2391
2392/*
2393 * The exported free_coherent function for dma_ops.
2394 */
2395static void free_coherent(struct device *dev, size_t size,
2396 void *virt_addr, dma_addr_t dma_addr)
2397{
2398 unsigned long flags;
2399 struct protection_domain *domain;
2400
2401 INC_STATS_COUNTER(cnt_free_coherent);
2402
2403 domain = get_domain(dev);
2404 if (IS_ERR(domain))
2405 goto free_mem;
2406
2407 spin_lock_irqsave(&domain->lock, flags);
2408
2409 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2410
2411 domain_flush_complete(domain);
2412
2413 spin_unlock_irqrestore(&domain->lock, flags);
2414
2415free_mem:
2416 free_pages((unsigned long)virt_addr, get_order(size));
2417}
2418
2419/*
2420 * This function is called by the DMA layer to find out if we can handle a
2421 * particular device. It is part of the dma_ops.
2422 */
2423static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2424{
2425 return check_device(dev);
2426}
2427
2428/*
2429 * The function for pre-allocating protection domains.
2430 *
2431 * Once the driver core informs the DMA layer when a driver grabs a
2432 * device, we won't need to preallocate the protection domains anymore.
2433 * For now we have to.
2434 */
2435static void prealloc_protection_domains(void)
2436{
2437 struct pci_dev *dev = NULL;
2438 struct dma_ops_domain *dma_dom;
2439 u16 devid;
2440
2441 for_each_pci_dev(dev) {
2442
2443 /* Do we handle this device? */
2444 if (!check_device(&dev->dev))
2445 continue;
2446
2447 /* Is there already any domain for it? */
2448 if (domain_for_device(&dev->dev))
2449 continue;
2450
2451 devid = get_device_id(&dev->dev);
2452
2453 dma_dom = dma_ops_domain_alloc();
2454 if (!dma_dom)
2455 continue;
2456 init_unity_mappings_for_device(dma_dom, devid);
2457 dma_dom->target_dev = devid;
2458
2459 attach_device(&dev->dev, &dma_dom->domain);
2460
2461 list_add_tail(&dma_dom->list, &iommu_pd_list);
2462 }
2463}
2464
2465static struct dma_map_ops amd_iommu_dma_ops = {
2466 .alloc_coherent = alloc_coherent,
2467 .free_coherent = free_coherent,
2468 .map_page = map_page,
2469 .unmap_page = unmap_page,
2470 .map_sg = map_sg,
2471 .unmap_sg = unmap_sg,
2472 .dma_supported = amd_iommu_dma_supported,
2473};
2474
2475static unsigned device_dma_ops_init(void)
2476{
2477 struct pci_dev *pdev = NULL;
2478 unsigned unhandled = 0;
2479
2480 for_each_pci_dev(pdev) {
2481 if (!check_device(&pdev->dev)) {
2482 unhandled += 1;
2483 continue;
2484 }
2485
2486 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
2487 }
2488
2489 return unhandled;
2490}
2491
2492/*
2493 * The function which glues the AMD IOMMU driver into dma_ops.
2494 */
2495
2496void __init amd_iommu_init_api(void)
2497{
2498 bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
2499}
2500
2501int __init amd_iommu_init_dma_ops(void)
2502{
2503 struct amd_iommu *iommu;
2504 int ret, unhandled;
2505
2506 /*
2507 * first allocate a default protection domain for every IOMMU we
2508 * found in the system. Devices not assigned to any other
2509 * protection domain will be assigned to the default one.
2510 */
2511 for_each_iommu(iommu) {
2512 iommu->default_dom = dma_ops_domain_alloc();
2513 if (iommu->default_dom == NULL)
2514 return -ENOMEM;
2515 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2516 ret = iommu_init_unity_mappings(iommu);
2517 if (ret)
2518 goto free_domains;
2519 }
2520
2521 /*
2522 * Pre-allocate the protection domains for each device.
2523 */
2524 prealloc_protection_domains();
2525
2526 iommu_detected = 1;
2527 swiotlb = 0;
2528
2529	/* Make our dma_ops finally visible to the devices */
2530 unhandled = device_dma_ops_init();
2531 if (unhandled && max_pfn > MAX_DMA32_PFN) {
2532 /* There are unhandled devices - initialize swiotlb for them */
2533 swiotlb = 1;
2534 }
2535
2536 amd_iommu_stats_init();
2537
2538 return 0;
2539
2540free_domains:
2541
2542 for_each_iommu(iommu) {
2543 if (iommu->default_dom)
2544 dma_ops_domain_free(iommu->default_dom);
2545 }
2546
2547 return ret;
2548}
2549
2550/*****************************************************************************
2551 *
2552 * The following functions belong to the exported interface of AMD IOMMU
2553 *
2554 * This interface allows access to lower level functions of the IOMMU
2555 * like protection domain handling and assignment of devices to domains
2556 * which is not possible with the dma_ops interface.
2557 *
2558 *****************************************************************************/
2559
2560static void cleanup_domain(struct protection_domain *domain)
2561{
2562 struct iommu_dev_data *dev_data, *next;
2563 unsigned long flags;
2564
2565 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2566
2567 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2568 __detach_device(dev_data);
2569 atomic_set(&dev_data->bind, 0);
2570 }
2571
2572 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2573}
2574
2575static void protection_domain_free(struct protection_domain *domain)
2576{
2577 if (!domain)
2578 return;
2579
2580 del_domain_from_list(domain);
2581
2582 if (domain->id)
2583 domain_id_free(domain->id);
2584
2585 kfree(domain);
2586}
2587
2588static struct protection_domain *protection_domain_alloc(void)
2589{
2590 struct protection_domain *domain;
2591
2592 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2593 if (!domain)
2594 return NULL;
2595
2596 spin_lock_init(&domain->lock);
2597 mutex_init(&domain->api_lock);
2598 domain->id = domain_id_alloc();
2599 if (!domain->id)
2600 goto out_err;
2601 INIT_LIST_HEAD(&domain->dev_list);
2602
2603 add_domain_to_list(domain);
2604
2605 return domain;
2606
2607out_err:
2608 kfree(domain);
2609
2610 return NULL;
2611}
2612
2613static int amd_iommu_domain_init(struct iommu_domain *dom)
2614{
2615 struct protection_domain *domain;
2616
2617 domain = protection_domain_alloc();
2618 if (!domain)
2619 goto out_free;
2620
2621 domain->mode = PAGE_MODE_3_LEVEL;
2622 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2623 if (!domain->pt_root)
2624 goto out_free;
2625
2626 dom->priv = domain;
2627
2628 return 0;
2629
2630out_free:
2631 protection_domain_free(domain);
2632
2633 return -ENOMEM;
2634}
2635
2636static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2637{
2638 struct protection_domain *domain = dom->priv;
2639
2640 if (!domain)
2641 return;
2642
2643 if (domain->dev_cnt > 0)
2644 cleanup_domain(domain);
2645
2646 BUG_ON(domain->dev_cnt != 0);
2647
2648 free_pagetable(domain);
2649
2650 protection_domain_free(domain);
2651
2652 dom->priv = NULL;
2653}
2654
2655static void amd_iommu_detach_device(struct iommu_domain *dom,
2656 struct device *dev)
2657{
2658 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2659 struct amd_iommu *iommu;
2660 u16 devid;
2661
2662 if (!check_device(dev))
2663 return;
2664
2665 devid = get_device_id(dev);
2666
2667 if (dev_data->domain != NULL)
2668 detach_device(dev);
2669
2670 iommu = amd_iommu_rlookup_table[devid];
2671 if (!iommu)
2672 return;
2673
2674 iommu_completion_wait(iommu);
2675}
2676
2677static int amd_iommu_attach_device(struct iommu_domain *dom,
2678 struct device *dev)
2679{
2680 struct protection_domain *domain = dom->priv;
2681 struct iommu_dev_data *dev_data;
2682 struct amd_iommu *iommu;
2683 int ret;
2684
2685 if (!check_device(dev))
2686 return -EINVAL;
2687
2688 dev_data = dev->archdata.iommu;
2689
2690 iommu = amd_iommu_rlookup_table[dev_data->devid];
2691 if (!iommu)
2692 return -EINVAL;
2693
2694 if (dev_data->domain)
2695 detach_device(dev);
2696
2697 ret = attach_device(dev, domain);
2698
2699 iommu_completion_wait(iommu);
2700
2701 return ret;
2702}
2703
2704static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2705 phys_addr_t paddr, int gfp_order, int iommu_prot)
2706{
2707 unsigned long page_size = 0x1000UL << gfp_order;
2708 struct protection_domain *domain = dom->priv;
2709 int prot = 0;
2710 int ret;
2711
2712 if (iommu_prot & IOMMU_READ)
2713 prot |= IOMMU_PROT_IR;
2714 if (iommu_prot & IOMMU_WRITE)
2715 prot |= IOMMU_PROT_IW;
2716
2717 mutex_lock(&domain->api_lock);
2718 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2719 mutex_unlock(&domain->api_lock);
2720
2721 return ret;
2722}
2723
2724static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2725 int gfp_order)
2726{
2727 struct protection_domain *domain = dom->priv;
2728 unsigned long page_size, unmap_size;
2729
2730 page_size = 0x1000UL << gfp_order;
2731
2732 mutex_lock(&domain->api_lock);
2733 unmap_size = iommu_unmap_page(domain, iova, page_size);
2734 mutex_unlock(&domain->api_lock);
2735
2736 domain_flush_tlb_pde(domain);
2737
2738 return get_order(unmap_size);
2739}
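
/*
 * amd_iommu_map()/amd_iommu_unmap() above exchange sizes with the generic
 * IOMMU layer as page orders: order n means 0x1000 << n bytes, and the
 * unmap path converts the unmapped byte count back into an order via
 * get_order(). A stand-alone sketch of that round trip, assuming 4K pages.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t sketch_order_to_size(unsigned int order)
{
	return 0x1000ULL << order;
}

static unsigned int sketch_get_order(uint64_t size)
{
	unsigned int order = 0;

	while (sketch_order_to_size(order) < size)
		order++;	/* smallest order whose size covers 'size' */
	return order;
}

int main(void)
{
	unsigned int order;

	for (order = 0; order <= 9; order += 9)	/* 4K and 2M examples */
		printf("order %u -> %llu bytes -> order %u\n", order,
		       (unsigned long long)sketch_order_to_size(order),
		       sketch_get_order(sketch_order_to_size(order)));
	return 0;
}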
2740
2741static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2742 unsigned long iova)
2743{
2744 struct protection_domain *domain = dom->priv;
2745 unsigned long offset_mask;
2746 phys_addr_t paddr;
2747 u64 *pte, __pte;
2748
2749 pte = fetch_pte(domain, iova);
2750
2751 if (!pte || !IOMMU_PTE_PRESENT(*pte))
2752 return 0;
2753
2754 if (PM_PTE_LEVEL(*pte) == 0)
2755 offset_mask = PAGE_SIZE - 1;
2756 else
2757 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2758
2759 __pte = *pte & PM_ADDR_MASK;
2760 paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2761
2762 return paddr;
2763}
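
/*
 * Stand-alone sketch of the address recombination in
 * amd_iommu_iova_to_phys() above: the page frame bits come from the PTE,
 * the low bits come from the IOVA, and the split point depends on the
 * page size the PTE maps (4K for a level-0 PTE, 2M here as an example of
 * a larger page). The input values are arbitrary.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t sketch_iova_to_phys(uint64_t pte_frame, uint64_t iova,
				    uint64_t page_size)
{
	uint64_t offset_mask = page_size - 1;

	return (pte_frame & ~offset_mask) | (iova & offset_mask);
}

int main(void)
{
	printf("4K page: 0x%llx\n", (unsigned long long)
	       sketch_iova_to_phys(0x40000000ULL, 0x10002abcULL, 0x1000ULL));
	printf("2M page: 0x%llx\n", (unsigned long long)
	       sketch_iova_to_phys(0x40000000ULL, 0x10002abcULL, 0x200000ULL));
	return 0;
}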
2764
2765static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2766 unsigned long cap)
2767{
2768 switch (cap) {
2769 case IOMMU_CAP_CACHE_COHERENCY:
2770 return 1;
2771 }
2772
2773 return 0;
2774}
2775
2776static struct iommu_ops amd_iommu_ops = {
2777 .domain_init = amd_iommu_domain_init,
2778 .domain_destroy = amd_iommu_domain_destroy,
2779 .attach_dev = amd_iommu_attach_device,
2780 .detach_dev = amd_iommu_detach_device,
2781 .map = amd_iommu_map,
2782 .unmap = amd_iommu_unmap,
2783 .iova_to_phys = amd_iommu_iova_to_phys,
2784 .domain_has_cap = amd_iommu_domain_has_cap,
2785};
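
/*
 * With amd_iommu_ops registered on the PCI bus (see amd_iommu_init_api()
 * above), other kernel code reaches these callbacks through the generic
 * IOMMU API. A rough sketch of such a consumer; the generic-API
 * signatures (order-based iommu_map()/iommu_unmap(), bus-based
 * iommu_domain_alloc()) are assumed from this kernel generation and the
 * physical address is an arbitrary example.
 */
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/errno.h>

static int __maybe_unused sketch_use_iommu_api(struct device *dev)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc(&pci_bus_type);  /* -> amd_iommu_domain_init()   */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);	  /* -> amd_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* map one 4K page (order 0) read/write at IOVA 0 */
	ret = iommu_map(dom, 0, 0x12340000, 0, IOMMU_READ | IOMMU_WRITE);
	if (!ret)
		iommu_unmap(dom, 0, 0);		  /* -> amd_iommu_unmap()         */

	iommu_detach_device(dom, dev);
out_free:
	iommu_domain_free(dom);
	return ret;
}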
2786
2787/*****************************************************************************
2788 *
2789 * The next functions do a basic initialization of IOMMU for pass through
2790 * mode
2791 *
2792 * In passthrough mode the IOMMU is initialized and enabled but not used for
2793 * DMA-API translation.
2794 *
2795 *****************************************************************************/
2796
2797int __init amd_iommu_init_passthrough(void)
2798{
2799 struct amd_iommu *iommu;
2800 struct pci_dev *dev = NULL;
2801 u16 devid;
2802
2803 /* allocate passthrough domain */
2804 pt_domain = protection_domain_alloc();
2805 if (!pt_domain)
2806 return -ENOMEM;
2807
2808 pt_domain->mode |= PAGE_MODE_NONE;
2809
2810 for_each_pci_dev(dev) {
2811 if (!check_device(&dev->dev))
2812 continue;
2813
2814 devid = get_device_id(&dev->dev);
2815
2816 iommu = amd_iommu_rlookup_table[devid];
2817 if (!iommu)
2818 continue;
2819
2820 attach_device(&dev->dev, pt_domain);
2821 }
2822
2823 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2824
2825 return 0;
2826}
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
new file mode 100644
index 000000000000..82d2410f4205
--- /dev/null
+++ b/drivers/iommu/amd_iommu_init.c
@@ -0,0 +1,1574 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/acpi.h>
22#include <linux/list.h>
23#include <linux/slab.h>
24#include <linux/syscore_ops.h>
25#include <linux/interrupt.h>
26#include <linux/msi.h>
27#include <linux/amd-iommu.h>
28#include <asm/pci-direct.h>
29#include <asm/iommu.h>
30#include <asm/gart.h>
31#include <asm/x86_init.h>
32#include <asm/iommu_table.h>
33
34#include "amd_iommu_proto.h"
35#include "amd_iommu_types.h"
36
37/*
38 * definitions for the ACPI scanning code
39 */
40#define IVRS_HEADER_LENGTH 48
41
42#define ACPI_IVHD_TYPE 0x10
43#define ACPI_IVMD_TYPE_ALL 0x20
44#define ACPI_IVMD_TYPE 0x21
45#define ACPI_IVMD_TYPE_RANGE 0x22
46
47#define IVHD_DEV_ALL 0x01
48#define IVHD_DEV_SELECT 0x02
49#define IVHD_DEV_SELECT_RANGE_START 0x03
50#define IVHD_DEV_RANGE_END 0x04
51#define IVHD_DEV_ALIAS 0x42
52#define IVHD_DEV_ALIAS_RANGE 0x43
53#define IVHD_DEV_EXT_SELECT 0x46
54#define IVHD_DEV_EXT_SELECT_RANGE 0x47
55
56#define IVHD_FLAG_HT_TUN_EN_MASK 0x01
57#define IVHD_FLAG_PASSPW_EN_MASK 0x02
58#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
59#define IVHD_FLAG_ISOC_EN_MASK 0x08
60
61#define IVMD_FLAG_EXCL_RANGE 0x08
62#define IVMD_FLAG_UNITY_MAP 0x01
63
64#define ACPI_DEVFLAG_INITPASS 0x01
65#define ACPI_DEVFLAG_EXTINT 0x02
66#define ACPI_DEVFLAG_NMI 0x04
67#define ACPI_DEVFLAG_SYSMGT1 0x10
68#define ACPI_DEVFLAG_SYSMGT2 0x20
69#define ACPI_DEVFLAG_LINT0 0x40
70#define ACPI_DEVFLAG_LINT1 0x80
71#define ACPI_DEVFLAG_ATSDIS 0x10000000
72
73/*
74 * ACPI table definitions
75 *
76 * These data structures are laid over the table to parse the important values
77 * out of it.
78 */
79
80/*
81 * structure describing one IOMMU in the ACPI table. Typically followed by one
82 * or more ivhd_entry structures.
83 */
84struct ivhd_header {
85 u8 type;
86 u8 flags;
87 u16 length;
88 u16 devid;
89 u16 cap_ptr;
90 u64 mmio_phys;
91 u16 pci_seg;
92 u16 info;
93 u32 reserved;
94} __attribute__((packed));
95
96/*
97 * A device entry describing which devices a specific IOMMU translates and
98 * which requestor ids they use.
99 */
100struct ivhd_entry {
101 u8 type;
102 u16 devid;
103 u8 flags;
104 u32 ext;
105} __attribute__((packed));
106
107/*
108 * An AMD IOMMU memory definition structure. It defines things like exclusion
109 * ranges for devices and regions that should be unity mapped.
110 */
111struct ivmd_header {
112 u8 type;
113 u8 flags;
114 u16 length;
115 u16 devid;
116 u16 aux;
117 u64 resv;
118 u64 range_start;
119 u64 range_length;
120} __attribute__((packed));
121
122bool amd_iommu_dump;
123
124static int __initdata amd_iommu_detected;
125static bool __initdata amd_iommu_disabled;
126
127u16 amd_iommu_last_bdf; /* largest PCI device id we have
128 to handle */
129LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
130 we find in ACPI */
131bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
132
133LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
134 system */
135
136/* Array to assign indices to IOMMUs */
137struct amd_iommu *amd_iommus[MAX_IOMMUS];
138int amd_iommus_present;
139
140/* IOMMUs have a non-present cache? */
141bool amd_iommu_np_cache __read_mostly;
142bool amd_iommu_iotlb_sup __read_mostly = true;
143
144/*
145 * The ACPI table parsing functions set this variable on an error
146 */
147static int __initdata amd_iommu_init_err;
148
149/*
150 * List of protection domains - used during resume
151 */
152LIST_HEAD(amd_iommu_pd_list);
153spinlock_t amd_iommu_pd_lock;
154
155/*
156 * Pointer to the device table which is shared by all AMD IOMMUs.
157 * It is indexed by the PCI device id or the HT unit id and contains
158 * information about the domain the device belongs to as well as the
159 * page table root pointer.
160 */
161struct dev_table_entry *amd_iommu_dev_table;
162
163/*
164 * The alias table is a driver specific data structure which contains the
165 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
166 * More than one device can share the same requestor id.
167 */
168u16 *amd_iommu_alias_table;
169
170/*
171 * The rlookup table is used to find the IOMMU which is responsible
172 * for a specific device. It is also indexed by the PCI device id.
173 */
174struct amd_iommu **amd_iommu_rlookup_table;
175
176/*
177 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
178 * to know which ones are already in use.
179 */
180unsigned long *amd_iommu_pd_alloc_bitmap;
181
182static u32 dev_table_size; /* size of the device table */
183static u32 alias_table_size; /* size of the alias table */
184static u32 rlookup_table_size;	/* size of the rlookup table */
185
186/*
187 * This function flushes all internal caches of
188 * the IOMMU used by this driver.
189 */
190extern void iommu_flush_all_caches(struct amd_iommu *iommu);
191
192static inline void update_last_devid(u16 devid)
193{
194 if (devid > amd_iommu_last_bdf)
195 amd_iommu_last_bdf = devid;
196}
197
198static inline unsigned long tbl_size(int entry_size)
199{
200 unsigned shift = PAGE_SHIFT +
201 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
202
203 return 1UL << shift;
204}
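
/*
 * Stand-alone sketch of the sizing rule in tbl_size() above: the raw
 * table size (number of device ids times the per-entry size) is rounded
 * up to a whole power-of-two number of 4K pages, mirroring
 * PAGE_SHIFT + get_order(). The entry size below is an arbitrary example.
 */
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12

static unsigned long sketch_tbl_size(unsigned int last_bdf,
				     unsigned int entry_size)
{
	unsigned long raw = ((unsigned long)last_bdf + 1) * entry_size;
	unsigned int order = 0;

	while ((1UL << (SKETCH_PAGE_SHIFT + order)) < raw)
		order++;	/* same effect as get_order(raw) */

	return 1UL << (SKETCH_PAGE_SHIFT + order);
}

int main(void)
{
	/* e.g. last devid 0xffff with a 32-byte entry size */
	printf("table size: %lu bytes\n", sketch_tbl_size(0xffff, 32));
	return 0;
}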
205
206/* Access to l1 and l2 indexed register spaces */
207
208static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
209{
210 u32 val;
211
212 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
213 pci_read_config_dword(iommu->dev, 0xfc, &val);
214 return val;
215}
216
217static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
218{
219 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
220 pci_write_config_dword(iommu->dev, 0xfc, val);
221 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
222}
223
224static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
225{
226 u32 val;
227
228 pci_write_config_dword(iommu->dev, 0xf0, address);
229 pci_read_config_dword(iommu->dev, 0xf4, &val);
230 return val;
231}
232
233static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
234{
235 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
236 pci_write_config_dword(iommu->dev, 0xf4, val);
237}
238
239/****************************************************************************
240 *
241 * AMD IOMMU MMIO register space handling functions
242 *
243 * These functions are used to program the IOMMU device registers in
244 * MMIO space required for that driver.
245 *
246 ****************************************************************************/
247
248/*
249 * This function sets the exclusion range in the IOMMU. DMA accesses to the
250 * exclusion range are passed through untranslated
251 */
252static void iommu_set_exclusion_range(struct amd_iommu *iommu)
253{
254 u64 start = iommu->exclusion_start & PAGE_MASK;
255 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
256 u64 entry;
257
258 if (!iommu->exclusion_start)
259 return;
260
261 entry = start | MMIO_EXCL_ENABLE_MASK;
262 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
263 &entry, sizeof(entry));
264
265 entry = limit;
266 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
267 &entry, sizeof(entry));
268}
269
270/* Programs the physical address of the device table into the IOMMU hardware */
271static void __init iommu_set_device_table(struct amd_iommu *iommu)
272{
273 u64 entry;
274
275 BUG_ON(iommu->mmio_base == NULL);
276
277 entry = virt_to_phys(amd_iommu_dev_table);
278 entry |= (dev_table_size >> 12) - 1;
279 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
280 &entry, sizeof(entry));
281}
282
283/* Generic functions to enable/disable certain features of the IOMMU. */
284static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
285{
286 u32 ctrl;
287
288 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
289 ctrl |= (1 << bit);
290 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
291}
292
293static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
294{
295 u32 ctrl;
296
297 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
298 ctrl &= ~(1 << bit);
299 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
300}
301
302/* Function to enable the hardware */
303static void iommu_enable(struct amd_iommu *iommu)
304{
305 static const char * const feat_str[] = {
306 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
307 "IA", "GA", "HE", "PC", NULL
308 };
309 int i;
310
311 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
312 dev_name(&iommu->dev->dev), iommu->cap_ptr);
313
314 if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
315 printk(KERN_CONT " extended features: ");
316 for (i = 0; feat_str[i]; ++i)
317 if (iommu_feature(iommu, (1ULL << i)))
318 printk(KERN_CONT " %s", feat_str[i]);
319 }
320 printk(KERN_CONT "\n");
321
322 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
323}
324
325static void iommu_disable(struct amd_iommu *iommu)
326{
327 /* Disable command buffer */
328 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
329
330 /* Disable event logging and event interrupts */
331 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
332 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
333
334 /* Disable IOMMU hardware itself */
335 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
336}
337
338/*
339 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
340 * the system has one.
341 */
342static u8 * __init iommu_map_mmio_space(u64 address)
343{
344 u8 *ret;
345
346 if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
347 pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
348 address);
349 pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
350 return NULL;
351 }
352
353 ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
354 if (ret != NULL)
355 return ret;
356
357 release_mem_region(address, MMIO_REGION_LENGTH);
358
359 return NULL;
360}
361
362static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
363{
364 if (iommu->mmio_base)
365 iounmap(iommu->mmio_base);
366 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
367}
368
369/****************************************************************************
370 *
371 * The functions below belong to the first pass of AMD IOMMU ACPI table
372 * parsing. In this pass we try to find out the highest device id this
373 * code has to handle. Based on this information the size of the shared data
374 * structures is determined later.
375 *
376 ****************************************************************************/
377
378/*
379 * This function calculates the length of a given IVHD entry
380 */
381static inline int ivhd_entry_length(u8 *ivhd)
382{
383 return 0x04 << (*ivhd >> 6);
384}
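
/*
 * Stand-alone illustration of ivhd_entry_length() above: the upper two
 * bits of the IVHD entry type byte select the entry size, so types
 * 0x00-0x3f are 4 bytes long, 0x40-0x7f are 8 bytes, and so on.
 */
#include <stdint.h>
#include <stdio.h>

static int sketch_ivhd_entry_length(uint8_t type)
{
	return 0x04 << (type >> 6);
}

int main(void)
{
	/* IVHD_DEV_SELECT (0x02) and IVHD_DEV_ALIAS (0x42) from the defines above */
	printf("type 0x02 -> %d bytes\n", sketch_ivhd_entry_length(0x02));
	printf("type 0x42 -> %d bytes\n", sketch_ivhd_entry_length(0x42));
	return 0;
}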
385
386/*
387 * This function reads the last device id the IOMMU has to handle from the PCI
388 * capability header for this IOMMU
389 */
390static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
391{
392 u32 cap;
393
394 cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
395 update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
396
397 return 0;
398}
399
400/*
401 * After reading the highest device id from the IOMMU PCI capability header
402 * this function looks if there is a higher device id defined in the ACPI table
403 */
404static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
405{
406 u8 *p = (void *)h, *end = (void *)h;
407 struct ivhd_entry *dev;
408
409 p += sizeof(*h);
410 end += h->length;
411
412 find_last_devid_on_pci(PCI_BUS(h->devid),
413 PCI_SLOT(h->devid),
414 PCI_FUNC(h->devid),
415 h->cap_ptr);
416
417 while (p < end) {
418 dev = (struct ivhd_entry *)p;
419 switch (dev->type) {
420 case IVHD_DEV_SELECT:
421 case IVHD_DEV_RANGE_END:
422 case IVHD_DEV_ALIAS:
423 case IVHD_DEV_EXT_SELECT:
424 /* all the above subfield types refer to device ids */
425 update_last_devid(dev->devid);
426 break;
427 default:
428 break;
429 }
430 p += ivhd_entry_length(p);
431 }
432
433 WARN_ON(p != end);
434
435 return 0;
436}
437
438/*
439 * Iterate over all IVHD entries in the ACPI table and find the highest device
440 * id which we need to handle. This is the first of three functions which parse
441 * the ACPI table. So we check the checksum here.
442 */
443static int __init find_last_devid_acpi(struct acpi_table_header *table)
444{
445 int i;
446 u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
447 struct ivhd_header *h;
448
449 /*
450 * Validate checksum here so we don't need to do it when
451 * we actually parse the table
452 */
453 for (i = 0; i < table->length; ++i)
454 checksum += p[i];
455 if (checksum != 0) {
456 /* ACPI table corrupt */
457 amd_iommu_init_err = -ENODEV;
458 return 0;
459 }
460
461 p += IVRS_HEADER_LENGTH;
462
463 end += table->length;
464 while (p < end) {
465 h = (struct ivhd_header *)p;
466 switch (h->type) {
467 case ACPI_IVHD_TYPE:
468 find_last_devid_from_ivhd(h);
469 break;
470 default:
471 break;
472 }
473 p += h->length;
474 }
475 WARN_ON(p != end);
476
477 return 0;
478}
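
/*
 * The checksum rule used by find_last_devid_acpi() above is the usual
 * ACPI one: the byte-wise sum of the whole table, including the checksum
 * field itself, must be zero modulo 256. A stand-alone sketch with toy
 * data:
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int sketch_ivrs_checksum_ok(const uint8_t *table, size_t length)
{
	uint8_t sum = 0;
	size_t i;

	for (i = 0; i < length; i++)
		sum += table[i];

	return sum == 0;
}

int main(void)
{
	const uint8_t good[] = { 0x10, 0x20, 0x30, 0xa0 };	/* sums to 0x100 */
	const uint8_t bad[]  = { 0x10, 0x20, 0x30, 0xa1 };

	printf("good table ok: %d, bad table ok: %d\n",
	       sketch_ivrs_checksum_ok(good, sizeof(good)),
	       sketch_ivrs_checksum_ok(bad, sizeof(bad)));
	return 0;
}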
479
480/****************************************************************************
481 *
482 * The following functions belong to the code path which parses the ACPI table
483 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
484 * data structures, initialize the device/alias/rlookup table and also
485 * basically initialize the hardware.
486 *
487 ****************************************************************************/
488
489/*
490 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
491 * write commands to that buffer later and the IOMMU will execute them
492 * asynchronously
493 */
494static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
495{
496 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
497 get_order(CMD_BUFFER_SIZE));
498
499 if (cmd_buf == NULL)
500 return NULL;
501
502 iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
503
504 return cmd_buf;
505}
506
507/*
508 * This function resets the command buffer if the IOMMU stopped fetching
509 * commands from it.
510 */
511void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
512{
513 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
514
515 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
516 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
517
518 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
519}
520
521/*
522 * This function writes the command buffer address to the hardware and
523 * enables it.
524 */
525static void iommu_enable_command_buffer(struct amd_iommu *iommu)
526{
527 u64 entry;
528
529 BUG_ON(iommu->cmd_buf == NULL);
530
531 entry = (u64)virt_to_phys(iommu->cmd_buf);
532 entry |= MMIO_CMD_SIZE_512;
533
534 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
535 &entry, sizeof(entry));
536
537 amd_iommu_reset_cmd_buffer(iommu);
538 iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
539}
540
541static void __init free_command_buffer(struct amd_iommu *iommu)
542{
543 free_pages((unsigned long)iommu->cmd_buf,
544 get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
545}
546
547/* allocates the memory where the IOMMU will log its events to */
548static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
549{
550 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
551 get_order(EVT_BUFFER_SIZE));
552
553 if (iommu->evt_buf == NULL)
554 return NULL;
555
556 iommu->evt_buf_size = EVT_BUFFER_SIZE;
557
558 return iommu->evt_buf;
559}
560
561static void iommu_enable_event_buffer(struct amd_iommu *iommu)
562{
563 u64 entry;
564
565 BUG_ON(iommu->evt_buf == NULL);
566
567 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
568
569 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
570 &entry, sizeof(entry));
571
572 /* set head and tail to zero manually */
573 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
574 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
575
576 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
577}
578
579static void __init free_event_buffer(struct amd_iommu *iommu)
580{
581 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
582}
583
584/* sets a specific bit in the device table entry. */
585static void set_dev_entry_bit(u16 devid, u8 bit)
586{
587 int i = (bit >> 5) & 0x07;
588 int _bit = bit & 0x1f;
589
590 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
591}
592
593static int get_dev_entry_bit(u16 devid, u8 bit)
594{
595 int i = (bit >> 5) & 0x07;
596 int _bit = bit & 0x1f;
597
598 return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
599}
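
/*
 * set_dev_entry_bit()/get_dev_entry_bit() above address a device table
 * entry as an array of up to eight 32-bit words: bits 7..5 of the bit
 * number pick the word, bits 4..0 pick the bit inside it. The same
 * addressing on a local array, as a stand-alone sketch:
 */
#include <stdint.h>
#include <stdio.h>

static void sketch_set_bit(uint32_t data[8], uint8_t bit)
{
	data[(bit >> 5) & 0x07] |= 1U << (bit & 0x1f);
}

static int sketch_get_bit(const uint32_t data[8], uint8_t bit)
{
	return (data[(bit >> 5) & 0x07] >> (bit & 0x1f)) & 1;
}

int main(void)
{
	uint32_t data[8] = { 0 };

	sketch_set_bit(data, 97);	/* word 3, bit 1 */
	printf("bit 97: %d (word 3 = 0x%x)\n", sketch_get_bit(data, 97), data[3]);
	return 0;
}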
600
601
602void amd_iommu_apply_erratum_63(u16 devid)
603{
604 int sysmgt;
605
606 sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
607 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
608
609 if (sysmgt == 0x01)
610 set_dev_entry_bit(devid, DEV_ENTRY_IW);
611}
612
613/* Writes the specific IOMMU for a device into the rlookup table */
614static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
615{
616 amd_iommu_rlookup_table[devid] = iommu;
617}
618
619/*
620 * This function takes the device specific flags read from the ACPI
621 * table and sets up the device table entry with that information
622 */
623static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
624 u16 devid, u32 flags, u32 ext_flags)
625{
626 if (flags & ACPI_DEVFLAG_INITPASS)
627 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
628 if (flags & ACPI_DEVFLAG_EXTINT)
629 set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
630 if (flags & ACPI_DEVFLAG_NMI)
631 set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
632 if (flags & ACPI_DEVFLAG_SYSMGT1)
633 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
634 if (flags & ACPI_DEVFLAG_SYSMGT2)
635 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
636 if (flags & ACPI_DEVFLAG_LINT0)
637 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
638 if (flags & ACPI_DEVFLAG_LINT1)
639 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
640
641 amd_iommu_apply_erratum_63(devid);
642
643 set_iommu_for_device(iommu, devid);
644}
645
646/*
647 * Reads the device exclusion range from ACPI and initializes the IOMMU
648 * with it
649 */
650static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
651{
652 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
653
654 if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
655 return;
656
657 if (iommu) {
658 /*
659 * We can only configure exclusion ranges per IOMMU, not
660 * per device. But we can enable the exclusion range per
661 * device. This is done here
662 */
663 set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
664 iommu->exclusion_start = m->range_start;
665 iommu->exclusion_length = m->range_length;
666 }
667}
668
669/*
670 * This function reads some important data from the IOMMU PCI space and
671 * initializes the driver data structure with it. It reads the hardware
672 * capabilities and the first/last device entries
673 */
674static void __init init_iommu_from_pci(struct amd_iommu *iommu)
675{
676 int cap_ptr = iommu->cap_ptr;
677 u32 range, misc, low, high;
678 int i, j;
679
680 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
681 &iommu->cap);
682 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
683 &range);
684 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
685 &misc);
686
687 iommu->first_device = calc_devid(MMIO_GET_BUS(range),
688 MMIO_GET_FD(range));
689 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
690 MMIO_GET_LD(range));
691 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
692
693 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
694 amd_iommu_iotlb_sup = false;
695
696 /* read extended feature bits */
697 low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
698 high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
699
700 iommu->features = ((u64)high << 32) | low;
701
702 if (!is_rd890_iommu(iommu->dev))
703 return;
704
705 /*
706 * Some rd890 systems may not be fully reconfigured by the BIOS, so
707 * it's necessary for us to store this information so it can be
708 * reprogrammed on resume
709 */
710
711 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
712 &iommu->stored_addr_lo);
713 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
714 &iommu->stored_addr_hi);
715
716 /* Low bit locks writes to configuration space */
717 iommu->stored_addr_lo &= ~1;
718
719 for (i = 0; i < 6; i++)
720 for (j = 0; j < 0x12; j++)
721 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
722
723 for (i = 0; i < 0x83; i++)
724 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
725}
726
727/*
728 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
729 * initializes the hardware and our data structures with it.
730 */
731static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
732 struct ivhd_header *h)
733{
734 u8 *p = (u8 *)h;
735 u8 *end = p, flags = 0;
736 u16 devid = 0, devid_start = 0, devid_to = 0;
737 u32 dev_i, ext_flags = 0;
738 bool alias = false;
739 struct ivhd_entry *e;
740
741 /*
742 * First save the recommended feature enable bits from ACPI
743 */
744 iommu->acpi_flags = h->flags;
745
746 /*
747 * Done. Now parse the device entries
748 */
749 p += sizeof(struct ivhd_header);
750 end += h->length;
751
752
753 while (p < end) {
754 e = (struct ivhd_entry *)p;
755 switch (e->type) {
756 case IVHD_DEV_ALL:
757
758 DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
759 " last device %02x:%02x.%x flags: %02x\n",
760 PCI_BUS(iommu->first_device),
761 PCI_SLOT(iommu->first_device),
762 PCI_FUNC(iommu->first_device),
763 PCI_BUS(iommu->last_device),
764 PCI_SLOT(iommu->last_device),
765 PCI_FUNC(iommu->last_device),
766 e->flags);
767
768 for (dev_i = iommu->first_device;
769 dev_i <= iommu->last_device; ++dev_i)
770 set_dev_entry_from_acpi(iommu, dev_i,
771 e->flags, 0);
772 break;
773 case IVHD_DEV_SELECT:
774
775 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
776 "flags: %02x\n",
777 PCI_BUS(e->devid),
778 PCI_SLOT(e->devid),
779 PCI_FUNC(e->devid),
780 e->flags);
781
782 devid = e->devid;
783 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
784 break;
785 case IVHD_DEV_SELECT_RANGE_START:
786
787 DUMP_printk(" DEV_SELECT_RANGE_START\t "
788 "devid: %02x:%02x.%x flags: %02x\n",
789 PCI_BUS(e->devid),
790 PCI_SLOT(e->devid),
791 PCI_FUNC(e->devid),
792 e->flags);
793
794 devid_start = e->devid;
795 flags = e->flags;
796 ext_flags = 0;
797 alias = false;
798 break;
799 case IVHD_DEV_ALIAS:
800
801 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
802 "flags: %02x devid_to: %02x:%02x.%x\n",
803 PCI_BUS(e->devid),
804 PCI_SLOT(e->devid),
805 PCI_FUNC(e->devid),
806 e->flags,
807 PCI_BUS(e->ext >> 8),
808 PCI_SLOT(e->ext >> 8),
809 PCI_FUNC(e->ext >> 8));
810
811 devid = e->devid;
812 devid_to = e->ext >> 8;
813			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
814 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
815 amd_iommu_alias_table[devid] = devid_to;
816 break;
817 case IVHD_DEV_ALIAS_RANGE:
818
819 DUMP_printk(" DEV_ALIAS_RANGE\t\t "
820 "devid: %02x:%02x.%x flags: %02x "
821 "devid_to: %02x:%02x.%x\n",
822 PCI_BUS(e->devid),
823 PCI_SLOT(e->devid),
824 PCI_FUNC(e->devid),
825 e->flags,
826 PCI_BUS(e->ext >> 8),
827 PCI_SLOT(e->ext >> 8),
828 PCI_FUNC(e->ext >> 8));
829
830 devid_start = e->devid;
831 flags = e->flags;
832 devid_to = e->ext >> 8;
833 ext_flags = 0;
834 alias = true;
835 break;
836 case IVHD_DEV_EXT_SELECT:
837
838 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
839 "flags: %02x ext: %08x\n",
840 PCI_BUS(e->devid),
841 PCI_SLOT(e->devid),
842 PCI_FUNC(e->devid),
843 e->flags, e->ext);
844
845 devid = e->devid;
846 set_dev_entry_from_acpi(iommu, devid, e->flags,
847 e->ext);
848 break;
849 case IVHD_DEV_EXT_SELECT_RANGE:
850
851 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
852 "%02x:%02x.%x flags: %02x ext: %08x\n",
853 PCI_BUS(e->devid),
854 PCI_SLOT(e->devid),
855 PCI_FUNC(e->devid),
856 e->flags, e->ext);
857
858 devid_start = e->devid;
859 flags = e->flags;
860 ext_flags = e->ext;
861 alias = false;
862 break;
863 case IVHD_DEV_RANGE_END:
864
865 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
866 PCI_BUS(e->devid),
867 PCI_SLOT(e->devid),
868 PCI_FUNC(e->devid));
869
870 devid = e->devid;
871 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
872 if (alias) {
873 amd_iommu_alias_table[dev_i] = devid_to;
874 set_dev_entry_from_acpi(iommu,
875 devid_to, flags, ext_flags);
876 }
877 set_dev_entry_from_acpi(iommu, dev_i,
878 flags, ext_flags);
879 }
880 break;
881 default:
882 break;
883 }
884
885 p += ivhd_entry_length(p);
886 }
887}
888
889/* Initializes the device->iommu mapping for the driver */
890static int __init init_iommu_devices(struct amd_iommu *iommu)
891{
892 u32 i;
893
894 for (i = iommu->first_device; i <= iommu->last_device; ++i)
895 set_iommu_for_device(iommu, i);
896
897 return 0;
898}
899
900static void __init free_iommu_one(struct amd_iommu *iommu)
901{
902 free_command_buffer(iommu);
903 free_event_buffer(iommu);
904 iommu_unmap_mmio_space(iommu);
905}
906
907static void __init free_iommu_all(void)
908{
909 struct amd_iommu *iommu, *next;
910
911 for_each_iommu_safe(iommu, next) {
912 list_del(&iommu->list);
913 free_iommu_one(iommu);
914 kfree(iommu);
915 }
916}
917
918/*
919 * This function glues the initialization of one IOMMU
920 * together and also allocates the command buffer and programs the
921 * hardware. It does NOT enable the IOMMU. This is done afterwards.
922 */
923static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
924{
925 spin_lock_init(&iommu->lock);
926
927 /* Add IOMMU to internal data structures */
928 list_add_tail(&iommu->list, &amd_iommu_list);
929 iommu->index = amd_iommus_present++;
930
931 if (unlikely(iommu->index >= MAX_IOMMUS)) {
932 WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
933 return -ENOSYS;
934 }
935
936 /* Index is fine - add IOMMU to the array */
937 amd_iommus[iommu->index] = iommu;
938
939 /*
940 * Copy data from ACPI table entry to the iommu struct
941 */
942 iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
943 if (!iommu->dev)
944 return 1;
945
946 iommu->cap_ptr = h->cap_ptr;
947 iommu->pci_seg = h->pci_seg;
948 iommu->mmio_phys = h->mmio_phys;
949 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
950 if (!iommu->mmio_base)
951 return -ENOMEM;
952
953 iommu->cmd_buf = alloc_command_buffer(iommu);
954 if (!iommu->cmd_buf)
955 return -ENOMEM;
956
957 iommu->evt_buf = alloc_event_buffer(iommu);
958 if (!iommu->evt_buf)
959 return -ENOMEM;
960
961 iommu->int_enabled = false;
962
963 init_iommu_from_pci(iommu);
964 init_iommu_from_acpi(iommu, h);
965 init_iommu_devices(iommu);
966
967 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
968 amd_iommu_np_cache = true;
969
970 return pci_enable_device(iommu->dev);
971}
972
973/*
974 * Iterates over all IOMMU entries in the ACPI table, allocates the
975 * IOMMU structure and initializes it with init_iommu_one()
976 */
977static int __init init_iommu_all(struct acpi_table_header *table)
978{
979 u8 *p = (u8 *)table, *end = (u8 *)table;
980 struct ivhd_header *h;
981 struct amd_iommu *iommu;
982 int ret;
983
984 end += table->length;
985 p += IVRS_HEADER_LENGTH;
986
987 while (p < end) {
988 h = (struct ivhd_header *)p;
989 switch (*p) {
990 case ACPI_IVHD_TYPE:
991
992 DUMP_printk("device: %02x:%02x.%01x cap: %04x "
993 "seg: %d flags: %01x info %04x\n",
994 PCI_BUS(h->devid), PCI_SLOT(h->devid),
995 PCI_FUNC(h->devid), h->cap_ptr,
996 h->pci_seg, h->flags, h->info);
997 DUMP_printk(" mmio-addr: %016llx\n",
998 h->mmio_phys);
999
1000 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1001 if (iommu == NULL) {
1002 amd_iommu_init_err = -ENOMEM;
1003 return 0;
1004 }
1005
1006 ret = init_iommu_one(iommu, h);
1007 if (ret) {
1008 amd_iommu_init_err = ret;
1009 return 0;
1010 }
1011 break;
1012 default:
1013 break;
1014 }
1015 p += h->length;
1016
1017 }
1018 WARN_ON(p != end);
1019
1020 return 0;
1021}
1022
1023/****************************************************************************
1024 *
1025 * The following functions initialize the MSI interrupts for all IOMMUs
1026 * in the system. It's a bit challenging because there could be multiple
1027 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1028 * pci_dev.
1029 *
1030 ****************************************************************************/
1031
1032static int iommu_setup_msi(struct amd_iommu *iommu)
1033{
1034 int r;
1035
1036 if (pci_enable_msi(iommu->dev))
1037 return 1;
1038
1039 r = request_threaded_irq(iommu->dev->irq,
1040 amd_iommu_int_handler,
1041 amd_iommu_int_thread,
1042 0, "AMD-Vi",
1043 iommu->dev);
1044
1045 if (r) {
1046 pci_disable_msi(iommu->dev);
1047 return 1;
1048 }
1049
1050 iommu->int_enabled = true;
1051 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
1052
1053 return 0;
1054}
1055
1056static int iommu_init_msi(struct amd_iommu *iommu)
1057{
1058 if (iommu->int_enabled)
1059 return 0;
1060
1061 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
1062 return iommu_setup_msi(iommu);
1063
1064 return 1;
1065}
1066
1067/****************************************************************************
1068 *
1069 * The next functions belong to the third pass of parsing the ACPI
1070 * table. In this last pass the memory mapping requirements are
1071 * gathered (like exclusion and unity mapping ranges).
1072 *
1073 ****************************************************************************/
1074
1075static void __init free_unity_maps(void)
1076{
1077 struct unity_map_entry *entry, *next;
1078
1079 list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
1080 list_del(&entry->list);
1081 kfree(entry);
1082 }
1083}
1084
1085/* called when we find an exclusion range definition in ACPI */
1086static int __init init_exclusion_range(struct ivmd_header *m)
1087{
1088 int i;
1089
1090 switch (m->type) {
1091 case ACPI_IVMD_TYPE:
1092 set_device_exclusion_range(m->devid, m);
1093 break;
1094 case ACPI_IVMD_TYPE_ALL:
1095 for (i = 0; i <= amd_iommu_last_bdf; ++i)
1096 set_device_exclusion_range(i, m);
1097 break;
1098 case ACPI_IVMD_TYPE_RANGE:
1099 for (i = m->devid; i <= m->aux; ++i)
1100 set_device_exclusion_range(i, m);
1101 break;
1102 default:
1103 break;
1104 }
1105
1106 return 0;
1107}
1108
1109/* called for unity map ACPI definition */
1110static int __init init_unity_map_range(struct ivmd_header *m)
1111{
1112 struct unity_map_entry *e = 0;
1113 char *s;
1114
1115 e = kzalloc(sizeof(*e), GFP_KERNEL);
1116 if (e == NULL)
1117 return -ENOMEM;
1118
1119 switch (m->type) {
1120 default:
1121 kfree(e);
1122 return 0;
1123 case ACPI_IVMD_TYPE:
1124		s = "IVMD_TYPE\t\t\t";
1125 e->devid_start = e->devid_end = m->devid;
1126 break;
1127 case ACPI_IVMD_TYPE_ALL:
1128 s = "IVMD_TYPE_ALL\t\t";
1129 e->devid_start = 0;
1130 e->devid_end = amd_iommu_last_bdf;
1131 break;
1132 case ACPI_IVMD_TYPE_RANGE:
1133 s = "IVMD_TYPE_RANGE\t\t";
1134 e->devid_start = m->devid;
1135 e->devid_end = m->aux;
1136 break;
1137 }
1138 e->address_start = PAGE_ALIGN(m->range_start);
1139 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
1140 e->prot = m->flags >> 1;
1141
1142 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
1143 " range_start: %016llx range_end: %016llx flags: %x\n", s,
1144 PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
1145 PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
1146 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
1147 e->address_start, e->address_end, m->flags);
1148
1149 list_add_tail(&e->list, &amd_iommu_unity_map);
1150
1151 return 0;
1152}
1153
1154/* iterates over all memory definitions we find in the ACPI table */
1155static int __init init_memory_definitions(struct acpi_table_header *table)
1156{
1157 u8 *p = (u8 *)table, *end = (u8 *)table;
1158 struct ivmd_header *m;
1159
1160 end += table->length;
1161 p += IVRS_HEADER_LENGTH;
1162
1163 while (p < end) {
1164 m = (struct ivmd_header *)p;
1165 if (m->flags & IVMD_FLAG_EXCL_RANGE)
1166 init_exclusion_range(m);
1167 else if (m->flags & IVMD_FLAG_UNITY_MAP)
1168 init_unity_map_range(m);
1169
1170 p += m->length;
1171 }
1172
1173 return 0;
1174}
1175
1176/*
1177 * Init the device table to not allow DMA access for devices and
1178 * suppress all page faults
1179 */
1180static void init_device_table(void)
1181{
1182 u32 devid;
1183
1184 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
1185 set_dev_entry_bit(devid, DEV_ENTRY_VALID);
1186 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
1187 }
1188}
1189
1190static void iommu_init_flags(struct amd_iommu *iommu)
1191{
1192 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
1193 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
1194 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
1195
1196 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
1197 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
1198 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
1199
1200 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
1201 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
1202 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
1203
1204 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
1205 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
1206 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
1207
1208 /*
1209 * make IOMMU memory accesses cache coherent
1210 */
1211 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1212}
1213
1214static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1215{
1216 int i, j;
1217 u32 ioc_feature_control;
1218 struct pci_dev *pdev = NULL;
1219
1220 /* RD890 BIOSes may not have completely reconfigured the iommu */
1221 if (!is_rd890_iommu(iommu->dev))
1222 return;
1223
1224 /*
1225 * First, we need to ensure that the iommu is enabled. This is
1226 * controlled by a register in the northbridge
1227 */
1228 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1229
1230 if (!pdev)
1231 return;
1232
1233 /* Select Northbridge indirect register 0x75 and enable writing */
1234 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1235 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1236
1237 /* Enable the iommu */
1238 if (!(ioc_feature_control & 0x1))
1239 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1240
1241 pci_dev_put(pdev);
1242
1243 /* Restore the iommu BAR */
1244 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1245 iommu->stored_addr_lo);
1246 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1247 iommu->stored_addr_hi);
1248
1249 /* Restore the l1 indirect regs for each of the 6 l1s */
1250 for (i = 0; i < 6; i++)
1251 for (j = 0; j < 0x12; j++)
1252 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1253
1254 /* Restore the l2 indirect regs */
1255 for (i = 0; i < 0x83; i++)
1256 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1257
1258 /* Lock PCI setup registers */
1259 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1260 iommu->stored_addr_lo | 1);
1261}
1262
1263/*
1264 * This function finally enables all IOMMUs found in the system after
1265 * they have been initialized
1266 */
1267static void enable_iommus(void)
1268{
1269 struct amd_iommu *iommu;
1270
1271 for_each_iommu(iommu) {
1272 iommu_disable(iommu);
1273 iommu_init_flags(iommu);
1274 iommu_set_device_table(iommu);
1275 iommu_enable_command_buffer(iommu);
1276 iommu_enable_event_buffer(iommu);
1277 iommu_set_exclusion_range(iommu);
1278 iommu_init_msi(iommu);
1279 iommu_enable(iommu);
1280 iommu_flush_all_caches(iommu);
1281 }
1282}
1283
1284static void disable_iommus(void)
1285{
1286 struct amd_iommu *iommu;
1287
1288 for_each_iommu(iommu)
1289 iommu_disable(iommu);
1290}
1291
1292/*
1293 * Suspend/Resume support
1294 * disable suspend until real resume implemented
1295 */
1296
1297static void amd_iommu_resume(void)
1298{
1299 struct amd_iommu *iommu;
1300
1301 for_each_iommu(iommu)
1302 iommu_apply_resume_quirks(iommu);
1303
1304 /* re-load the hardware */
1305 enable_iommus();
1306
1307 /*
1308 * we have to flush after the IOMMUs are enabled because a
1309 * disabled IOMMU will never execute the commands we send
1310 */
1311 for_each_iommu(iommu)
1312 iommu_flush_all_caches(iommu);
1313}
1314
1315static int amd_iommu_suspend(void)
1316{
1317 /* disable IOMMUs to go out of the way for BIOS */
1318 disable_iommus();
1319
1320 return 0;
1321}
1322
1323static struct syscore_ops amd_iommu_syscore_ops = {
1324 .suspend = amd_iommu_suspend,
1325 .resume = amd_iommu_resume,
1326};
1327
1328/*
1329 * This is the core init function for AMD IOMMU hardware in the system.
1330 * This function is called from the generic x86 DMA layer initialization
1331 * code.
1332 *
1333 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
1334 * three times:
1335 *
1336 * 1 pass) Find the highest PCI device id the driver has to handle.
1337 * Based on this information the size of the data structures
1338 * that need to be allocated is determined.
1339 *
1340 * 2 pass) Initialize the data structures just allocated with the
1341 * information in the ACPI table about available AMD IOMMUs
1342 * in the system. It also maps the PCI devices in the
1343 * system to specific IOMMUs
1344 *
1345 * 3 pass) After the basic data structures are allocated and
1346 * initialized we update them with information about memory
1347 * remapping requirements parsed out of the ACPI table in
1348 * this last pass.
1349 *
1350 * After that the hardware is initialized and ready to go. In the last
1351 * step we do some Linux specific things like registering the driver in
1352 * the dma_ops interface and initializing the suspend/resume support
1353 * functions. Finally it prints some information about AMD IOMMUs and
1354 * the driver state and enables the hardware.
1355 */
1356static int __init amd_iommu_init(void)
1357{
1358 int i, ret = 0;
1359
1360 /*
1361 * First parse ACPI tables to find the largest Bus/Dev/Func
1362 * we need to handle. Based on this information the shared data
1363 * structures for the IOMMUs in the system will be allocated
1364 */
1365 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
1366 return -ENODEV;
1367
1368 ret = amd_iommu_init_err;
1369 if (ret)
1370 goto out;
1371
1372 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
1373 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
1374 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
1375
1376 ret = -ENOMEM;
1377
1378 /* Device table - directly used by all IOMMUs */
1379 amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1380 get_order(dev_table_size));
1381 if (amd_iommu_dev_table == NULL)
1382 goto out;
1383
1384 /*
1385 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
1386 * IOMMU sees for that device
1387 */
1388 amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
1389 get_order(alias_table_size));
1390 if (amd_iommu_alias_table == NULL)
1391 goto free;
1392
1393 /* IOMMU rlookup table - find the IOMMU for a specific device */
1394 amd_iommu_rlookup_table = (void *)__get_free_pages(
1395 GFP_KERNEL | __GFP_ZERO,
1396 get_order(rlookup_table_size));
1397 if (amd_iommu_rlookup_table == NULL)
1398 goto free;
1399
1400 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
1401 GFP_KERNEL | __GFP_ZERO,
1402 get_order(MAX_DOMAIN_ID/8));
1403 if (amd_iommu_pd_alloc_bitmap == NULL)
1404 goto free;
1405
1406 /* init the device table */
1407 init_device_table();
1408
1409 /*
1410 * let all alias entries point to themselves
1411 */
1412 for (i = 0; i <= amd_iommu_last_bdf; ++i)
1413 amd_iommu_alias_table[i] = i;
1414
1415 /*
1416 * never allocate domain 0 because it's used as the non-allocated and
1417 * error value placeholder
1418 */
1419 amd_iommu_pd_alloc_bitmap[0] = 1;
1420
1421 spin_lock_init(&amd_iommu_pd_lock);
1422
1423 /*
1424 * now the data structures are allocated and basically initialized
1425 * start the real acpi table scan
1426 */
1427 ret = -ENODEV;
1428 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
1429 goto free;
1430
1431 if (amd_iommu_init_err) {
1432 ret = amd_iommu_init_err;
1433 goto free;
1434 }
1435
1436 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
1437 goto free;
1438
1439 if (amd_iommu_init_err) {
1440 ret = amd_iommu_init_err;
1441 goto free;
1442 }
1443
1444 ret = amd_iommu_init_devices();
1445 if (ret)
1446 goto free;
1447
1448 enable_iommus();
1449
1450 if (iommu_pass_through)
1451 ret = amd_iommu_init_passthrough();
1452 else
1453 ret = amd_iommu_init_dma_ops();
1454
1455 if (ret)
1456 goto free_disable;
1457
1458 amd_iommu_init_api();
1459
1460 amd_iommu_init_notifier();
1461
1462 register_syscore_ops(&amd_iommu_syscore_ops);
1463
1464 if (iommu_pass_through)
1465 goto out;
1466
1467 if (amd_iommu_unmap_flush)
1468 printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
1469 else
1470 printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
1471
1472 x86_platform.iommu_shutdown = disable_iommus;
1473out:
1474 return ret;
1475
1476free_disable:
1477 disable_iommus();
1478
1479free:
1480 amd_iommu_uninit_devices();
1481
1482 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1483 get_order(MAX_DOMAIN_ID/8));
1484
1485 free_pages((unsigned long)amd_iommu_rlookup_table,
1486 get_order(rlookup_table_size));
1487
1488 free_pages((unsigned long)amd_iommu_alias_table,
1489 get_order(alias_table_size));
1490
1491 free_pages((unsigned long)amd_iommu_dev_table,
1492 get_order(dev_table_size));
1493
1494 free_iommu_all();
1495
1496 free_unity_maps();
1497
1498#ifdef CONFIG_GART_IOMMU
1499 /*
1500 * We failed to initialize the AMD IOMMU - try fallback to GART
1501 * if possible.
1502 */
1503 gart_iommu_init();
1504
1505#endif
1506
1507 goto out;
1508}
1509
1510/****************************************************************************
1511 *
1512 * Early detect code. This code runs at IOMMU detection time in the DMA
1513 * layer. It just looks if there is an IVRS ACPI table to detect AMD
1514 * IOMMUs
1515 *
1516 ****************************************************************************/
1517static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1518{
1519 return 0;
1520}
1521
1522int __init amd_iommu_detect(void)
1523{
1524 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1525 return -ENODEV;
1526
1527 if (amd_iommu_disabled)
1528 return -ENODEV;
1529
1530 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1531 iommu_detected = 1;
1532 amd_iommu_detected = 1;
1533 x86_init.iommu.iommu_init = amd_iommu_init;
1534
1535 /* Make sure ACS will be enabled */
1536 pci_request_acs();
1537 return 1;
1538 }
1539 return -ENODEV;
1540}
1541
1542/****************************************************************************
1543 *
1544 * Parsing functions for the AMD IOMMU specific kernel command line
1545 * options.
1546 *
1547 ****************************************************************************/
1548
1549static int __init parse_amd_iommu_dump(char *str)
1550{
1551 amd_iommu_dump = true;
1552
1553 return 1;
1554}
1555
1556static int __init parse_amd_iommu_options(char *str)
1557{
1558 for (; *str; ++str) {
1559 if (strncmp(str, "fullflush", 9) == 0)
1560 amd_iommu_unmap_flush = true;
1561 if (strncmp(str, "off", 3) == 0)
1562 amd_iommu_disabled = true;
1563 }
1564
1565 return 1;
1566}
1567
1568__setup("amd_iommu_dump", parse_amd_iommu_dump);
1569__setup("amd_iommu=", parse_amd_iommu_options);
1570
1571IOMMU_INIT_FINISH(amd_iommu_detect,
1572 gart_iommu_hole_init,
1573 0,
1574 0);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
new file mode 100644
index 000000000000..7ffaa64410b0
--- /dev/null
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
20#define _ASM_X86_AMD_IOMMU_PROTO_H
21
22#include "amd_iommu_types.h"
23
24extern int amd_iommu_init_dma_ops(void);
25extern int amd_iommu_init_passthrough(void);
26extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
27extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
28extern void amd_iommu_apply_erratum_63(u16 devid);
29extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
30extern int amd_iommu_init_devices(void);
31extern void amd_iommu_uninit_devices(void);
32extern void amd_iommu_init_notifier(void);
33extern void amd_iommu_init_api(void);
34#ifndef CONFIG_AMD_IOMMU_STATS
35
36static inline void amd_iommu_stats_init(void) { }
37
38#endif /* !CONFIG_AMD_IOMMU_STATS */
39
40static inline bool is_rd890_iommu(struct pci_dev *pdev)
41{
42 return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
43 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
44}
45
46static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
47{
48 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
49 return false;
50
51 return !!(iommu->features & f);
52}
53
54#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
new file mode 100644
index 000000000000..5b9c5075e81a
--- /dev/null
+++ b/drivers/iommu/amd_iommu_types.h
@@ -0,0 +1,585 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
21#define _ASM_X86_AMD_IOMMU_TYPES_H
22
23#include <linux/types.h>
24#include <linux/mutex.h>
25#include <linux/list.h>
26#include <linux/spinlock.h>
27
28/*
29 * Maximum number of IOMMUs supported
30 */
31#define MAX_IOMMUS 32
32
33/*
34 * some size calculation constants
35 */
36#define DEV_TABLE_ENTRY_SIZE 32
37#define ALIAS_TABLE_ENTRY_SIZE 2
38#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
39
40/* Length of the MMIO region for the AMD IOMMU */
41#define MMIO_REGION_LENGTH 0x4000
42
43/* Capability offsets used by the driver */
44#define MMIO_CAP_HDR_OFFSET 0x00
45#define MMIO_RANGE_OFFSET 0x0c
46#define MMIO_MISC_OFFSET 0x10
47
48/* Masks, shifts and macros to parse the device range capability */
49#define MMIO_RANGE_LD_MASK 0xff000000
50#define MMIO_RANGE_FD_MASK 0x00ff0000
51#define MMIO_RANGE_BUS_MASK 0x0000ff00
52#define MMIO_RANGE_LD_SHIFT 24
53#define MMIO_RANGE_FD_SHIFT 16
54#define MMIO_RANGE_BUS_SHIFT 8
55#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
56#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
57#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
58#define MMIO_MSI_NUM(x) ((x) & 0x1f)
59
60/* Flag masks for the AMD IOMMU exclusion range */
61#define MMIO_EXCL_ENABLE_MASK 0x01ULL
62#define MMIO_EXCL_ALLOW_MASK 0x02ULL
63
64/* Used offsets into the MMIO space */
65#define MMIO_DEV_TABLE_OFFSET 0x0000
66#define MMIO_CMD_BUF_OFFSET 0x0008
67#define MMIO_EVT_BUF_OFFSET 0x0010
68#define MMIO_CONTROL_OFFSET 0x0018
69#define MMIO_EXCL_BASE_OFFSET 0x0020
70#define MMIO_EXCL_LIMIT_OFFSET 0x0028
71#define MMIO_EXT_FEATURES 0x0030
72#define MMIO_CMD_HEAD_OFFSET 0x2000
73#define MMIO_CMD_TAIL_OFFSET 0x2008
74#define MMIO_EVT_HEAD_OFFSET 0x2010
75#define MMIO_EVT_TAIL_OFFSET 0x2018
76#define MMIO_STATUS_OFFSET 0x2020
77
78
79/* Extended Feature Bits */
80#define FEATURE_PREFETCH (1ULL<<0)
81#define FEATURE_PPR (1ULL<<1)
82#define FEATURE_X2APIC (1ULL<<2)
83#define FEATURE_NX (1ULL<<3)
84#define FEATURE_GT (1ULL<<4)
85#define FEATURE_IA (1ULL<<6)
86#define FEATURE_GA (1ULL<<7)
87#define FEATURE_HE (1ULL<<8)
88#define FEATURE_PC (1ULL<<9)
89
90/* MMIO status bits */
91#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
92
93/* event logging constants */
94#define EVENT_ENTRY_SIZE 0x10
95#define EVENT_TYPE_SHIFT 28
96#define EVENT_TYPE_MASK 0xf
97#define EVENT_TYPE_ILL_DEV 0x1
98#define EVENT_TYPE_IO_FAULT 0x2
99#define EVENT_TYPE_DEV_TAB_ERR 0x3
100#define EVENT_TYPE_PAGE_TAB_ERR 0x4
101#define EVENT_TYPE_ILL_CMD 0x5
102#define EVENT_TYPE_CMD_HARD_ERR 0x6
103#define EVENT_TYPE_IOTLB_INV_TO 0x7
104#define EVENT_TYPE_INV_DEV_REQ 0x8
105#define EVENT_DEVID_MASK 0xffff
106#define EVENT_DEVID_SHIFT 0
107#define EVENT_DOMID_MASK 0xffff
108#define EVENT_DOMID_SHIFT 0
109#define EVENT_FLAGS_MASK 0xfff
110#define EVENT_FLAGS_SHIFT 0x10
111
112/* feature control bits */
113#define CONTROL_IOMMU_EN 0x00ULL
114#define CONTROL_HT_TUN_EN 0x01ULL
115#define CONTROL_EVT_LOG_EN 0x02ULL
116#define CONTROL_EVT_INT_EN 0x03ULL
117#define CONTROL_COMWAIT_EN 0x04ULL
118#define CONTROL_PASSPW_EN 0x08ULL
119#define CONTROL_RESPASSPW_EN 0x09ULL
120#define CONTROL_COHERENT_EN 0x0aULL
121#define CONTROL_ISOC_EN 0x0bULL
122#define CONTROL_CMDBUF_EN 0x0cULL
123#define CONTROL_PPFLOG_EN 0x0dULL
124#define CONTROL_PPFINT_EN 0x0eULL
125
126/* command specific defines */
127#define CMD_COMPL_WAIT 0x01
128#define CMD_INV_DEV_ENTRY 0x02
129#define CMD_INV_IOMMU_PAGES 0x03
130#define CMD_INV_IOTLB_PAGES 0x04
131#define CMD_INV_ALL 0x08
132
133#define CMD_COMPL_WAIT_STORE_MASK 0x01
134#define CMD_COMPL_WAIT_INT_MASK 0x02
135#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
136#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
137
138#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
139
140/* macros and definitions for device table entries */
141#define DEV_ENTRY_VALID 0x00
142#define DEV_ENTRY_TRANSLATION 0x01
143#define DEV_ENTRY_IR 0x3d
144#define DEV_ENTRY_IW 0x3e
145#define DEV_ENTRY_NO_PAGE_FAULT 0x62
146#define DEV_ENTRY_EX 0x67
147#define DEV_ENTRY_SYSMGT1 0x68
148#define DEV_ENTRY_SYSMGT2 0x69
149#define DEV_ENTRY_INIT_PASS 0xb8
150#define DEV_ENTRY_EINT_PASS 0xb9
151#define DEV_ENTRY_NMI_PASS 0xba
152#define DEV_ENTRY_LINT0_PASS 0xbe
153#define DEV_ENTRY_LINT1_PASS 0xbf
154#define DEV_ENTRY_MODE_MASK 0x07
155#define DEV_ENTRY_MODE_SHIFT 0x09
156
157/* constants to configure the command buffer */
158#define CMD_BUFFER_SIZE 8192
159#define CMD_BUFFER_UNINITIALIZED 1
160#define CMD_BUFFER_ENTRIES 512
161#define MMIO_CMD_SIZE_SHIFT 56
162#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
163
164/* constants for event buffer handling */
165#define EVT_BUFFER_SIZE 8192 /* 512 entries */
166#define EVT_LEN_MASK (0x9ULL << 56)
167
168#define PAGE_MODE_NONE 0x00
169#define PAGE_MODE_1_LEVEL 0x01
170#define PAGE_MODE_2_LEVEL 0x02
171#define PAGE_MODE_3_LEVEL 0x03
172#define PAGE_MODE_4_LEVEL 0x04
173#define PAGE_MODE_5_LEVEL 0x05
174#define PAGE_MODE_6_LEVEL 0x06
175
176#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
177#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
178 ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
179 (0xffffffffffffffffULL))
180#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
181#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL)
182#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
183 IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
184#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL)
185
186#define PM_MAP_4k 0
187#define PM_ADDR_MASK 0x000ffffffffff000ULL
188#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \
189 (~((1ULL << (12 + ((lvl) * 9))) - 1)))
190#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
191
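/*
 * Example: with a 3-level page table (PAGE_MODE_3_LEVEL) the walk uses
 * PM_LEVEL_SHIFT(2) = 30, PM_LEVEL_SHIFT(1) = 21 and PM_LEVEL_SHIFT(0) = 12,
 * so every level selects 9 address bits (512 entries per table) and
 * PM_LEVEL_SIZE(3) = (1ULL << 39) - 1 covers 512 GB of IO virtual address
 * space. For the IO virtual address 0x40201000 all three indices are 1:
 * PM_LEVEL_INDEX(2, a) = PM_LEVEL_INDEX(1, a) = PM_LEVEL_INDEX(0, a) = 1.
 */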
192/*
193 * Returns the page table level to use for a given page size
194 * Pagesize is expected to be a power-of-two
195 */
196#define PAGE_SIZE_LEVEL(pagesize) \
197 ((__ffs(pagesize) - 12) / 9)
198/*
199 * Returns the number of ptes to use for a given page size
200 * Pagesize is expected to be a power-of-two
201 */
202#define PAGE_SIZE_PTE_COUNT(pagesize) \
203 (1ULL << ((__ffs(pagesize) - 12) % 9))
204
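/*
 * Example: a 2 MB mapping gives PAGE_SIZE_LEVEL(2M) = (21 - 12) / 9 = 1
 * and PAGE_SIZE_PTE_COUNT(2M) = 1, i.e. one level-1 PTE, while a 32 KB
 * mapping gives PAGE_SIZE_LEVEL(32K) = 0 and PAGE_SIZE_PTE_COUNT(32K) =
 * 1 << 3 = 8, i.e. eight consecutive level-0 PTEs.
 */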
205/*
206 * Aligns a given io-virtual address to a given page size
207 * Pagesize is expected to be a power-of-two
208 */
209#define PAGE_SIZE_ALIGN(address, pagesize) \
210 ((address) & ~((pagesize) - 1))
211/*
212 * Creates an IOMMU PTE for an address and a given pagesize
213 * The PTE has no permission bits set
214 * Pagesize is expected to be a power-of-two larger than 4096
215 */
216#define PAGE_SIZE_PTE(address, pagesize) \
217 (((address) | ((pagesize) - 1)) & \
218 (~(pagesize >> 1)) & PM_ADDR_MASK)
219
220/*
221 * Takes a PTE value with mode=0x07 and returns the page size it maps
222 */
223#define PTE_PAGE_SIZE(pte) \
224 (1ULL << (1 + ffz(((pte) | 0xfffULL))))
225
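/*
 * Example: PAGE_SIZE_PTE(addr, 2M) sets address bits 12-19 and leaves
 * bit 20 clear, so PTE_PAGE_SIZE() finds the first zero bit at position
 * 20 and returns 1ULL << 21 = 2 MB again.
 */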
226#define IOMMU_PTE_P (1ULL << 0)
227#define IOMMU_PTE_TV (1ULL << 1)
228#define IOMMU_PTE_U (1ULL << 59)
229#define IOMMU_PTE_FC (1ULL << 60)
230#define IOMMU_PTE_IR (1ULL << 61)
231#define IOMMU_PTE_IW (1ULL << 62)
232
233#define DTE_FLAG_IOTLB 0x01
234
235#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
236#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
237#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
238#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
239
240#define IOMMU_PROT_MASK 0x03
241#define IOMMU_PROT_IR 0x01
242#define IOMMU_PROT_IW 0x02
243
244/* IOMMU capabilities */
245#define IOMMU_CAP_IOTLB 24
246#define IOMMU_CAP_NPCACHE 26
247#define IOMMU_CAP_EFR 27
248
249#define MAX_DOMAIN_ID 65536
250
251/* FIXME: move this macro to <linux/pci.h> */
252#define PCI_BUS(x) (((x) >> 8) & 0xff)
253
254/* Protection domain flags */
255#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
256#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
257 domain for an IOMMU */
258#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
259 translation */
260
261extern bool amd_iommu_dump;
262#define DUMP_printk(format, arg...) \
263 do { \
264 if (amd_iommu_dump) \
265 printk(KERN_INFO "AMD-Vi: " format, ## arg); \
266 } while (0)
267
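/*
 * DUMP_printk() only produces output when the kernel was booted with the
 * "amd_iommu_dump" parameter, which sets amd_iommu_dump in
 * parse_amd_iommu_dump() (amd_iommu_init.c).
 */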
268/* global flag indicating whether the IOMMUs cache non-present entries */
269extern bool amd_iommu_np_cache;
270/* Only true if all IOMMUs support device IOTLBs */
271extern bool amd_iommu_iotlb_sup;
272
273/*
274 * Make iterating over all IOMMUs easier
275 */
276#define for_each_iommu(iommu) \
277 list_for_each_entry((iommu), &amd_iommu_list, list)
278#define for_each_iommu_safe(iommu, next) \
279 list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
280
281#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
282#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
283#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
284#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
285#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
286#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
287
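/*
 * APERTURE_PAGE_INDEX() selects one of the 64 2 MB leaf-page slots inside
 * a 128 MB range, matching the pte_pages[64] array in struct
 * aperture_range below.
 */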
288/*
289 * This structure contains generic data for IOMMU protection domains
290 * independent of their use.
291 */
292struct protection_domain {
293 struct list_head list; /* for list of all protection domains */
294 struct list_head dev_list; /* List of all devices in this domain */
295 spinlock_t lock; /* mostly used to lock the page table*/
296 struct mutex api_lock; /* protect page tables in the iommu-api path */
297 u16 id; /* the domain id written to the device table */
298 int mode; /* paging mode (0-6 levels) */
299 u64 *pt_root; /* page table root pointer */
300 unsigned long flags; /* flags to find out type of domain */
301 bool updated; /* complete domain flush required */
302 unsigned dev_cnt; /* devices assigned to this domain */
303 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
304 void *priv; /* private data */
305
306};
307
308/*
309 * This struct contains device specific data for the IOMMU
310 */
311struct iommu_dev_data {
312 struct list_head list; /* For domain->dev_list */
313 struct list_head dev_data_list; /* For global dev_data_list */
314 struct iommu_dev_data *alias_data;/* The alias dev_data */
315 struct protection_domain *domain; /* Domain the device is bound to */
316 atomic_t bind; /* Domain attach reference count */
317 u16 devid; /* PCI Device ID */
318 struct {
319 bool enabled;
320 int qdep;
321 } ats; /* ATS state */
322};
323
324/*
325 * For dynamic growth the aperture size is split into ranges of 128MB of
326 * DMA address space each. This struct represents one such range.
327 */
328struct aperture_range {
329
330 /* address allocation bitmap */
331 unsigned long *bitmap;
332
333 /*
334 * Array of PTE pages for the aperture. In this array we save all the
335 * leaf pages of the domain page table used for the aperture. This way
336 * we don't need to walk the page table to find a specific PTE. We can
337 * just calculate its address in constant time.
338 */
339 u64 *pte_pages[64];
340
341 unsigned long offset;
342};
343
344/*
345 * Data container for a dma_ops specific protection domain
346 */
347struct dma_ops_domain {
348 struct list_head list;
349
350 /* generic protection domain information */
351 struct protection_domain domain;
352
353 /* size of the aperture for the mappings */
354 unsigned long aperture_size;
355
356 /* address we start to search for free addresses */
357 unsigned long next_address;
358
359 /* address space relevant data */
360 struct aperture_range *aperture[APERTURE_MAX_RANGES];
361
362 /* This will be set to true when TLB needs to be flushed */
363 bool need_flush;
364
365 /*
366 * if this is a preallocated domain, keep the device for which it was
367 * preallocated in this variable
368 */
369 u16 target_dev;
370};
371
372/*
373 * Structure where we save information about one hardware AMD IOMMU in the
374 * system.
375 */
376struct amd_iommu {
377 struct list_head list;
378
379 /* Index within the IOMMU array */
380 int index;
381
382 /* locks the accesses to the hardware */
383 spinlock_t lock;
384
385 /* Pointer to PCI device of this IOMMU */
386 struct pci_dev *dev;
387
388 /* physical address of MMIO space */
389 u64 mmio_phys;
390 /* virtual address of MMIO space */
391 u8 *mmio_base;
392
393 /* capabilities of that IOMMU read from ACPI */
394 u32 cap;
395
396 /* flags read from acpi table */
397 u8 acpi_flags;
398
399 /* Extended features */
400 u64 features;
401
402 /*
403 * Capability pointer. There could be more than one IOMMU per PCI
404 * device function if there is more than one AMD IOMMU capability
405 * pointer.
406 */
407 u16 cap_ptr;
408
409 /* pci domain of this IOMMU */
410 u16 pci_seg;
411
412 /* first device this IOMMU handles. read from PCI */
413 u16 first_device;
414 /* last device this IOMMU handles. read from PCI */
415 u16 last_device;
416
417 /* start of exclusion range of that IOMMU */
418 u64 exclusion_start;
419 /* length of exclusion range of that IOMMU */
420 u64 exclusion_length;
421
422 /* command buffer virtual address */
423 u8 *cmd_buf;
424 /* size of command buffer */
425 u32 cmd_buf_size;
426
427 /* size of event buffer */
428 u32 evt_buf_size;
429 /* event buffer virtual address */
430 u8 *evt_buf;
431 /* MSI number for event interrupt */
432 u16 evt_msi_num;
433
434 /* true if interrupts for this IOMMU are already enabled */
435 bool int_enabled;
436
437 /* if one, we need to send a completion wait command */
438 bool need_sync;
439
440 /* default dma_ops domain for that IOMMU */
441 struct dma_ops_domain *default_dom;
442
443 /*
444 * We can't rely on the BIOS to restore all values on reinit, so we
445 * need to stash them
446 */
447
448 /* The iommu BAR */
449 u32 stored_addr_lo;
450 u32 stored_addr_hi;
451
452 /*
453 * Each iommu has 6 l1s, each of which is documented as having 0x12
454 * registers
455 */
456 u32 stored_l1[6][0x12];
457
458 /* The l2 indirect registers */
459 u32 stored_l2[0x83];
460};
461
462/*
463 * List with all IOMMUs in the system. This list is not locked because it is
464 * only written and read at driver initialization or suspend time
465 */
466extern struct list_head amd_iommu_list;
467
468/*
469 * Array with pointers to each IOMMU struct
470 * The indices are referenced in the protection domains
471 */
472extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
473
474/* Number of IOMMUs present in the system */
475extern int amd_iommus_present;
476
477/*
478 * Declarations for the global list of all protection domains
479 */
480extern spinlock_t amd_iommu_pd_lock;
481extern struct list_head amd_iommu_pd_list;
482
483/*
484 * Structure defining one entry in the device table
485 */
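/* Eight 32-bit words, i.e. 32 bytes per entry (DEV_TABLE_ENTRY_SIZE). */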
486struct dev_table_entry {
487 u32 data[8];
488};
489
490/*
491 * One entry for unity mappings parsed out of the ACPI table.
492 */
493struct unity_map_entry {
494 struct list_head list;
495
496 /* starting device id this entry is used for (inclusive) */
497 u16 devid_start;
498 /* end device id this entry is used for (inclusive) */
499 u16 devid_end;
500
501 /* start address to unity map (inclusive) */
502 u64 address_start;
503 /* end address to unity map (inclusive) */
504 u64 address_end;
505
506 /* required protection */
507 int prot;
508};
509
510/*
511 * List of all unity mappings. It is not locked because at runtime it is only
512 * read. It is created at ACPI table parsing time.
513 */
514extern struct list_head amd_iommu_unity_map;
515
516/*
517 * Data structures for device handling
518 */
519
520/*
521 * Device table used by hardware. Read and write accesses by software are
522 * locked with the amd_iommu_pd_table lock.
523 */
524extern struct dev_table_entry *amd_iommu_dev_table;
525
526/*
527 * Alias table to map requestor ids to device ids. Not locked because only
528 * read on runtime.
529 */
530extern u16 *amd_iommu_alias_table;
531
532/*
533 * Reverse lookup table to find the IOMMU which translates a specific device.
534 */
535extern struct amd_iommu **amd_iommu_rlookup_table;
536
537/* size of the dma_ops aperture as power of 2 */
538extern unsigned amd_iommu_aperture_order;
539
540/* largest PCI device id we expect translation requests for */
541extern u16 amd_iommu_last_bdf;
542
543/* allocation bitmap for domain ids */
544extern unsigned long *amd_iommu_pd_alloc_bitmap;
545
546/*
547 * If true, the addresses will be flushed at unmap time, not when
548 * they are reused
549 */
550extern bool amd_iommu_unmap_flush;
551
552/* takes bus and device/function and returns the device id
553 * FIXME: should that be in generic PCI code? */
554static inline u16 calc_devid(u8 bus, u8 devfn)
555{
556 return (((u16)bus) << 8) | devfn;
557}
558
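/*
 * Example: bus 0x0c, slot 5, function 2 gives devfn = PCI_DEVFN(5, 2) =
 * 0x2a and devid = calc_devid(0x0c, 0x2a) = 0x0c2a; PCI_BUS(0x0c2a),
 * PCI_SLOT(0x0c2a) and PCI_FUNC(0x0c2a) recover 0x0c, 5 and 2.
 */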
559#ifdef CONFIG_AMD_IOMMU_STATS
560
561struct __iommu_counter {
562 char *name;
563 struct dentry *dent;
564 u64 value;
565};
566
567#define DECLARE_STATS_COUNTER(nm) \
568 static struct __iommu_counter nm = { \
569 .name = #nm, \
570 }
571
572#define INC_STATS_COUNTER(name) name.value += 1
573#define ADD_STATS_COUNTER(name, x) name.value += (x)
574#define SUB_STATS_COUNTER(name, x) name.value -= (x)
575
576#else /* CONFIG_AMD_IOMMU_STATS */
577
578#define DECLARE_STATS_COUNTER(name)
579#define INC_STATS_COUNTER(name)
580#define ADD_STATS_COUNTER(name, x)
581#define SUB_STATS_COUNTER(name, x)
582
583#endif /* CONFIG_AMD_IOMMU_STATS */
584
585#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
new file mode 100644
index 000000000000..35c1e17fce1d
--- /dev/null
+++ b/drivers/iommu/dmar.c
@@ -0,0 +1,1311 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 *
22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables.
25 *
26 * These routines are used by both DMA-remapping and Interrupt-remapping
27 */
28
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/iova.h>
32#include <linux/intel-iommu.h>
33#include <linux/timer.h>
34#include <linux/irq.h>
35#include <linux/interrupt.h>
36#include <linux/tboot.h>
37#include <linux/dmi.h>
38#include <linux/slab.h>
39#include <asm/iommu_table.h>
40
41#define PREFIX "DMAR: "
42
43/* No locks are needed as DMA remapping hardware unit
44 * list is constructed at boot time and hotplug of
45 * these units is not supported by the architecture.
46 */
47LIST_HEAD(dmar_drhd_units);
48
49struct acpi_table_header * __initdata dmar_tbl;
50static acpi_size dmar_tbl_size;
51
52static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53{
54 /*
55 * add INCLUDE_ALL at the tail, so scanning the list will find it at
56 * the very end.
57 */
58 if (drhd->include_all)
59 list_add_tail(&drhd->list, &dmar_drhd_units);
60 else
61 list_add(&drhd->list, &dmar_drhd_units);
62}
63
64static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
65 struct pci_dev **dev, u16 segment)
66{
67 struct pci_bus *bus;
68 struct pci_dev *pdev = NULL;
69 struct acpi_dmar_pci_path *path;
70 int count;
71
72 bus = pci_find_bus(segment, scope->bus);
73 path = (struct acpi_dmar_pci_path *)(scope + 1);
74 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
75 / sizeof(struct acpi_dmar_pci_path);
76
77 while (count) {
78 if (pdev)
79 pci_dev_put(pdev);
80 /*
81 * Some BIOSes list non-existent devices in the DMAR table, just
82 * ignore them
83 */
84 if (!bus) {
85 printk(KERN_WARNING
86 PREFIX "Device scope bus [%d] not found\n",
87 scope->bus);
88 break;
89 }
90 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
91 if (!pdev) {
92 printk(KERN_WARNING PREFIX
93 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
94 segment, bus->number, path->dev, path->fn);
95 break;
96 }
97 path++;
98 count--;
99 bus = pdev->subordinate;
100 }
101 if (!pdev) {
102 printk(KERN_WARNING PREFIX
103 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
104 segment, scope->bus, path->dev, path->fn);
105 *dev = NULL;
106 return 0;
107 }
108 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
109 pdev->subordinate) || (scope->entry_type == \
110 ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
111 pci_dev_put(pdev);
112 printk(KERN_WARNING PREFIX
113 "Device scope type does not match for %s\n",
114 pci_name(pdev));
115 return -EINVAL;
116 }
117 *dev = pdev;
118 return 0;
119}
120
121int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
122 struct pci_dev ***devices, u16 segment)
123{
124 struct acpi_dmar_device_scope *scope;
125 void * tmp = start;
126 int index;
127 int ret;
128
129 *cnt = 0;
130 while (start < end) {
131 scope = start;
132 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
133 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
134 (*cnt)++;
135 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
136 printk(KERN_WARNING PREFIX
137 "Unsupported device scope\n");
138 }
139 start += scope->length;
140 }
141 if (*cnt == 0)
142 return 0;
143
144 *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
145 if (!*devices)
146 return -ENOMEM;
147
148 start = tmp;
149 index = 0;
150 while (start < end) {
151 scope = start;
152 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
153 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
154 ret = dmar_parse_one_dev_scope(scope,
155 &(*devices)[index], segment);
156 if (ret) {
157 kfree(*devices);
158 return ret;
159 }
160 index ++;
161 }
162 start += scope->length;
163 }
164
165 return 0;
166}
167
168/**
169 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
170 * structure which uniquely represents one DMA remapping hardware unit
171 * present in the platform
172 */
173static int __init
174dmar_parse_one_drhd(struct acpi_dmar_header *header)
175{
176 struct acpi_dmar_hardware_unit *drhd;
177 struct dmar_drhd_unit *dmaru;
178 int ret = 0;
179
180 drhd = (struct acpi_dmar_hardware_unit *)header;
181 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
182 if (!dmaru)
183 return -ENOMEM;
184
185 dmaru->hdr = header;
186 dmaru->reg_base_addr = drhd->address;
187 dmaru->segment = drhd->segment;
188 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
189
190 ret = alloc_iommu(dmaru);
191 if (ret) {
192 kfree(dmaru);
193 return ret;
194 }
195 dmar_register_drhd_unit(dmaru);
196 return 0;
197}
198
199static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
200{
201 struct acpi_dmar_hardware_unit *drhd;
202 int ret = 0;
203
204 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
205
206 if (dmaru->include_all)
207 return 0;
208
209 ret = dmar_parse_dev_scope((void *)(drhd + 1),
210 ((void *)drhd) + drhd->header.length,
211 &dmaru->devices_cnt, &dmaru->devices,
212 drhd->segment);
213 if (ret) {
214 list_del(&dmaru->list);
215 kfree(dmaru);
216 }
217 return ret;
218}
219
220#ifdef CONFIG_ACPI_NUMA
221static int __init
222dmar_parse_one_rhsa(struct acpi_dmar_header *header)
223{
224 struct acpi_dmar_rhsa *rhsa;
225 struct dmar_drhd_unit *drhd;
226
227 rhsa = (struct acpi_dmar_rhsa *)header;
228 for_each_drhd_unit(drhd) {
229 if (drhd->reg_base_addr == rhsa->base_address) {
230 int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
231
232 if (!node_online(node))
233 node = -1;
234 drhd->iommu->node = node;
235 return 0;
236 }
237 }
238 WARN_TAINT(
239 1, TAINT_FIRMWARE_WORKAROUND,
240 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
241 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
242 drhd->reg_base_addr,
243 dmi_get_system_info(DMI_BIOS_VENDOR),
244 dmi_get_system_info(DMI_BIOS_VERSION),
245 dmi_get_system_info(DMI_PRODUCT_VERSION));
246
247 return 0;
248}
249#endif
250
251static void __init
252dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
253{
254 struct acpi_dmar_hardware_unit *drhd;
255 struct acpi_dmar_reserved_memory *rmrr;
256 struct acpi_dmar_atsr *atsr;
257 struct acpi_dmar_rhsa *rhsa;
258
259 switch (header->type) {
260 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
261 drhd = container_of(header, struct acpi_dmar_hardware_unit,
262 header);
263 printk (KERN_INFO PREFIX
264 "DRHD base: %#016Lx flags: %#x\n",
265 (unsigned long long)drhd->address, drhd->flags);
266 break;
267 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
268 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
269 header);
270 printk (KERN_INFO PREFIX
271 "RMRR base: %#016Lx end: %#016Lx\n",
272 (unsigned long long)rmrr->base_address,
273 (unsigned long long)rmrr->end_address);
274 break;
275 case ACPI_DMAR_TYPE_ATSR:
276 atsr = container_of(header, struct acpi_dmar_atsr, header);
277 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
278 break;
279 case ACPI_DMAR_HARDWARE_AFFINITY:
280 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
281 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
282 (unsigned long long)rhsa->base_address,
283 rhsa->proximity_domain);
284 break;
285 }
286}
287
288/**
289 * dmar_table_detect - checks to see if the platform supports DMAR devices
290 */
291static int __init dmar_table_detect(void)
292{
293 acpi_status status = AE_OK;
294
295 /* if we could find DMAR table, then there are DMAR devices */
296 status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
297 (struct acpi_table_header **)&dmar_tbl,
298 &dmar_tbl_size);
299
300 if (ACPI_SUCCESS(status) && !dmar_tbl) {
301 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
302 status = AE_NOT_FOUND;
303 }
304
305 return (ACPI_SUCCESS(status) ? 1 : 0);
306}
307
308/**
309 * parse_dmar_table - parses the DMA reporting table
310 */
311static int __init
312parse_dmar_table(void)
313{
314 struct acpi_table_dmar *dmar;
315 struct acpi_dmar_header *entry_header;
316 int ret = 0;
317
318 /*
319 * Do it again, the earlier dmar_tbl mapping could have been done with
320 * the fixed map.
321 */
322 dmar_table_detect();
323
324 /*
325 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
326 * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
327 */
328 dmar_tbl = tboot_get_dmar_table(dmar_tbl);
329
330 dmar = (struct acpi_table_dmar *)dmar_tbl;
331 if (!dmar)
332 return -ENODEV;
333
334 if (dmar->width < PAGE_SHIFT - 1) {
335 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
336 return -EINVAL;
337 }
338
339 printk (KERN_INFO PREFIX "Host address width %d\n",
340 dmar->width + 1);
341
342 entry_header = (struct acpi_dmar_header *)(dmar + 1);
343 while (((unsigned long)entry_header) <
344 (((unsigned long)dmar) + dmar_tbl->length)) {
345 /* Avoid looping forever on bad ACPI tables */
346 if (entry_header->length == 0) {
347 printk(KERN_WARNING PREFIX
348 "Invalid 0-length structure\n");
349 ret = -EINVAL;
350 break;
351 }
352
353 dmar_table_print_dmar_entry(entry_header);
354
355 switch (entry_header->type) {
356 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
357 ret = dmar_parse_one_drhd(entry_header);
358 break;
359 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
360 ret = dmar_parse_one_rmrr(entry_header);
361 break;
362 case ACPI_DMAR_TYPE_ATSR:
363 ret = dmar_parse_one_atsr(entry_header);
364 break;
365 case ACPI_DMAR_HARDWARE_AFFINITY:
366#ifdef CONFIG_ACPI_NUMA
367 ret = dmar_parse_one_rhsa(entry_header);
368#endif
369 break;
370 default:
371 printk(KERN_WARNING PREFIX
372 "Unknown DMAR structure type %d\n",
373 entry_header->type);
374 ret = 0; /* for forward compatibility */
375 break;
376 }
377 if (ret)
378 break;
379
380 entry_header = ((void *)entry_header + entry_header->length);
381 }
382 return ret;
383}
384
385static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
386 struct pci_dev *dev)
387{
388 int index;
389
390 while (dev) {
391 for (index = 0; index < cnt; index++)
392 if (dev == devices[index])
393 return 1;
394
395 /* Check our parent */
396 dev = dev->bus->self;
397 }
398
399 return 0;
400}
401
402struct dmar_drhd_unit *
403dmar_find_matched_drhd_unit(struct pci_dev *dev)
404{
405 struct dmar_drhd_unit *dmaru = NULL;
406 struct acpi_dmar_hardware_unit *drhd;
407
408 dev = pci_physfn(dev);
409
410 list_for_each_entry(dmaru, &dmar_drhd_units, list) {
411 drhd = container_of(dmaru->hdr,
412 struct acpi_dmar_hardware_unit,
413 header);
414
415 if (dmaru->include_all &&
416 drhd->segment == pci_domain_nr(dev->bus))
417 return dmaru;
418
419 if (dmar_pci_device_match(dmaru->devices,
420 dmaru->devices_cnt, dev))
421 return dmaru;
422 }
423
424 return NULL;
425}
426
427int __init dmar_dev_scope_init(void)
428{
429 static int dmar_dev_scope_initialized;
430 struct dmar_drhd_unit *drhd, *drhd_n;
431 int ret = -ENODEV;
432
433 if (dmar_dev_scope_initialized)
434 return dmar_dev_scope_initialized;
435
436 if (list_empty(&dmar_drhd_units))
437 goto fail;
438
439 list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
440 ret = dmar_parse_dev(drhd);
441 if (ret)
442 goto fail;
443 }
444
445 ret = dmar_parse_rmrr_atsr_dev();
446 if (ret)
447 goto fail;
448
449 dmar_dev_scope_initialized = 1;
450 return 0;
451
452fail:
453 dmar_dev_scope_initialized = ret;
454 return ret;
455}
456
457
458int __init dmar_table_init(void)
459{
460 static int dmar_table_initialized;
461 int ret;
462
463 if (dmar_table_initialized)
464 return 0;
465
466 dmar_table_initialized = 1;
467
468 ret = parse_dmar_table();
469 if (ret) {
470 if (ret != -ENODEV)
471 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
472 return ret;
473 }
474
475 if (list_empty(&dmar_drhd_units)) {
476 printk(KERN_INFO PREFIX "No DMAR devices found\n");
477 return -ENODEV;
478 }
479
480 return 0;
481}
482
483static void warn_invalid_dmar(u64 addr, const char *message)
484{
485 WARN_TAINT_ONCE(
486 1, TAINT_FIRMWARE_WORKAROUND,
487 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
488 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
489 addr, message,
490 dmi_get_system_info(DMI_BIOS_VENDOR),
491 dmi_get_system_info(DMI_BIOS_VERSION),
492 dmi_get_system_info(DMI_PRODUCT_VERSION));
493}
494
495int __init check_zero_address(void)
496{
497 struct acpi_table_dmar *dmar;
498 struct acpi_dmar_header *entry_header;
499 struct acpi_dmar_hardware_unit *drhd;
500
501 dmar = (struct acpi_table_dmar *)dmar_tbl;
502 entry_header = (struct acpi_dmar_header *)(dmar + 1);
503
504 while (((unsigned long)entry_header) <
505 (((unsigned long)dmar) + dmar_tbl->length)) {
506 /* Avoid looping forever on bad ACPI tables */
507 if (entry_header->length == 0) {
508 printk(KERN_WARNING PREFIX
509 "Invalid 0-length structure\n");
510 return 0;
511 }
512
513 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
514 void __iomem *addr;
515 u64 cap, ecap;
516
517 drhd = (void *)entry_header;
518 if (!drhd->address) {
519 warn_invalid_dmar(0, "");
520 goto failed;
521 }
522
523 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
524 if (!addr) {
525 printk("IOMMU: can't validate: %llx\n", drhd->address);
526 goto failed;
527 }
528 cap = dmar_readq(addr + DMAR_CAP_REG);
529 ecap = dmar_readq(addr + DMAR_ECAP_REG);
530 early_iounmap(addr, VTD_PAGE_SIZE);
531 if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
532 warn_invalid_dmar(drhd->address,
533 " returns all ones");
534 goto failed;
535 }
536 }
537
538 entry_header = ((void *)entry_header + entry_header->length);
539 }
540 return 1;
541
542failed:
543 return 0;
544}
545
546int __init detect_intel_iommu(void)
547{
548 int ret;
549
550 ret = dmar_table_detect();
551 if (ret)
552 ret = check_zero_address();
553 {
554 struct acpi_table_dmar *dmar;
555
556 dmar = (struct acpi_table_dmar *) dmar_tbl;
557
558 if (ret && intr_remapping_enabled && cpu_has_x2apic &&
559 dmar->flags & 0x1)
560 printk(KERN_INFO
561 "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
562
563 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
564 iommu_detected = 1;
565 /* Make sure ACS will be enabled */
566 pci_request_acs();
567 }
568
569#ifdef CONFIG_X86
570 if (ret)
571 x86_init.iommu.iommu_init = intel_iommu_init;
572#endif
573 }
574 early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
575 dmar_tbl = NULL;
576
577 return ret ? 1 : -ENODEV;
578}
579
580
581int alloc_iommu(struct dmar_drhd_unit *drhd)
582{
583 struct intel_iommu *iommu;
584 int map_size;
585 u32 ver;
586 static int iommu_allocated = 0;
587 int agaw = 0;
588 int msagaw = 0;
589
590 if (!drhd->reg_base_addr) {
591 warn_invalid_dmar(0, "");
592 return -EINVAL;
593 }
594
595 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
596 if (!iommu)
597 return -ENOMEM;
598
599 iommu->seq_id = iommu_allocated++;
600 sprintf (iommu->name, "dmar%d", iommu->seq_id);
601
602 iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
603 if (!iommu->reg) {
604 printk(KERN_ERR "IOMMU: can't map the region\n");
605 goto error;
606 }
607 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
608 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
609
610 if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
611 warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
612 goto err_unmap;
613 }
614
615 agaw = iommu_calculate_agaw(iommu);
616 if (agaw < 0) {
617 printk(KERN_ERR
618 "Cannot get a valid agaw for iommu (seq_id = %d)\n",
619 iommu->seq_id);
620 goto err_unmap;
621 }
622 msagaw = iommu_calculate_max_sagaw(iommu);
623 if (msagaw < 0) {
624 printk(KERN_ERR
625 "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
626 iommu->seq_id);
627 goto err_unmap;
628 }
629 iommu->agaw = agaw;
630 iommu->msagaw = msagaw;
631
632 iommu->node = -1;
633
634 /* the registers might be more than one page */
635 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
636 cap_max_fault_reg_offset(iommu->cap));
637 map_size = VTD_PAGE_ALIGN(map_size);
638 if (map_size > VTD_PAGE_SIZE) {
639 iounmap(iommu->reg);
640 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
641 if (!iommu->reg) {
642 printk(KERN_ERR "IOMMU: can't map the region\n");
643 goto error;
644 }
645 }
646
647 ver = readl(iommu->reg + DMAR_VER_REG);
648 pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
649 iommu->seq_id,
650 (unsigned long long)drhd->reg_base_addr,
651 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
652 (unsigned long long)iommu->cap,
653 (unsigned long long)iommu->ecap);
654
655 raw_spin_lock_init(&iommu->register_lock);
656
657 drhd->iommu = iommu;
658 return 0;
659
660 err_unmap:
661 iounmap(iommu->reg);
662 error:
663 kfree(iommu);
664 return -1;
665}
666
667void free_iommu(struct intel_iommu *iommu)
668{
669 if (!iommu)
670 return;
671
672 free_dmar_iommu(iommu);
673
674 if (iommu->reg)
675 iounmap(iommu->reg);
676 kfree(iommu);
677}
678
679/*
680 * Reclaim all the submitted descriptors which have completed their work.
681 */
682static inline void reclaim_free_desc(struct q_inval *qi)
683{
684 while (qi->desc_status[qi->free_tail] == QI_DONE ||
685 qi->desc_status[qi->free_tail] == QI_ABORT) {
686 qi->desc_status[qi->free_tail] = QI_FREE;
687 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
688 qi->free_cnt++;
689 }
690}
691
692static int qi_check_fault(struct intel_iommu *iommu, int index)
693{
694 u32 fault;
695 int head, tail;
696 struct q_inval *qi = iommu->qi;
697 int wait_index = (index + 1) % QI_LENGTH;
698
699 if (qi->desc_status[wait_index] == QI_ABORT)
700 return -EAGAIN;
701
702 fault = readl(iommu->reg + DMAR_FSTS_REG);
703
704 /*
705 * If IQE happens, the head points to the descriptor associated
706 * with the error. No new descriptors are fetched until the IQE
707 * is cleared.
708 */
709 if (fault & DMA_FSTS_IQE) {
710 head = readl(iommu->reg + DMAR_IQH_REG);
711 if ((head >> DMAR_IQ_SHIFT) == index) {
712 printk(KERN_ERR "VT-d detected invalid descriptor: "
713 "low=%llx, high=%llx\n",
714 (unsigned long long)qi->desc[index].low,
715 (unsigned long long)qi->desc[index].high);
716 memcpy(&qi->desc[index], &qi->desc[wait_index],
717 sizeof(struct qi_desc));
718 __iommu_flush_cache(iommu, &qi->desc[index],
719 sizeof(struct qi_desc));
720 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
721 return -EINVAL;
722 }
723 }
724
725 /*
726 * If ITE happens, all pending wait_desc commands are aborted.
727 * No new descriptors are fetched until the ITE is cleared.
728 */
729 if (fault & DMA_FSTS_ITE) {
730 head = readl(iommu->reg + DMAR_IQH_REG);
731 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
732 head |= 1;
733 tail = readl(iommu->reg + DMAR_IQT_REG);
734 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
735
736 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
737
738 do {
739 if (qi->desc_status[head] == QI_IN_USE)
740 qi->desc_status[head] = QI_ABORT;
741 head = (head - 2 + QI_LENGTH) % QI_LENGTH;
742 } while (head != tail);
743
744 if (qi->desc_status[wait_index] == QI_ABORT)
745 return -EAGAIN;
746 }
747
748 if (fault & DMA_FSTS_ICE)
749 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
750
751 return 0;
752}
753
754/*
755 * Submit the queued invalidation descriptor to the remapping
756 * hardware unit and wait for its completion.
757 */
758int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
759{
760 int rc;
761 struct q_inval *qi = iommu->qi;
762 struct qi_desc *hw, wait_desc;
763 int wait_index, index;
764 unsigned long flags;
765
766 if (!qi)
767 return 0;
768
769 hw = qi->desc;
770
771restart:
772 rc = 0;
773
774 raw_spin_lock_irqsave(&qi->q_lock, flags);
775 while (qi->free_cnt < 3) {
776 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
777 cpu_relax();
778 raw_spin_lock_irqsave(&qi->q_lock, flags);
779 }
780
781 index = qi->free_head;
782 wait_index = (index + 1) % QI_LENGTH;
783
784 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
785
786 hw[index] = *desc;
787
788 wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
789 QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
790 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
791
792 hw[wait_index] = wait_desc;
793
794 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
795 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
796
797 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
798 qi->free_cnt -= 2;
799
800 /*
801 * update the HW tail register indicating the presence of
802 * new descriptors.
803 */
804 writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
805
806 while (qi->desc_status[wait_index] != QI_DONE) {
807 /*
808 * We will leave the interrupts disabled, to prevent interrupt
809 * context to queue another cmd while a cmd is already submitted
810 * and waiting for completion on this cpu. This is to avoid
811 * a deadlock where the interrupt context can wait indefinitely
812 * for free slots in the queue.
813 */
814 rc = qi_check_fault(iommu, index);
815 if (rc)
816 break;
817
818 raw_spin_unlock(&qi->q_lock);
819 cpu_relax();
820 raw_spin_lock(&qi->q_lock);
821 }
822
823 qi->desc_status[index] = QI_DONE;
824
825 reclaim_free_desc(qi);
826 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
827
828 if (rc == -EAGAIN)
829 goto restart;
830
831 return rc;
832}
833
834/*
835 * Flush the global interrupt entry cache.
836 */
837void qi_global_iec(struct intel_iommu *iommu)
838{
839 struct qi_desc desc;
840
841 desc.low = QI_IEC_TYPE;
842 desc.high = 0;
843
844 /* should never fail */
845 qi_submit_sync(&desc, iommu);
846}
847
848void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
849 u64 type)
850{
851 struct qi_desc desc;
852
853 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
854 | QI_CC_GRAN(type) | QI_CC_TYPE;
855 desc.high = 0;
856
857 qi_submit_sync(&desc, iommu);
858}
859
860void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
861 unsigned int size_order, u64 type)
862{
863 u8 dw = 0, dr = 0;
864
865 struct qi_desc desc;
866 int ih = 0;
867
868 if (cap_write_drain(iommu->cap))
869 dw = 1;
870
871 if (cap_read_drain(iommu->cap))
872 dr = 1;
873
874 desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
875 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
876 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
877 | QI_IOTLB_AM(size_order);
878
879 qi_submit_sync(&desc, iommu);
880}
881
882void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
883 u64 addr, unsigned mask)
884{
885 struct qi_desc desc;
886
887 if (mask) {
888 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
889 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
890 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
891 } else
892 desc.high = QI_DEV_IOTLB_ADDR(addr);
893
894 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
895 qdep = 0;
896
897 desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
898 QI_DIOTLB_TYPE;
899
900 qi_submit_sync(&desc, iommu);
901}
902
903/*
904 * Disable Queued Invalidation interface.
905 */
906void dmar_disable_qi(struct intel_iommu *iommu)
907{
908 unsigned long flags;
909 u32 sts;
910 cycles_t start_time = get_cycles();
911
912 if (!ecap_qis(iommu->ecap))
913 return;
914
915 raw_spin_lock_irqsave(&iommu->register_lock, flags);
916
917 sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
918 if (!(sts & DMA_GSTS_QIES))
919 goto end;
920
921 /*
922 * Give a chance to HW to complete the pending invalidation requests.
923 */
924 while ((readl(iommu->reg + DMAR_IQT_REG) !=
925 readl(iommu->reg + DMAR_IQH_REG)) &&
926 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
927 cpu_relax();
928
929 iommu->gcmd &= ~DMA_GCMD_QIE;
930 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
931
932 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
933 !(sts & DMA_GSTS_QIES), sts);
934end:
935 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
936}
937
938/*
939 * Enable queued invalidation.
940 */
941static void __dmar_enable_qi(struct intel_iommu *iommu)
942{
943 u32 sts;
944 unsigned long flags;
945 struct q_inval *qi = iommu->qi;
946
947 qi->free_head = qi->free_tail = 0;
948 qi->free_cnt = QI_LENGTH;
949
950 raw_spin_lock_irqsave(&iommu->register_lock, flags);
951
952 /* write zero to the tail reg */
953 writel(0, iommu->reg + DMAR_IQT_REG);
954
955 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
956
957 iommu->gcmd |= DMA_GCMD_QIE;
958 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
959
960 /* Make sure hardware complete it */
961 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
962
963 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
964}
965
966/*
967 * Enable Queued Invalidation interface. This is a must to support
968 * interrupt-remapping. Also used by DMA-remapping, which replaces
969 * register based IOTLB invalidation.
970 */
971int dmar_enable_qi(struct intel_iommu *iommu)
972{
973 struct q_inval *qi;
974 struct page *desc_page;
975
976 if (!ecap_qis(iommu->ecap))
977 return -ENOENT;
978
979 /*
 980	 * queued invalidation is already set up and enabled.
981 */
982 if (iommu->qi)
983 return 0;
984
985 iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
986 if (!iommu->qi)
987 return -ENOMEM;
988
989 qi = iommu->qi;
990
991
992 desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
993 if (!desc_page) {
994 kfree(qi);
 995		iommu->qi = NULL;
996 return -ENOMEM;
997 }
998
999 qi->desc = page_address(desc_page);
1000
1001 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1002 if (!qi->desc_status) {
1003 free_page((unsigned long) qi->desc);
1004 kfree(qi);
1005		iommu->qi = NULL;
1006 return -ENOMEM;
1007 }
1008
1009 qi->free_head = qi->free_tail = 0;
1010 qi->free_cnt = QI_LENGTH;
1011
1012 raw_spin_lock_init(&qi->q_lock);
1013
1014 __dmar_enable_qi(iommu);
1015
1016 return 0;
1017}
1018
1019/* iommu interrupt handling. Most of it is MSI-like. */
1020
1021enum faulttype {
1022 DMA_REMAP,
1023 INTR_REMAP,
1024 UNKNOWN,
1025};
1026
1027static const char *dma_remap_fault_reasons[] =
1028{
1029 "Software",
1030 "Present bit in root entry is clear",
1031 "Present bit in context entry is clear",
1032 "Invalid context entry",
1033 "Access beyond MGAW",
1034 "PTE Write access is not set",
1035 "PTE Read access is not set",
1036 "Next page table ptr is invalid",
1037 "Root table address invalid",
1038 "Context table ptr is invalid",
1039 "non-zero reserved fields in RTP",
1040 "non-zero reserved fields in CTP",
1041 "non-zero reserved fields in PTE",
1042};
1043
1044static const char *intr_remap_fault_reasons[] =
1045{
1046 "Detected reserved fields in the decoded interrupt-remapped request",
1047 "Interrupt index exceeded the interrupt-remapping table size",
1048 "Present field in the IRTE entry is clear",
1049 "Error accessing interrupt-remapping table pointed by IRTA_REG",
1050 "Detected reserved fields in the IRTE entry",
1051 "Blocked a compatibility format interrupt request",
1052 "Blocked an interrupt request due to source-id verification failure",
1053};
1054
1055#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1056
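/*
 * Reason codes starting at 0x20 index the interrupt-remapping reason
 * strings; low codes within dma_remap_fault_reasons index the
 * DMA-remapping reason strings; everything else is reported as "Unknown".
 */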
1057const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1058{
1059	if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1060				ARRAY_SIZE(intr_remap_fault_reasons))) {
1061 *fault_type = INTR_REMAP;
1062 return intr_remap_fault_reasons[fault_reason - 0x20];
1063 } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1064 *fault_type = DMA_REMAP;
1065 return dma_remap_fault_reasons[fault_reason];
1066 } else {
1067 *fault_type = UNKNOWN;
1068 return "Unknown";
1069 }
1070}
1071
1072void dmar_msi_unmask(struct irq_data *data)
1073{
1074 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1075 unsigned long flag;
1076
1077 /* unmask it */
1078 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1079 writel(0, iommu->reg + DMAR_FECTL_REG);
1080 /* Read a reg to force flush the post write */
1081 readl(iommu->reg + DMAR_FECTL_REG);
1082 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1083}
1084
1085void dmar_msi_mask(struct irq_data *data)
1086{
1087 unsigned long flag;
1088 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1089
1090 /* mask it */
1091 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1092 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1093 /* Read a reg to force flush the post write */
1094 readl(iommu->reg + DMAR_FECTL_REG);
1095 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1096}
1097
1098void dmar_msi_write(int irq, struct msi_msg *msg)
1099{
1100 struct intel_iommu *iommu = irq_get_handler_data(irq);
1101 unsigned long flag;
1102
1103 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1104 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1105 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1106 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1107 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1108}
1109
1110void dmar_msi_read(int irq, struct msi_msg *msg)
1111{
1112 struct intel_iommu *iommu = irq_get_handler_data(irq);
1113 unsigned long flag;
1114
1115 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1116 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1117 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1118 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1119 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1120}
1121
1122static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1123 u8 fault_reason, u16 source_id, unsigned long long addr)
1124{
1125 const char *reason;
1126 int fault_type;
1127
1128 reason = dmar_get_fault_reason(fault_reason, &fault_type);
1129
1130 if (fault_type == INTR_REMAP)
1131		printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1132 "fault index %llx\n"
1133 "INTR-REMAP:[fault reason %02d] %s\n",
1134 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1135 PCI_FUNC(source_id & 0xFF), addr >> 48,
1136 fault_reason, reason);
1137 else
1138 printk(KERN_ERR
1139 "DMAR:[%s] Request device [%02x:%02x.%d] "
1140		       "fault addr %llx\n"
1141 "DMAR:[fault reason %02d] %s\n",
1142 (type ? "DMA Read" : "DMA Write"),
1143 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1144 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1145 return 0;
1146}
1147
1148#define PRIMARY_FAULT_REG_LEN (16)
1149irqreturn_t dmar_fault(int irq, void *dev_id)
1150{
1151 struct intel_iommu *iommu = dev_id;
1152 int reg, fault_index;
1153 u32 fault_status;
1154 unsigned long flag;
1155
1156 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1157 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1158 if (fault_status)
1159 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1160 fault_status);
1161
1162 /* TBD: ignore advanced fault log currently */
1163 if (!(fault_status & DMA_FSTS_PPF))
1164 goto clear_rest;
1165
1166 fault_index = dma_fsts_fault_record_index(fault_status);
1167 reg = cap_fault_reg_offset(iommu->cap);
1168 while (1) {
1169 u8 fault_reason;
1170 u16 source_id;
1171 u64 guest_addr;
1172 int type;
1173 u32 data;
1174
1175 /* highest 32 bits */
1176 data = readl(iommu->reg + reg +
1177 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1178 if (!(data & DMA_FRCD_F))
1179 break;
1180
1181 fault_reason = dma_frcd_fault_reason(data);
1182 type = dma_frcd_type(data);
1183
1184 data = readl(iommu->reg + reg +
1185 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1186 source_id = dma_frcd_source_id(data);
1187
1188 guest_addr = dmar_readq(iommu->reg + reg +
1189 fault_index * PRIMARY_FAULT_REG_LEN);
1190 guest_addr = dma_frcd_page_addr(guest_addr);
1191 /* clear the fault */
1192 writel(DMA_FRCD_F, iommu->reg + reg +
1193 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1194
1195 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1196
1197 dmar_fault_do_one(iommu, type, fault_reason,
1198 source_id, guest_addr);
1199
1200 fault_index++;
1201 if (fault_index >= cap_num_fault_regs(iommu->cap))
1202 fault_index = 0;
1203 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1204 }
1205clear_rest:
1206 /* clear all the other faults */
1207 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1208 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1209
1210 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1211 return IRQ_HANDLED;
1212}
1213
1214int dmar_set_interrupt(struct intel_iommu *iommu)
1215{
1216 int irq, ret;
1217
1218 /*
1219 * Check if the fault interrupt is already initialized.
1220 */
1221 if (iommu->irq)
1222 return 0;
1223
1224 irq = create_irq();
1225 if (!irq) {
1226 printk(KERN_ERR "IOMMU: no free vectors\n");
1227 return -EINVAL;
1228 }
1229
1230 irq_set_handler_data(irq, iommu);
1231 iommu->irq = irq;
1232
1233 ret = arch_setup_dmar_msi(irq);
1234 if (ret) {
1235 irq_set_handler_data(irq, NULL);
1236 iommu->irq = 0;
1237 destroy_irq(irq);
1238 return ret;
1239 }
1240
1241 ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1242 if (ret)
1243 printk(KERN_ERR "IOMMU: can't request irq\n");
1244 return ret;
1245}
1246
1247int __init enable_drhd_fault_handling(void)
1248{
1249 struct dmar_drhd_unit *drhd;
1250
1251 /*
1252 * Enable fault control interrupt.
1253 */
1254 for_each_drhd_unit(drhd) {
1255 int ret;
1256 struct intel_iommu *iommu = drhd->iommu;
1257 ret = dmar_set_interrupt(iommu);
1258
1259 if (ret) {
1260			printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1261			       "interrupt, ret %d\n",
1262 (unsigned long long)drhd->reg_base_addr, ret);
1263 return -1;
1264 }
1265
1266 /*
1267 * Clear any previous faults.
1268 */
1269 dmar_fault(iommu->irq, iommu);
1270 }
1271
1272 return 0;
1273}
1274
1275/*
1276 * Re-enable Queued Invalidation interface.
1277 */
1278int dmar_reenable_qi(struct intel_iommu *iommu)
1279{
1280 if (!ecap_qis(iommu->ecap))
1281 return -ENOENT;
1282
1283 if (!iommu->qi)
1284 return -ENOENT;
1285
1286 /*
1287 * First disable queued invalidation.
1288 */
1289 dmar_disable_qi(iommu);
1290 /*
1291 * Then enable queued invalidation again. Since there is no pending
1292 * invalidation requests now, it's safe to re-enable queued
1293 * invalidation.
1294 */
1295 __dmar_enable_qi(iommu);
1296
1297 return 0;
1298}
1299
1300/*
1301 * Check interrupt remapping support in DMAR table description.
1302 */
1303int __init dmar_ir_support(void)
1304{
1305 struct acpi_table_dmar *dmar;
1306 dmar = (struct acpi_table_dmar *)dmar_tbl;
1307 if (!dmar)
1308 return 0;
1309 return dmar->flags & 0x1;
1310}
1311IOMMU_INIT_POST(detect_intel_iommu);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
new file mode 100644
index 000000000000..bcbd693b351a
--- /dev/null
+++ b/drivers/iommu/intel-iommu.c
@@ -0,0 +1,4173 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/export.h>
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
36#include <linux/timer.h>
37#include <linux/iova.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/syscore_ops.h>
41#include <linux/tboot.h>
42#include <linux/dmi.h>
43#include <linux/pci-ats.h>
44#include <asm/cacheflush.h>
45#include <asm/iommu.h>
46
47#define ROOT_SIZE VTD_PAGE_SIZE
48#define CONTEXT_SIZE VTD_PAGE_SIZE
49
50#define IS_BRIDGE_HOST_DEVICE(pdev) \
51 ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56#define IOAPIC_RANGE_START (0xfee00000)
57#define IOAPIC_RANGE_END (0xfeefffff)
58#define IOVA_START_ADDR (0x1000)
59
60#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62#define MAX_AGAW_WIDTH 64
63
64#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
65#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
66
67/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
68 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
69#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
70 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
71#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
72
73#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
74#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
75#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
76
77/* page table handling */
78#define LEVEL_STRIDE (9)
79#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
80
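/*
 * VT-d page tables use a 9-bit stride per level, like x86-64. An AGAW of 0
 * corresponds to a 2-level table covering a 30-bit address space; each
 * additional AGAW step adds one level and 9 bits of width, which is what
 * the helpers below encode.
 */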
81static inline int agaw_to_level(int agaw)
82{
83 return agaw + 2;
84}
85
86static inline int agaw_to_width(int agaw)
87{
88 return 30 + agaw * LEVEL_STRIDE;
89}
90
91static inline int width_to_agaw(int width)
92{
93 return (width - 30) / LEVEL_STRIDE;
94}
95
96static inline unsigned int level_to_offset_bits(int level)
97{
98 return (level - 1) * LEVEL_STRIDE;
99}
100
101static inline int pfn_level_offset(unsigned long pfn, int level)
102{
103 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
104}
105
106static inline unsigned long level_mask(int level)
107{
108 return -1UL << level_to_offset_bits(level);
109}
110
111static inline unsigned long level_size(int level)
112{
113 return 1UL << level_to_offset_bits(level);
114}
115
116static inline unsigned long align_to_level(unsigned long pfn, int level)
117{
118 return (pfn + level_size(level) - 1) & level_mask(level);
119}
120
121static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
122{
123 return 1 << ((lvl - 1) * LEVEL_STRIDE);
124}
125
126/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
127 are never going to work. */
128static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
129{
130 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
131}
132
133static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
134{
135 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
136}
137static inline unsigned long page_to_dma_pfn(struct page *pg)
138{
139 return mm_to_dma_pfn(page_to_pfn(pg));
140}
141static inline unsigned long virt_to_dma_pfn(void *p)
142{
143 return page_to_dma_pfn(virt_to_page(p));
144}
145
146/* global iommu list, set NULL for ignored DMAR units */
147static struct intel_iommu **g_iommus;
148
149static void __init check_tylersburg_isoch(void);
150static int rwbf_quirk;
151
152/*
153 * set to 1 to panic kernel if can't successfully enable VT-d
154 * (used when kernel is launched w/ TXT)
155 */
156static int force_on = 0;
157
158/*
159 * 0: Present
160 * 1-11: Reserved
161 * 12-63: Context Ptr (12 - (haw-1))
162 * 64-127: Reserved
163 */
164struct root_entry {
165 u64 val;
166 u64 rsvd1;
167};
168#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
169static inline bool root_present(struct root_entry *root)
170{
171 return (root->val & 1);
172}
173static inline void set_root_present(struct root_entry *root)
174{
175 root->val |= 1;
176}
177static inline void set_root_value(struct root_entry *root, unsigned long value)
178{
179 root->val |= value & VTD_PAGE_MASK;
180}
181
182static inline struct context_entry *
183get_context_addr_from_root(struct root_entry *root)
184{
185 return (struct context_entry *)
186 (root_present(root)?phys_to_virt(
187 root->val & VTD_PAGE_MASK) :
188 NULL);
189}
190
191/*
192 * low 64 bits:
193 * 0: present
194 * 1: fault processing disable
195 * 2-3: translation type
196 * 12-63: address space root
197 * high 64 bits:
198 * 0-2: address width
199 * 3-6: aval
200 * 8-23: domain id
201 */
202struct context_entry {
203 u64 lo;
204 u64 hi;
205};
206
207static inline bool context_present(struct context_entry *context)
208{
209 return (context->lo & 1);
210}
211static inline void context_set_present(struct context_entry *context)
212{
213 context->lo |= 1;
214}
215
216static inline void context_set_fault_enable(struct context_entry *context)
217{
218 context->lo &= (((u64)-1) << 2) | 1;
219}
220
221static inline void context_set_translation_type(struct context_entry *context,
222 unsigned long value)
223{
224 context->lo &= (((u64)-1) << 4) | 3;
225 context->lo |= (value & 3) << 2;
226}
227
228static inline void context_set_address_root(struct context_entry *context,
229 unsigned long value)
230{
231 context->lo |= value & VTD_PAGE_MASK;
232}
233
234static inline void context_set_address_width(struct context_entry *context,
235 unsigned long value)
236{
237 context->hi |= value & 7;
238}
239
240static inline void context_set_domain_id(struct context_entry *context,
241 unsigned long value)
242{
243 context->hi |= (value & ((1 << 16) - 1)) << 8;
244}
245
246static inline void context_clear_entry(struct context_entry *context)
247{
248 context->lo = 0;
249 context->hi = 0;
250}
251
252/*
253 * 0: readable
254 * 1: writable
255 * 2-6: reserved
256 * 7: super page
257 * 8-10: available
258 * 11: snoop behavior
 259 * 12-63: Host physical address
260 */
261struct dma_pte {
262 u64 val;
263};
264
265static inline void dma_clear_pte(struct dma_pte *pte)
266{
267 pte->val = 0;
268}
269
270static inline void dma_set_pte_readable(struct dma_pte *pte)
271{
272 pte->val |= DMA_PTE_READ;
273}
274
275static inline void dma_set_pte_writable(struct dma_pte *pte)
276{
277 pte->val |= DMA_PTE_WRITE;
278}
279
280static inline void dma_set_pte_snp(struct dma_pte *pte)
281{
282 pte->val |= DMA_PTE_SNP;
283}
284
285static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
286{
287 pte->val = (pte->val & ~3) | (prot & 3);
288}
289
290static inline u64 dma_pte_addr(struct dma_pte *pte)
291{
292#ifdef CONFIG_64BIT
293 return pte->val & VTD_PAGE_MASK;
294#else
295 /* Must have a full atomic 64-bit read */
296 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
297#endif
298}
299
300static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
301{
302 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
303}
304
305static inline bool dma_pte_present(struct dma_pte *pte)
306{
307 return (pte->val & 3) != 0;
308}
309
310static inline bool dma_pte_superpage(struct dma_pte *pte)
311{
312 return (pte->val & (1 << 7));
313}
314
315static inline int first_pte_in_page(struct dma_pte *pte)
316{
317 return !((unsigned long)pte & ~VTD_PAGE_MASK);
318}
319
320/*
321 * This domain is a statically identity mapping domain.
 322 * 1. This domain creates a static 1:1 mapping to all usable memory.
 323 * 2. It maps to each iommu if successful.
 324 * 3. Each iommu maps to this domain if successful.
325 */
326static struct dmar_domain *si_domain;
327static int hw_pass_through = 1;
328
329/* devices under the same p2p bridge are owned in one domain */
330#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
331
 332/* domain represents a virtual machine; more than one device
 333 * across iommus may be owned by one domain, e.g. a kvm guest.
334 */
335#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
336
 337/* si_domain contains multiple devices */
338#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
339
340struct dmar_domain {
341 int id; /* domain id */
342 int nid; /* node id */
343 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
344
345 struct list_head devices; /* all devices' list */
346 struct iova_domain iovad; /* iova's that belong to this domain */
347
348 struct dma_pte *pgd; /* virtual address */
349 int gaw; /* max guest address width */
350
351 /* adjusted guest address width, 0 is level 2 30-bit */
352 int agaw;
353
354 int flags; /* flags to find out type of domain */
355
356 int iommu_coherency;/* indicate coherency of iommu access */
357 int iommu_snooping; /* indicate snooping control feature*/
358 int iommu_count; /* reference count of iommu */
359 int iommu_superpage;/* Level of superpages supported:
360 0 == 4KiB (no superpages), 1 == 2MiB,
 361					 2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
362 spinlock_t iommu_lock; /* protect iommu set in domain */
363 u64 max_addr; /* maximum mapped address */
364};
365
366/* PCI domain-device relationship */
367struct device_domain_info {
368 struct list_head link; /* link to domain siblings */
369 struct list_head global; /* link to global list */
370 int segment; /* PCI domain */
371 u8 bus; /* PCI bus number */
372 u8 devfn; /* PCI devfn number */
373 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
374 struct intel_iommu *iommu; /* IOMMU used by this device */
375 struct dmar_domain *domain; /* pointer to domain */
376};
377
378static void flush_unmaps_timeout(unsigned long data);
379
380DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
381
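/*
 * Unless intel_iommu=strict is used, unmapped IOVAs are not flushed from
 * the IOTLB immediately; they are batched in the tables below and released
 * either from the flush timer or once enough unmaps have accumulated.
 */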
382#define HIGH_WATER_MARK 250
383struct deferred_flush_tables {
384 int next;
385 struct iova *iova[HIGH_WATER_MARK];
386 struct dmar_domain *domain[HIGH_WATER_MARK];
387};
388
389static struct deferred_flush_tables *deferred_flush;
390
391/* bitmap for indexing intel_iommus */
392static int g_num_of_iommus;
393
394static DEFINE_SPINLOCK(async_umap_flush_lock);
395static LIST_HEAD(unmaps_to_do);
396
397static int timer_on;
398static long list_size;
399
400static void domain_remove_dev_info(struct dmar_domain *domain);
401
402#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
403int dmar_disabled = 0;
404#else
405int dmar_disabled = 1;
406#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
407
408static int dmar_map_gfx = 1;
409static int dmar_forcedac;
410static int intel_iommu_strict;
411static int intel_iommu_superpage = 1;
412
413int intel_iommu_gfx_mapped;
414EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
415
416#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
417static DEFINE_SPINLOCK(device_domain_lock);
418static LIST_HEAD(device_domain_list);
419
420static struct iommu_ops intel_iommu_ops;
421
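/*
 * Boot-time options, e.g. "intel_iommu=on,strict" or "intel_iommu=igfx_off".
 * Multiple comma-separated keywords may be combined; unrecognized keywords
 * are silently skipped.
 */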
422static int __init intel_iommu_setup(char *str)
423{
424 if (!str)
425 return -EINVAL;
426 while (*str) {
427 if (!strncmp(str, "on", 2)) {
428 dmar_disabled = 0;
429 printk(KERN_INFO "Intel-IOMMU: enabled\n");
430 } else if (!strncmp(str, "off", 3)) {
431 dmar_disabled = 1;
432 printk(KERN_INFO "Intel-IOMMU: disabled\n");
433 } else if (!strncmp(str, "igfx_off", 8)) {
434 dmar_map_gfx = 0;
435 printk(KERN_INFO
436 "Intel-IOMMU: disable GFX device mapping\n");
437 } else if (!strncmp(str, "forcedac", 8)) {
438 printk(KERN_INFO
439 "Intel-IOMMU: Forcing DAC for PCI devices\n");
440 dmar_forcedac = 1;
441 } else if (!strncmp(str, "strict", 6)) {
442 printk(KERN_INFO
443 "Intel-IOMMU: disable batched IOTLB flush\n");
444 intel_iommu_strict = 1;
445 } else if (!strncmp(str, "sp_off", 6)) {
446 printk(KERN_INFO
447 "Intel-IOMMU: disable supported super page\n");
448 intel_iommu_superpage = 0;
449 }
450
451 str += strcspn(str, ",");
452 while (*str == ',')
453 str++;
454 }
455 return 0;
456}
457__setup("intel_iommu=", intel_iommu_setup);
458
459static struct kmem_cache *iommu_domain_cache;
460static struct kmem_cache *iommu_devinfo_cache;
461static struct kmem_cache *iommu_iova_cache;
462
463static inline void *alloc_pgtable_page(int node)
464{
465 struct page *page;
466 void *vaddr = NULL;
467
468 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
469 if (page)
470 vaddr = page_address(page);
471 return vaddr;
472}
473
474static inline void free_pgtable_page(void *vaddr)
475{
476 free_page((unsigned long)vaddr);
477}
478
479static inline void *alloc_domain_mem(void)
480{
481 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
482}
483
484static void free_domain_mem(void *vaddr)
485{
486 kmem_cache_free(iommu_domain_cache, vaddr);
487}
488
489static inline void * alloc_devinfo_mem(void)
490{
491 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
492}
493
494static inline void free_devinfo_mem(void *vaddr)
495{
496 kmem_cache_free(iommu_devinfo_cache, vaddr);
497}
498
499struct iova *alloc_iova_mem(void)
500{
501 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
502}
503
504void free_iova_mem(struct iova *iova)
505{
506 kmem_cache_free(iommu_iova_cache, iova);
507}
508
509
510static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
511{
512 unsigned long sagaw;
513 int agaw = -1;
514
515 sagaw = cap_sagaw(iommu->cap);
516 for (agaw = width_to_agaw(max_gaw);
517 agaw >= 0; agaw--) {
518 if (test_bit(agaw, &sagaw))
519 break;
520 }
521
522 return agaw;
523}
524
525/*
526 * Calculate max SAGAW for each iommu.
527 */
528int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
529{
530 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
531}
532
533/*
534 * calculate agaw for each iommu.
 535 * "SAGAW" may be different across iommus; use a default agaw, and
 536 * fall back to a smaller supported agaw for iommus that don't support the default.
537 */
538int iommu_calculate_agaw(struct intel_iommu *iommu)
539{
540 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
541}
542
 543/* This function only returns a single iommu in a domain */
544static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
545{
546 int iommu_id;
547
548 /* si_domain and vm domain should not get here. */
549 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
550 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
551
552 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
553 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
554 return NULL;
555
556 return g_iommus[iommu_id];
557}
558
559static void domain_update_iommu_coherency(struct dmar_domain *domain)
560{
561 int i;
562
563 domain->iommu_coherency = 1;
564
565 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
566 if (!ecap_coherent(g_iommus[i]->ecap)) {
567 domain->iommu_coherency = 0;
568 break;
569 }
570 }
571}
572
573static void domain_update_iommu_snooping(struct dmar_domain *domain)
574{
575 int i;
576
577 domain->iommu_snooping = 1;
578
579 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
580 if (!ecap_sc_support(g_iommus[i]->ecap)) {
581 domain->iommu_snooping = 0;
582 break;
583 }
584 }
585}
586
587static void domain_update_iommu_superpage(struct dmar_domain *domain)
588{
589 struct dmar_drhd_unit *drhd;
590 struct intel_iommu *iommu = NULL;
591 int mask = 0xf;
592
593 if (!intel_iommu_superpage) {
594 domain->iommu_superpage = 0;
595 return;
596 }
597
598 /* set iommu_superpage to the smallest common denominator */
599 for_each_active_iommu(iommu, drhd) {
600 mask &= cap_super_page_val(iommu->cap);
601 if (!mask) {
602 break;
603 }
604 }
605 domain->iommu_superpage = fls(mask);
606}
607
608/* Some capabilities may be different across iommus */
609static void domain_update_iommu_cap(struct dmar_domain *domain)
610{
611 domain_update_iommu_coherency(domain);
612 domain_update_iommu_snooping(domain);
613 domain_update_iommu_superpage(domain);
614}
615
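/*
 * Find the IOMMU (DRHD unit) responsible for a given PCI device, identified
 * by segment/bus/devfn. A device matches either directly, by sitting behind
 * a listed bridge whose secondary bus range covers it, or via an
 * INCLUDE_ALL unit.
 */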
616static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
617{
618 struct dmar_drhd_unit *drhd = NULL;
619 int i;
620
621 for_each_drhd_unit(drhd) {
622 if (drhd->ignored)
623 continue;
624 if (segment != drhd->segment)
625 continue;
626
627 for (i = 0; i < drhd->devices_cnt; i++) {
628 if (drhd->devices[i] &&
629 drhd->devices[i]->bus->number == bus &&
630 drhd->devices[i]->devfn == devfn)
631 return drhd->iommu;
632 if (drhd->devices[i] &&
633 drhd->devices[i]->subordinate &&
634 drhd->devices[i]->subordinate->number <= bus &&
635 drhd->devices[i]->subordinate->subordinate >= bus)
636 return drhd->iommu;
637 }
638
639 if (drhd->include_all)
640 return drhd->iommu;
641 }
642
643 return NULL;
644}
645
646static void domain_flush_cache(struct dmar_domain *domain,
647 void *addr, int size)
648{
649 if (!domain->iommu_coherency)
650 clflush_cache_range(addr, size);
651}
652
653/* Gets context entry for a given bus and devfn */
654static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
655 u8 bus, u8 devfn)
656{
657 struct root_entry *root;
658 struct context_entry *context;
659 unsigned long phy_addr;
660 unsigned long flags;
661
662 spin_lock_irqsave(&iommu->lock, flags);
663 root = &iommu->root_entry[bus];
664 context = get_context_addr_from_root(root);
665 if (!context) {
666 context = (struct context_entry *)
667 alloc_pgtable_page(iommu->node);
668 if (!context) {
669 spin_unlock_irqrestore(&iommu->lock, flags);
670 return NULL;
671 }
672 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
673 phy_addr = virt_to_phys((void *)context);
674 set_root_value(root, phy_addr);
675 set_root_present(root);
676 __iommu_flush_cache(iommu, root, sizeof(*root));
677 }
678 spin_unlock_irqrestore(&iommu->lock, flags);
679 return &context[devfn];
680}
681
682static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
683{
684 struct root_entry *root;
685 struct context_entry *context;
686 int ret;
687 unsigned long flags;
688
689 spin_lock_irqsave(&iommu->lock, flags);
690 root = &iommu->root_entry[bus];
691 context = get_context_addr_from_root(root);
692 if (!context) {
693 ret = 0;
694 goto out;
695 }
696 ret = context_present(&context[devfn]);
697out:
698 spin_unlock_irqrestore(&iommu->lock, flags);
699 return ret;
700}
701
702static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
703{
704 struct root_entry *root;
705 struct context_entry *context;
706 unsigned long flags;
707
708 spin_lock_irqsave(&iommu->lock, flags);
709 root = &iommu->root_entry[bus];
710 context = get_context_addr_from_root(root);
711 if (context) {
712 context_clear_entry(&context[devfn]);
713 __iommu_flush_cache(iommu, &context[devfn], \
714 sizeof(*context));
715 }
716 spin_unlock_irqrestore(&iommu->lock, flags);
717}
718
719static void free_context_table(struct intel_iommu *iommu)
720{
721 struct root_entry *root;
722 int i;
723 unsigned long flags;
724 struct context_entry *context;
725
726 spin_lock_irqsave(&iommu->lock, flags);
727 if (!iommu->root_entry) {
728 goto out;
729 }
730 for (i = 0; i < ROOT_ENTRY_NR; i++) {
731 root = &iommu->root_entry[i];
732 context = get_context_addr_from_root(root);
733 if (context)
734 free_pgtable_page(context);
735 }
736 free_pgtable_page(iommu->root_entry);
737 iommu->root_entry = NULL;
738out:
739 spin_unlock_irqrestore(&iommu->lock, flags);
740}
741
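/*
 * Return the PTE for @pfn at @target_level. When a specific level is
 * requested, missing intermediate page-table pages are allocated on the
 * way down; with a target_level of 0 the walk simply stops at the first
 * superpage or non-present entry it meets, otherwise returning the
 * 4KiB-level PTE.
 */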
742static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
743 unsigned long pfn, int target_level)
744{
745 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
746 struct dma_pte *parent, *pte = NULL;
747 int level = agaw_to_level(domain->agaw);
748 int offset;
749
750 BUG_ON(!domain->pgd);
751 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
752 parent = domain->pgd;
753
754 while (level > 0) {
755 void *tmp_page;
756
757 offset = pfn_level_offset(pfn, level);
758 pte = &parent[offset];
759 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
760 break;
761 if (level == target_level)
762 break;
763
764 if (!dma_pte_present(pte)) {
765 uint64_t pteval;
766
767 tmp_page = alloc_pgtable_page(domain->nid);
768
769 if (!tmp_page)
770 return NULL;
771
772 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
773 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
774 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
775 /* Someone else set it while we were thinking; use theirs. */
776 free_pgtable_page(tmp_page);
777 } else {
778 dma_pte_addr(pte);
779 domain_flush_cache(domain, pte, sizeof(*pte));
780 }
781 }
782 parent = phys_to_virt(dma_pte_addr(pte));
783 level--;
784 }
785
786 return pte;
787}
788
789
790/* return address's pte at specific level */
791static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
792 unsigned long pfn,
793 int level, int *large_page)
794{
795 struct dma_pte *parent, *pte = NULL;
796 int total = agaw_to_level(domain->agaw);
797 int offset;
798
799 parent = domain->pgd;
800 while (level <= total) {
801 offset = pfn_level_offset(pfn, total);
802 pte = &parent[offset];
803 if (level == total)
804 return pte;
805
806 if (!dma_pte_present(pte)) {
807 *large_page = total;
808 break;
809 }
810
811 if (pte->val & DMA_PTE_LARGE_PAGE) {
812 *large_page = total;
813 return pte;
814 }
815
816 parent = phys_to_virt(dma_pte_addr(pte));
817 total--;
818 }
819 return NULL;
820}
821
822/* clear last level pte, a tlb flush should be followed */
823static int dma_pte_clear_range(struct dmar_domain *domain,
824 unsigned long start_pfn,
825 unsigned long last_pfn)
826{
827 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
828 unsigned int large_page = 1;
829 struct dma_pte *first_pte, *pte;
830 int order;
831
832 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
833 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
834 BUG_ON(start_pfn > last_pfn);
835
836 /* we don't need lock here; nobody else touches the iova range */
837 do {
838 large_page = 1;
839 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
840 if (!pte) {
841 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
842 continue;
843 }
844 do {
845 dma_clear_pte(pte);
846 start_pfn += lvl_to_nr_pages(large_page);
847 pte++;
848 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
849
850 domain_flush_cache(domain, first_pte,
851 (void *)pte - (void *)first_pte);
852
853 } while (start_pfn && start_pfn <= last_pfn);
854
855 order = (large_page - 1) * 9;
856 return order;
857}
858
859/* free page table pages. last level pte should already be cleared */
860static void dma_pte_free_pagetable(struct dmar_domain *domain,
861 unsigned long start_pfn,
862 unsigned long last_pfn)
863{
864 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
865 struct dma_pte *first_pte, *pte;
866 int total = agaw_to_level(domain->agaw);
867 int level;
868 unsigned long tmp;
869 int large_page = 2;
870
871 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
872 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
873 BUG_ON(start_pfn > last_pfn);
874
875 /* We don't need lock here; nobody else touches the iova range */
876 level = 2;
877 while (level <= total) {
878 tmp = align_to_level(start_pfn, level);
879
880 /* If we can't even clear one PTE at this level, we're done */
881 if (tmp + level_size(level) - 1 > last_pfn)
882 return;
883
884 do {
885 large_page = level;
886 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
887 if (large_page > level)
888 level = large_page + 1;
889 if (!pte) {
890 tmp = align_to_level(tmp + 1, level + 1);
891 continue;
892 }
893 do {
894 if (dma_pte_present(pte)) {
895 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
896 dma_clear_pte(pte);
897 }
898 pte++;
899 tmp += level_size(level);
900 } while (!first_pte_in_page(pte) &&
901 tmp + level_size(level) - 1 <= last_pfn);
902
903 domain_flush_cache(domain, first_pte,
904 (void *)pte - (void *)first_pte);
905
906 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
907 level++;
908 }
909 /* free pgd */
910 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
911 free_pgtable_page(domain->pgd);
912 domain->pgd = NULL;
913 }
914}
915
916/* iommu handling */
917static int iommu_alloc_root_entry(struct intel_iommu *iommu)
918{
919 struct root_entry *root;
920 unsigned long flags;
921
922 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
923 if (!root)
924 return -ENOMEM;
925
926 __iommu_flush_cache(iommu, root, ROOT_SIZE);
927
928 spin_lock_irqsave(&iommu->lock, flags);
929 iommu->root_entry = root;
930 spin_unlock_irqrestore(&iommu->lock, flags);
931
932 return 0;
933}
934
935static void iommu_set_root_entry(struct intel_iommu *iommu)
936{
937 void *addr;
938 u32 sts;
939 unsigned long flag;
940
941 addr = iommu->root_entry;
942
943 raw_spin_lock_irqsave(&iommu->register_lock, flag);
944 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
945
946 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
947
948 /* Make sure hardware complete it */
949 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
950 readl, (sts & DMA_GSTS_RTPS), sts);
951
952 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
953}
954
955static void iommu_flush_write_buffer(struct intel_iommu *iommu)
956{
957 u32 val;
958 unsigned long flag;
959
960 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
961 return;
962
963 raw_spin_lock_irqsave(&iommu->register_lock, flag);
964 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
965
966 /* Make sure hardware complete it */
967 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
968 readl, (!(val & DMA_GSTS_WBFS)), val);
969
970 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
971}
972
 973/* return value determines if we need a write buffer flush */
974static void __iommu_flush_context(struct intel_iommu *iommu,
975 u16 did, u16 source_id, u8 function_mask,
976 u64 type)
977{
978 u64 val = 0;
979 unsigned long flag;
980
981 switch (type) {
982 case DMA_CCMD_GLOBAL_INVL:
983 val = DMA_CCMD_GLOBAL_INVL;
984 break;
985 case DMA_CCMD_DOMAIN_INVL:
986 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
987 break;
988 case DMA_CCMD_DEVICE_INVL:
989 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
990 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
991 break;
992 default:
993 BUG();
994 }
995 val |= DMA_CCMD_ICC;
996
997 raw_spin_lock_irqsave(&iommu->register_lock, flag);
998 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
999
1000 /* Make sure hardware complete it */
1001 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1002 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1003
1004 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1005}
1006
1007/* return value determines if we need a write buffer flush */
1008static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1009 u64 addr, unsigned int size_order, u64 type)
1010{
1011 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1012 u64 val = 0, val_iva = 0;
1013 unsigned long flag;
1014
1015 switch (type) {
1016 case DMA_TLB_GLOBAL_FLUSH:
1017 /* global flush doesn't need set IVA_REG */
1018 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1019 break;
1020 case DMA_TLB_DSI_FLUSH:
1021 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1022 break;
1023 case DMA_TLB_PSI_FLUSH:
1024 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1025 /* Note: always flush non-leaf currently */
1026 val_iva = size_order | addr;
1027 break;
1028 default:
1029 BUG();
1030 }
1031 /* Note: set drain read/write */
1032#if 0
1033 /*
1034	 * This is probably only needed to be extra safe; it looks like we
1035	 * can ignore it without any impact.
1036 */
1037 if (cap_read_drain(iommu->cap))
1038 val |= DMA_TLB_READ_DRAIN;
1039#endif
1040 if (cap_write_drain(iommu->cap))
1041 val |= DMA_TLB_WRITE_DRAIN;
1042
1043 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1044 /* Note: Only uses first TLB reg currently */
1045 if (val_iva)
1046 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1047 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1048
1049 /* Make sure hardware complete it */
1050 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1051 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1052
1053 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1054
1055 /* check IOTLB invalidation granularity */
1056 if (DMA_TLB_IAIG(val) == 0)
1057		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
1058 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1059 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1060 (unsigned long long)DMA_TLB_IIRG(type),
1061 (unsigned long long)DMA_TLB_IAIG(val));
1062}
1063
1064static struct device_domain_info *iommu_support_dev_iotlb(
1065 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1066{
1067 int found = 0;
1068 unsigned long flags;
1069 struct device_domain_info *info;
1070 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1071
1072 if (!ecap_dev_iotlb_support(iommu->ecap))
1073 return NULL;
1074
1075 if (!iommu->qi)
1076 return NULL;
1077
1078 spin_lock_irqsave(&device_domain_lock, flags);
1079 list_for_each_entry(info, &domain->devices, link)
1080 if (info->bus == bus && info->devfn == devfn) {
1081 found = 1;
1082 break;
1083 }
1084 spin_unlock_irqrestore(&device_domain_lock, flags);
1085
1086 if (!found || !info->dev)
1087 return NULL;
1088
1089 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1090 return NULL;
1091
1092 if (!dmar_find_matched_atsr_unit(info->dev))
1093 return NULL;
1094
1095 info->iommu = iommu;
1096
1097 return info;
1098}
1099
1100static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1101{
1102 if (!info)
1103 return;
1104
1105 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1106}
1107
1108static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1109{
1110 if (!info->dev || !pci_ats_enabled(info->dev))
1111 return;
1112
1113 pci_disable_ats(info->dev);
1114}
1115
1116static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1117 u64 addr, unsigned mask)
1118{
1119 u16 sid, qdep;
1120 unsigned long flags;
1121 struct device_domain_info *info;
1122
1123 spin_lock_irqsave(&device_domain_lock, flags);
1124 list_for_each_entry(info, &domain->devices, link) {
1125 if (!info->dev || !pci_ats_enabled(info->dev))
1126 continue;
1127
1128 sid = info->bus << 8 | info->devfn;
1129 qdep = pci_ats_queue_depth(info->dev);
1130 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1131 }
1132 spin_unlock_irqrestore(&device_domain_lock, flags);
1133}
1134
1135static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1136 unsigned long pfn, unsigned int pages, int map)
1137{
1138 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1139 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1140
1141 BUG_ON(pages == 0);
1142
1143 /*
1144	 * Fall back to domain-selective flush if there is no PSI support or the
1145	 * size is too big.
1146	 * PSI requires the page size to be 2^x, with the base address naturally
1147	 * aligned to that size.
1148 */
1149 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1150 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1151 DMA_TLB_DSI_FLUSH);
1152 else
1153 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1154 DMA_TLB_PSI_FLUSH);
1155
1156 /*
1157 * In caching mode, changes of pages from non-present to present require
1158 * flush. However, device IOTLB doesn't need to be flushed in this case.
1159 */
1160 if (!cap_caching_mode(iommu->cap) || !map)
1161 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1162}
1163
1164static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1165{
1166 u32 pmen;
1167 unsigned long flags;
1168
1169 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1170 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1171 pmen &= ~DMA_PMEN_EPM;
1172 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1173
1174 /* wait for the protected region status bit to clear */
1175 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1176 readl, !(pmen & DMA_PMEN_PRS), pmen);
1177
1178 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1179}
1180
1181static int iommu_enable_translation(struct intel_iommu *iommu)
1182{
1183 u32 sts;
1184 unsigned long flags;
1185
1186 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1187 iommu->gcmd |= DMA_GCMD_TE;
1188 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1189
1190 /* Make sure hardware complete it */
1191 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1192 readl, (sts & DMA_GSTS_TES), sts);
1193
1194 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1195 return 0;
1196}
1197
1198static int iommu_disable_translation(struct intel_iommu *iommu)
1199{
1200 u32 sts;
1201 unsigned long flag;
1202
1203 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1204 iommu->gcmd &= ~DMA_GCMD_TE;
1205 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1206
1207 /* Make sure hardware complete it */
1208 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1209 readl, (!(sts & DMA_GSTS_TES)), sts);
1210
1211 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1212 return 0;
1213}
1214
1215
1216static int iommu_init_domains(struct intel_iommu *iommu)
1217{
1218 unsigned long ndomains;
1219 unsigned long nlongs;
1220
1221 ndomains = cap_ndoms(iommu->cap);
1222	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1223 ndomains);
1224 nlongs = BITS_TO_LONGS(ndomains);
1225
1226 spin_lock_init(&iommu->lock);
1227
1228 /* TBD: there might be 64K domains,
1229 * consider other allocation for future chip
1230 */
1231 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1232 if (!iommu->domain_ids) {
1233 printk(KERN_ERR "Allocating domain id array failed\n");
1234 return -ENOMEM;
1235 }
1236 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1237 GFP_KERNEL);
1238 if (!iommu->domains) {
1239 printk(KERN_ERR "Allocating domain array failed\n");
1240 return -ENOMEM;
1241 }
1242
1243 /*
1244 * if Caching mode is set, then invalid translations are tagged
1245 * with domainid 0. Hence we need to pre-allocate it.
1246 */
1247 if (cap_caching_mode(iommu->cap))
1248 set_bit(0, iommu->domain_ids);
1249 return 0;
1250}
1251
1252
1253static void domain_exit(struct dmar_domain *domain);
1254static void vm_domain_exit(struct dmar_domain *domain);
1255
1256void free_dmar_iommu(struct intel_iommu *iommu)
1257{
1258 struct dmar_domain *domain;
1259 int i;
1260 unsigned long flags;
1261
1262 if ((iommu->domains) && (iommu->domain_ids)) {
1263 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1264 domain = iommu->domains[i];
1265 clear_bit(i, iommu->domain_ids);
1266
1267 spin_lock_irqsave(&domain->iommu_lock, flags);
1268 if (--domain->iommu_count == 0) {
1269 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1270 vm_domain_exit(domain);
1271 else
1272 domain_exit(domain);
1273 }
1274 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1275 }
1276 }
1277
1278 if (iommu->gcmd & DMA_GCMD_TE)
1279 iommu_disable_translation(iommu);
1280
1281 if (iommu->irq) {
1282 irq_set_handler_data(iommu->irq, NULL);
1283 /* This will mask the irq */
1284 free_irq(iommu->irq, iommu);
1285 destroy_irq(iommu->irq);
1286 }
1287
1288 kfree(iommu->domains);
1289 kfree(iommu->domain_ids);
1290
1291 g_iommus[iommu->seq_id] = NULL;
1292
1293 /* if all iommus are freed, free g_iommus */
1294 for (i = 0; i < g_num_of_iommus; i++) {
1295 if (g_iommus[i])
1296 break;
1297 }
1298
1299 if (i == g_num_of_iommus)
1300 kfree(g_iommus);
1301
1302 /* free context mapping */
1303 free_context_table(iommu);
1304}
1305
1306static struct dmar_domain *alloc_domain(void)
1307{
1308 struct dmar_domain *domain;
1309
1310 domain = alloc_domain_mem();
1311 if (!domain)
1312 return NULL;
1313
1314 domain->nid = -1;
1315 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1316 domain->flags = 0;
1317
1318 return domain;
1319}
1320
1321static int iommu_attach_domain(struct dmar_domain *domain,
1322 struct intel_iommu *iommu)
1323{
1324 int num;
1325 unsigned long ndomains;
1326 unsigned long flags;
1327
1328 ndomains = cap_ndoms(iommu->cap);
1329
1330 spin_lock_irqsave(&iommu->lock, flags);
1331
1332 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1333 if (num >= ndomains) {
1334 spin_unlock_irqrestore(&iommu->lock, flags);
1335 printk(KERN_ERR "IOMMU: no free domain ids\n");
1336 return -ENOMEM;
1337 }
1338
1339 domain->id = num;
1340 set_bit(num, iommu->domain_ids);
1341 set_bit(iommu->seq_id, &domain->iommu_bmp);
1342 iommu->domains[num] = domain;
1343 spin_unlock_irqrestore(&iommu->lock, flags);
1344
1345 return 0;
1346}
1347
1348static void iommu_detach_domain(struct dmar_domain *domain,
1349 struct intel_iommu *iommu)
1350{
1351 unsigned long flags;
1352 int num, ndomains;
1353 int found = 0;
1354
1355 spin_lock_irqsave(&iommu->lock, flags);
1356 ndomains = cap_ndoms(iommu->cap);
1357 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1358 if (iommu->domains[num] == domain) {
1359 found = 1;
1360 break;
1361 }
1362 }
1363
1364 if (found) {
1365 clear_bit(num, iommu->domain_ids);
1366 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1367 iommu->domains[num] = NULL;
1368 }
1369 spin_unlock_irqrestore(&iommu->lock, flags);
1370}
1371
1372static struct iova_domain reserved_iova_list;
1373static struct lock_class_key reserved_rbtree_key;
1374
1375static int dmar_init_reserved_ranges(void)
1376{
1377 struct pci_dev *pdev = NULL;
1378 struct iova *iova;
1379 int i;
1380
1381 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1382
1383 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1384 &reserved_rbtree_key);
1385
1386 /* IOAPIC ranges shouldn't be accessed by DMA */
1387 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1388 IOVA_PFN(IOAPIC_RANGE_END));
1389 if (!iova) {
1390 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1391 return -ENODEV;
1392 }
1393
1394 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1395 for_each_pci_dev(pdev) {
1396 struct resource *r;
1397
1398 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1399 r = &pdev->resource[i];
1400 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1401 continue;
1402 iova = reserve_iova(&reserved_iova_list,
1403 IOVA_PFN(r->start),
1404 IOVA_PFN(r->end));
1405 if (!iova) {
1406 printk(KERN_ERR "Reserve iova failed\n");
1407 return -ENODEV;
1408 }
1409 }
1410 }
1411 return 0;
1412}
1413
1414static void domain_reserve_special_ranges(struct dmar_domain *domain)
1415{
1416 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1417}
1418
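/*
 * Round a guest address width up to the next width that a whole number of
 * 9-bit page-table levels above the 12-bit page offset can cover (e.g. a
 * 48-bit guest width stays 48, a 40-bit one is rounded up to 48), capped
 * at 64 bits.
 */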
1419static inline int guestwidth_to_adjustwidth(int gaw)
1420{
1421 int agaw;
1422 int r = (gaw - 12) % 9;
1423
1424 if (r == 0)
1425 agaw = gaw;
1426 else
1427 agaw = gaw + 9 - r;
1428 if (agaw > 64)
1429 agaw = 64;
1430 return agaw;
1431}
1432
1433static int domain_init(struct dmar_domain *domain, int guest_width)
1434{
1435 struct intel_iommu *iommu;
1436 int adjust_width, agaw;
1437 unsigned long sagaw;
1438
1439 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1440 spin_lock_init(&domain->iommu_lock);
1441
1442 domain_reserve_special_ranges(domain);
1443
1444 /* calculate AGAW */
1445 iommu = domain_get_iommu(domain);
1446 if (guest_width > cap_mgaw(iommu->cap))
1447 guest_width = cap_mgaw(iommu->cap);
1448 domain->gaw = guest_width;
1449 adjust_width = guestwidth_to_adjustwidth(guest_width);
1450 agaw = width_to_agaw(adjust_width);
1451 sagaw = cap_sagaw(iommu->cap);
1452 if (!test_bit(agaw, &sagaw)) {
1453 /* hardware doesn't support it, choose a bigger one */
1454 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1455 agaw = find_next_bit(&sagaw, 5, agaw);
1456 if (agaw >= 5)
1457 return -ENODEV;
1458 }
1459 domain->agaw = agaw;
1460 INIT_LIST_HEAD(&domain->devices);
1461
1462 if (ecap_coherent(iommu->ecap))
1463 domain->iommu_coherency = 1;
1464 else
1465 domain->iommu_coherency = 0;
1466
1467 if (ecap_sc_support(iommu->ecap))
1468 domain->iommu_snooping = 1;
1469 else
1470 domain->iommu_snooping = 0;
1471
1472 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1473 domain->iommu_count = 1;
1474 domain->nid = iommu->node;
1475
1476 /* always allocate the top pgd */
1477 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1478 if (!domain->pgd)
1479 return -ENOMEM;
1480 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1481 return 0;
1482}
1483
1484static void domain_exit(struct dmar_domain *domain)
1485{
1486 struct dmar_drhd_unit *drhd;
1487 struct intel_iommu *iommu;
1488
1489	/* Domain 0 is reserved, so don't process it */
1490 if (!domain)
1491 return;
1492
1493 /* Flush any lazy unmaps that may reference this domain */
1494 if (!intel_iommu_strict)
1495 flush_unmaps_timeout(0);
1496
1497 domain_remove_dev_info(domain);
1498 /* destroy iovas */
1499 put_iova_domain(&domain->iovad);
1500
1501 /* clear ptes */
1502 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1503
1504 /* free page tables */
1505 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1506
1507 for_each_active_iommu(iommu, drhd)
1508 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1509 iommu_detach_domain(domain, iommu);
1510
1511 free_domain_mem(domain);
1512}
1513
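/*
 * Program the context entry for one (segment, bus, devfn) so that the
 * device translates through @domain's page tables (or passes through,
 * depending on @translation), then flush the context and IOTLB caches as
 * required by the caching mode, and enable the device IOTLB if supported.
 */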
1514static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1515 u8 bus, u8 devfn, int translation)
1516{
1517 struct context_entry *context;
1518 unsigned long flags;
1519 struct intel_iommu *iommu;
1520 struct dma_pte *pgd;
1521 unsigned long num;
1522 unsigned long ndomains;
1523 int id;
1524 int agaw;
1525 struct device_domain_info *info = NULL;
1526
1527 pr_debug("Set context mapping for %02x:%02x.%d\n",
1528 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1529
1530 BUG_ON(!domain->pgd);
1531 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1532 translation != CONTEXT_TT_MULTI_LEVEL);
1533
1534 iommu = device_to_iommu(segment, bus, devfn);
1535 if (!iommu)
1536 return -ENODEV;
1537
1538 context = device_to_context_entry(iommu, bus, devfn);
1539 if (!context)
1540 return -ENOMEM;
1541 spin_lock_irqsave(&iommu->lock, flags);
1542 if (context_present(context)) {
1543 spin_unlock_irqrestore(&iommu->lock, flags);
1544 return 0;
1545 }
1546
1547 id = domain->id;
1548 pgd = domain->pgd;
1549
1550 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1551 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1552 int found = 0;
1553
1554 /* find an available domain id for this device in iommu */
1555 ndomains = cap_ndoms(iommu->cap);
1556 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1557 if (iommu->domains[num] == domain) {
1558 id = num;
1559 found = 1;
1560 break;
1561 }
1562 }
1563
1564 if (found == 0) {
1565 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1566 if (num >= ndomains) {
1567 spin_unlock_irqrestore(&iommu->lock, flags);
1568 printk(KERN_ERR "IOMMU: no free domain ids\n");
1569 return -EFAULT;
1570 }
1571
1572 set_bit(num, iommu->domain_ids);
1573 iommu->domains[num] = domain;
1574 id = num;
1575 }
1576
1577		/* Skip top levels of page tables for an
1578		 * iommu which has a smaller agaw than the default.
1579 * Unnecessary for PT mode.
1580 */
1581 if (translation != CONTEXT_TT_PASS_THROUGH) {
1582 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1583 pgd = phys_to_virt(dma_pte_addr(pgd));
1584 if (!dma_pte_present(pgd)) {
1585 spin_unlock_irqrestore(&iommu->lock, flags);
1586 return -ENOMEM;
1587 }
1588 }
1589 }
1590 }
1591
1592 context_set_domain_id(context, id);
1593
1594 if (translation != CONTEXT_TT_PASS_THROUGH) {
1595 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1596 translation = info ? CONTEXT_TT_DEV_IOTLB :
1597 CONTEXT_TT_MULTI_LEVEL;
1598 }
1599 /*
1600 * In pass through mode, AW must be programmed to indicate the largest
1601 * AGAW value supported by hardware. And ASR is ignored by hardware.
1602 */
1603 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1604 context_set_address_width(context, iommu->msagaw);
1605 else {
1606 context_set_address_root(context, virt_to_phys(pgd));
1607 context_set_address_width(context, iommu->agaw);
1608 }
1609
1610 context_set_translation_type(context, translation);
1611 context_set_fault_enable(context);
1612 context_set_present(context);
1613 domain_flush_cache(domain, context, sizeof(*context));
1614
1615 /*
1616	 * It's a non-present to present mapping. If hardware doesn't cache
1617	 * non-present entries we only need to flush the write-buffer. If it
1618	 * _does_ cache non-present entries, then it does so in the special
1619 * domain #0, which we have to flush:
1620 */
1621 if (cap_caching_mode(iommu->cap)) {
1622 iommu->flush.flush_context(iommu, 0,
1623 (((u16)bus) << 8) | devfn,
1624 DMA_CCMD_MASK_NOBIT,
1625 DMA_CCMD_DEVICE_INVL);
1626 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1627 } else {
1628 iommu_flush_write_buffer(iommu);
1629 }
1630 iommu_enable_dev_iotlb(info);
1631 spin_unlock_irqrestore(&iommu->lock, flags);
1632
1633 spin_lock_irqsave(&domain->iommu_lock, flags);
1634 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1635 domain->iommu_count++;
1636 if (domain->iommu_count == 1)
1637 domain->nid = iommu->node;
1638 domain_update_iommu_cap(domain);
1639 }
1640 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1641 return 0;
1642}
1643
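/*
 * Set up context entries for @pdev and, when the device sits behind a
 * PCIe-to-PCI bridge, for every bridge on the path as well: requests from
 * devices behind such a bridge carry the bridge's source-id, so the
 * bridges have to be mapped into the same domain.
 */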
1644static int
1645domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1646 int translation)
1647{
1648 int ret;
1649 struct pci_dev *tmp, *parent;
1650
1651 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1652 pdev->bus->number, pdev->devfn,
1653 translation);
1654 if (ret)
1655 return ret;
1656
1657 /* dependent device mapping */
1658 tmp = pci_find_upstream_pcie_bridge(pdev);
1659 if (!tmp)
1660 return 0;
1661 /* Secondary interface's bus number and devfn 0 */
1662 parent = pdev->bus->self;
1663 while (parent != tmp) {
1664 ret = domain_context_mapping_one(domain,
1665 pci_domain_nr(parent->bus),
1666 parent->bus->number,
1667 parent->devfn, translation);
1668 if (ret)
1669 return ret;
1670 parent = parent->bus->self;
1671 }
1672 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1673 return domain_context_mapping_one(domain,
1674 pci_domain_nr(tmp->subordinate),
1675 tmp->subordinate->number, 0,
1676 translation);
1677 else /* this is a legacy PCI bridge */
1678 return domain_context_mapping_one(domain,
1679 pci_domain_nr(tmp->bus),
1680 tmp->bus->number,
1681 tmp->devfn,
1682 translation);
1683}
1684
1685static int domain_context_mapped(struct pci_dev *pdev)
1686{
1687 int ret;
1688 struct pci_dev *tmp, *parent;
1689 struct intel_iommu *iommu;
1690
1691 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1692 pdev->devfn);
1693 if (!iommu)
1694 return -ENODEV;
1695
1696 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1697 if (!ret)
1698 return ret;
1699 /* dependent device mapping */
1700 tmp = pci_find_upstream_pcie_bridge(pdev);
1701 if (!tmp)
1702 return ret;
1703 /* Secondary interface's bus number and devfn 0 */
1704 parent = pdev->bus->self;
1705 while (parent != tmp) {
1706 ret = device_context_mapped(iommu, parent->bus->number,
1707 parent->devfn);
1708 if (!ret)
1709 return ret;
1710 parent = parent->bus->self;
1711 }
1712 if (pci_is_pcie(tmp))
1713 return device_context_mapped(iommu, tmp->subordinate->number,
1714 0);
1715 else
1716 return device_context_mapped(iommu, tmp->bus->number,
1717 tmp->devfn);
1718}
1719
1720/* Returns a number of VTD pages, but aligned to MM page size */
1721static inline unsigned long aligned_nrpages(unsigned long host_addr,
1722 size_t size)
1723{
1724 host_addr &= ~PAGE_MASK;
1725 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1726}
1727
1728/* Return largest possible superpage level for a given mapping */
1729static inline int hardware_largepage_caps(struct dmar_domain *domain,
1730 unsigned long iov_pfn,
1731 unsigned long phy_pfn,
1732 unsigned long pages)
1733{
1734 int support, level = 1;
1735 unsigned long pfnmerge;
1736
1737 support = domain->iommu_superpage;
1738
1739 /* To use a large page, the virtual *and* physical addresses
1740 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1741 of them will mean we have to use smaller pages. So just
1742 merge them and check both at once. */
1743 pfnmerge = iov_pfn | phy_pfn;
1744
1745 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1746 pages >>= VTD_STRIDE_SHIFT;
1747 if (!pages)
1748 break;
1749 pfnmerge >>= VTD_STRIDE_SHIFT;
1750 level++;
1751 support--;
1752 }
1753 return level;
1754}
1755
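/*
 * Core mapping loop: populate @domain's page tables for @nr_pages starting
 * at @iov_pfn, taking physical addresses either from @sg (scatterlist path)
 * or from @phys_pfn (contiguous path).  Each step uses the largest
 * superpage level that the hardware and the current alignment allow, PTEs
 * are written with cmpxchg so an already-present entry is detected, and
 * the CPU cache is flushed whenever a full page of PTEs has been filled.
 */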
1756static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1757 struct scatterlist *sg, unsigned long phys_pfn,
1758 unsigned long nr_pages, int prot)
1759{
1760 struct dma_pte *first_pte = NULL, *pte = NULL;
1761 phys_addr_t uninitialized_var(pteval);
1762 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1763 unsigned long sg_res;
1764 unsigned int largepage_lvl = 0;
1765 unsigned long lvl_pages = 0;
1766
1767 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1768
1769 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1770 return -EINVAL;
1771
1772 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1773
1774 if (sg)
1775 sg_res = 0;
1776 else {
1777 sg_res = nr_pages + 1;
1778 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1779 }
1780
1781 while (nr_pages > 0) {
1782 uint64_t tmp;
1783
1784 if (!sg_res) {
1785 sg_res = aligned_nrpages(sg->offset, sg->length);
1786 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1787 sg->dma_length = sg->length;
1788 pteval = page_to_phys(sg_page(sg)) | prot;
1789 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1790 }
1791
1792 if (!pte) {
1793 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1794
1795 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1796 if (!pte)
1797 return -ENOMEM;
1798			/* It is a large page */
1799 if (largepage_lvl > 1)
1800 pteval |= DMA_PTE_LARGE_PAGE;
1801 else
1802 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1803
1804 }
1805		/* We don't need a lock here; nobody else
1806		 * touches the iova range.
1807		 */
1808 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1809 if (tmp) {
1810 static int dumps = 5;
1811 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1812 iov_pfn, tmp, (unsigned long long)pteval);
1813 if (dumps) {
1814 dumps--;
1815 debug_dma_dump_mappings(NULL);
1816 }
1817 WARN_ON(1);
1818 }
1819
1820 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1821
1822 BUG_ON(nr_pages < lvl_pages);
1823 BUG_ON(sg_res < lvl_pages);
1824
1825 nr_pages -= lvl_pages;
1826 iov_pfn += lvl_pages;
1827 phys_pfn += lvl_pages;
1828 pteval += lvl_pages * VTD_PAGE_SIZE;
1829 sg_res -= lvl_pages;
1830
1831 /* If the next PTE would be the first in a new page, then we
1832 need to flush the cache on the entries we've just written.
1833 And then we'll need to recalculate 'pte', so clear it and
1834 let it get set again in the if (!pte) block above.
1835
1836 If we're done (!nr_pages) we need to flush the cache too.
1837
1838 Also if we've been setting superpages, we may need to
1839 recalculate 'pte' and switch back to smaller pages for the
1840 end of the mapping, if the trailing size is not enough to
1841 use another superpage (i.e. sg_res < lvl_pages). */
1842 pte++;
1843 if (!nr_pages || first_pte_in_page(pte) ||
1844 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1845 domain_flush_cache(domain, first_pte,
1846 (void *)pte - (void *)first_pte);
1847 pte = NULL;
1848 }
1849
1850 if (!sg_res && nr_pages)
1851 sg = sg_next(sg);
1852 }
1853 return 0;
1854}
1855
1856static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1857 struct scatterlist *sg, unsigned long nr_pages,
1858 int prot)
1859{
1860 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1861}
1862
1863static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1864 unsigned long phys_pfn, unsigned long nr_pages,
1865 int prot)
1866{
1867 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1868}
1869
1870static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1871{
1872 if (!iommu)
1873 return;
1874
1875 clear_context_table(iommu, bus, devfn);
1876 iommu->flush.flush_context(iommu, 0, 0, 0,
1877 DMA_CCMD_GLOBAL_INVL);
1878 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1879}
1880
1881static void domain_remove_dev_info(struct dmar_domain *domain)
1882{
1883 struct device_domain_info *info;
1884 unsigned long flags;
1885 struct intel_iommu *iommu;
1886
1887 spin_lock_irqsave(&device_domain_lock, flags);
1888 while (!list_empty(&domain->devices)) {
1889 info = list_entry(domain->devices.next,
1890 struct device_domain_info, link);
1891 list_del(&info->link);
1892 list_del(&info->global);
1893 if (info->dev)
1894 info->dev->dev.archdata.iommu = NULL;
1895 spin_unlock_irqrestore(&device_domain_lock, flags);
1896
1897 iommu_disable_dev_iotlb(info);
1898 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1899 iommu_detach_dev(iommu, info->bus, info->devfn);
1900 free_devinfo_mem(info);
1901
1902 spin_lock_irqsave(&device_domain_lock, flags);
1903 }
1904 spin_unlock_irqrestore(&device_domain_lock, flags);
1905}
1906
1907/*
1908 * find_domain
1909 * Note: struct pci_dev->dev.archdata.iommu stores the per-device info
1910 */
1911static struct dmar_domain *
1912find_domain(struct pci_dev *pdev)
1913{
1914 struct device_domain_info *info;
1915
1916 /* No lock here, assumes no domain exit in normal case */
1917 info = pdev->dev.archdata.iommu;
1918 if (info)
1919 return info->domain;
1920 return NULL;
1921}
1922
1923/* Find the domain for the device, allocating and initializing it if necessary */
1924static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1925{
1926 struct dmar_domain *domain, *found = NULL;
1927 struct intel_iommu *iommu;
1928 struct dmar_drhd_unit *drhd;
1929 struct device_domain_info *info, *tmp;
1930 struct pci_dev *dev_tmp;
1931 unsigned long flags;
1932 int bus = 0, devfn = 0;
1933 int segment;
1934 int ret;
1935
1936 domain = find_domain(pdev);
1937 if (domain)
1938 return domain;
1939
1940 segment = pci_domain_nr(pdev->bus);
1941
1942 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1943 if (dev_tmp) {
1944 if (pci_is_pcie(dev_tmp)) {
1945 bus = dev_tmp->subordinate->number;
1946 devfn = 0;
1947 } else {
1948 bus = dev_tmp->bus->number;
1949 devfn = dev_tmp->devfn;
1950 }
1951 spin_lock_irqsave(&device_domain_lock, flags);
1952 list_for_each_entry(info, &device_domain_list, global) {
1953 if (info->segment == segment &&
1954 info->bus == bus && info->devfn == devfn) {
1955 found = info->domain;
1956 break;
1957 }
1958 }
1959 spin_unlock_irqrestore(&device_domain_lock, flags);
1960		/* pcie-pci bridge already has a domain, use it */
1961 if (found) {
1962 domain = found;
1963 goto found_domain;
1964 }
1965 }
1966
1967 domain = alloc_domain();
1968 if (!domain)
1969 goto error;
1970
1971 /* Allocate new domain for the device */
1972 drhd = dmar_find_matched_drhd_unit(pdev);
1973 if (!drhd) {
1974 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1975 pci_name(pdev));
1976 return NULL;
1977 }
1978 iommu = drhd->iommu;
1979
1980 ret = iommu_attach_domain(domain, iommu);
1981 if (ret) {
1982 free_domain_mem(domain);
1983 goto error;
1984 }
1985
1986 if (domain_init(domain, gaw)) {
1987 domain_exit(domain);
1988 goto error;
1989 }
1990
1991 /* register pcie-to-pci device */
1992 if (dev_tmp) {
1993 info = alloc_devinfo_mem();
1994 if (!info) {
1995 domain_exit(domain);
1996 goto error;
1997 }
1998 info->segment = segment;
1999 info->bus = bus;
2000 info->devfn = devfn;
2001 info->dev = NULL;
2002 info->domain = domain;
2003 /* This domain is shared by devices under p2p bridge */
2004 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2005
2006		/* pcie-to-pci bridge already has a domain, use it */
2007 found = NULL;
2008 spin_lock_irqsave(&device_domain_lock, flags);
2009 list_for_each_entry(tmp, &device_domain_list, global) {
2010 if (tmp->segment == segment &&
2011 tmp->bus == bus && tmp->devfn == devfn) {
2012 found = tmp->domain;
2013 break;
2014 }
2015 }
2016 if (found) {
2017 spin_unlock_irqrestore(&device_domain_lock, flags);
2018 free_devinfo_mem(info);
2019 domain_exit(domain);
2020 domain = found;
2021 } else {
2022 list_add(&info->link, &domain->devices);
2023 list_add(&info->global, &device_domain_list);
2024 spin_unlock_irqrestore(&device_domain_lock, flags);
2025 }
2026 }
2027
2028found_domain:
2029 info = alloc_devinfo_mem();
2030 if (!info)
2031 goto error;
2032 info->segment = segment;
2033 info->bus = pdev->bus->number;
2034 info->devfn = pdev->devfn;
2035 info->dev = pdev;
2036 info->domain = domain;
2037 spin_lock_irqsave(&device_domain_lock, flags);
2038	/* somebody else raced us and already set it up */
2039 found = find_domain(pdev);
2040 if (found != NULL) {
2041 spin_unlock_irqrestore(&device_domain_lock, flags);
2042 if (found != domain) {
2043 domain_exit(domain);
2044 domain = found;
2045 }
2046 free_devinfo_mem(info);
2047 return domain;
2048 }
2049 list_add(&info->link, &domain->devices);
2050 list_add(&info->global, &device_domain_list);
2051 pdev->dev.archdata.iommu = info;
2052 spin_unlock_irqrestore(&device_domain_lock, flags);
2053 return domain;
2054error:
2055 /* recheck it here, maybe others set it */
2056 return find_domain(pdev);
2057}
2058
2059static int iommu_identity_mapping;
2060#define IDENTMAP_ALL 1
2061#define IDENTMAP_GFX 2
2062#define IDENTMAP_AZALIA 4
2063
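/*
 * Establish a 1:1 (DMA address == physical address) mapping for
 * [start, end] in @domain: reserve the corresponding IOVA range, clear any
 * PTEs left over from overlapping RAM ranges, then map the pfns onto
 * themselves with read/write permission.
 */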
2064static int iommu_domain_identity_map(struct dmar_domain *domain,
2065 unsigned long long start,
2066 unsigned long long end)
2067{
2068 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2069 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2070
2071 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2072 dma_to_mm_pfn(last_vpfn))) {
2073 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2074 return -ENOMEM;
2075 }
2076
2077 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2078 start, end, domain->id);
2079 /*
2080 * RMRR range might have overlap with physical memory range,
2081 * clear it first
2082 */
2083 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2084
2085 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2086 last_vpfn - first_vpfn + 1,
2087 DMA_PTE_READ|DMA_PTE_WRITE);
2088}
2089
2090static int iommu_prepare_identity_map(struct pci_dev *pdev,
2091 unsigned long long start,
2092 unsigned long long end)
2093{
2094 struct dmar_domain *domain;
2095 int ret;
2096
2097 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2098 if (!domain)
2099 return -ENOMEM;
2100
2101 /* For _hardware_ passthrough, don't bother. But for software
2102 passthrough, we do it anyway -- it may indicate a memory
2103	   range which is reserved in E820 and so didn't get set
2104	   up to start with in si_domain */
2105 if (domain == si_domain && hw_pass_through) {
2106		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2107 pci_name(pdev), start, end);
2108 return 0;
2109 }
2110
2111 printk(KERN_INFO
2112 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2113 pci_name(pdev), start, end);
2114
2115 if (end < start) {
2116 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2117 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2118 dmi_get_system_info(DMI_BIOS_VENDOR),
2119 dmi_get_system_info(DMI_BIOS_VERSION),
2120 dmi_get_system_info(DMI_PRODUCT_VERSION));
2121 ret = -EIO;
2122 goto error;
2123 }
2124
2125 if (end >> agaw_to_width(domain->agaw)) {
2126 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2127 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2128 agaw_to_width(domain->agaw),
2129 dmi_get_system_info(DMI_BIOS_VENDOR),
2130 dmi_get_system_info(DMI_BIOS_VERSION),
2131 dmi_get_system_info(DMI_PRODUCT_VERSION));
2132 ret = -EIO;
2133 goto error;
2134 }
2135
2136 ret = iommu_domain_identity_map(domain, start, end);
2137 if (ret)
2138 goto error;
2139
2140 /* context entry init */
2141 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2142 if (ret)
2143 goto error;
2144
2145 return 0;
2146
2147 error:
2148 domain_exit(domain);
2149 return ret;
2150}
2151
2152static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2153 struct pci_dev *pdev)
2154{
2155 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2156 return 0;
2157 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2158 rmrr->end_address);
2159}
2160
2161#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2162static inline void iommu_prepare_isa(void)
2163{
2164 struct pci_dev *pdev;
2165 int ret;
2166
2167 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2168 if (!pdev)
2169 return;
2170
2171 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2172 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2173
2174 if (ret)
2175 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2176 "floppy might not work\n");
2177
2178}
2179#else
2180static inline void iommu_prepare_isa(void)
2181{
2182 return;
2183}
2184#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2185
2186static int md_domain_init(struct dmar_domain *domain, int guest_width);
2187
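/*
 * Build the static identity (si) domain used for pass-through-style DMA:
 * attach it to every active IOMMU and initialize its page tables.  With
 * hardware pass-through (@hw) nothing needs to be mapped; for software
 * pass-through every usable memory range of every online node is
 * identity-mapped into the domain.
 */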
2188static int __init si_domain_init(int hw)
2189{
2190 struct dmar_drhd_unit *drhd;
2191 struct intel_iommu *iommu;
2192 int nid, ret = 0;
2193
2194 si_domain = alloc_domain();
2195 if (!si_domain)
2196 return -EFAULT;
2197
2198 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2199
2200 for_each_active_iommu(iommu, drhd) {
2201 ret = iommu_attach_domain(si_domain, iommu);
2202 if (ret) {
2203 domain_exit(si_domain);
2204 return -EFAULT;
2205 }
2206 }
2207
2208 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2209 domain_exit(si_domain);
2210 return -EFAULT;
2211 }
2212
2213 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2214
2215 if (hw)
2216 return 0;
2217
2218 for_each_online_node(nid) {
2219 unsigned long start_pfn, end_pfn;
2220 int i;
2221
2222 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2223 ret = iommu_domain_identity_map(si_domain,
2224 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2225 if (ret)
2226 return ret;
2227 }
2228 }
2229
2230 return 0;
2231}
2232
2233static void domain_remove_one_dev_info(struct dmar_domain *domain,
2234 struct pci_dev *pdev);
2235static int identity_mapping(struct pci_dev *pdev)
2236{
2237 struct device_domain_info *info;
2238
2239 if (likely(!iommu_identity_mapping))
2240 return 0;
2241
2242 info = pdev->dev.archdata.iommu;
2243 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2244 return (info->domain == si_domain);
2245
2246 return 0;
2247}
2248
2249static int domain_add_dev_info(struct dmar_domain *domain,
2250 struct pci_dev *pdev,
2251 int translation)
2252{
2253 struct device_domain_info *info;
2254 unsigned long flags;
2255 int ret;
2256
2257 info = alloc_devinfo_mem();
2258 if (!info)
2259 return -ENOMEM;
2260
2261 ret = domain_context_mapping(domain, pdev, translation);
2262 if (ret) {
2263 free_devinfo_mem(info);
2264 return ret;
2265 }
2266
2267 info->segment = pci_domain_nr(pdev->bus);
2268 info->bus = pdev->bus->number;
2269 info->devfn = pdev->devfn;
2270 info->dev = pdev;
2271 info->domain = domain;
2272
2273 spin_lock_irqsave(&device_domain_lock, flags);
2274 list_add(&info->link, &domain->devices);
2275 list_add(&info->global, &device_domain_list);
2276 pdev->dev.archdata.iommu = info;
2277 spin_unlock_irqrestore(&device_domain_lock, flags);
2278
2279 return 0;
2280}
2281
2282static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2283{
2284 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2285 return 1;
2286
2287 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2288 return 1;
2289
2290 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2291 return 0;
2292
2293 /*
2294 * We want to start off with all devices in the 1:1 domain, and
2295 * take them out later if we find they can't access all of memory.
2296 *
2297 * However, we can't do this for PCI devices behind bridges,
2298 * because all PCI devices behind the same bridge will end up
2299 * with the same source-id on their transactions.
2300 *
2301 * Practically speaking, we can't change things around for these
2302 * devices at run-time, because we can't be sure there'll be no
2303 * DMA transactions in flight for any of their siblings.
2304 *
2305 * So PCI devices (unless they're on the root bus) as well as
2306 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2307 * the 1:1 domain, just in _case_ one of their siblings turns out
2308 * not to be able to map all of memory.
2309 */
2310 if (!pci_is_pcie(pdev)) {
2311 if (!pci_is_root_bus(pdev->bus))
2312 return 0;
2313 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2314 return 0;
2315 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2316 return 0;
2317
2318 /*
2319 * At boot time, we don't yet know if devices will be 64-bit capable.
2320 * Assume that they will -- if they turn out not to be, then we can
2321 * take them out of the 1:1 domain later.
2322 */
2323 if (!startup) {
2324 /*
2325 * If the device's dma_mask is less than the system's memory
2326 * size then this is not a candidate for identity mapping.
2327 */
2328 u64 dma_mask = pdev->dma_mask;
2329
2330 if (pdev->dev.coherent_dma_mask &&
2331 pdev->dev.coherent_dma_mask < dma_mask)
2332 dma_mask = pdev->dev.coherent_dma_mask;
2333
2334 return dma_mask >= dma_get_required_mask(&pdev->dev);
2335 }
2336
2337 return 1;
2338}
2339
2340static int __init iommu_prepare_static_identity_mapping(int hw)
2341{
2342 struct pci_dev *pdev = NULL;
2343 int ret;
2344
2345 ret = si_domain_init(hw);
2346 if (ret)
2347 return -EFAULT;
2348
2349 for_each_pci_dev(pdev) {
2350 /* Skip Host/PCI Bridge devices */
2351 if (IS_BRIDGE_HOST_DEVICE(pdev))
2352 continue;
2353 if (iommu_should_identity_map(pdev, 1)) {
2354 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2355 hw ? "hardware" : "software", pci_name(pdev));
2356
2357 ret = domain_add_dev_info(si_domain, pdev,
2358 hw ? CONTEXT_TT_PASS_THROUGH :
2359 CONTEXT_TT_MULTI_LEVEL);
2360 if (ret)
2361 return ret;
2362 }
2363 }
2364
2365 return 0;
2366}
2367
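/*
 * Boot-time DMAR initialization: count the DRHD units, allocate the global
 * iommu and deferred-flush arrays, set up per-IOMMU domain ids and root
 * entries, pick queued vs. register-based invalidation, create the static
 * identity / RMRR / ISA mappings, and finally program the root entries and
 * enable translation on each unit.
 */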
2368static int __init init_dmars(void)
2369{
2370 struct dmar_drhd_unit *drhd;
2371 struct dmar_rmrr_unit *rmrr;
2372 struct pci_dev *pdev;
2373 struct intel_iommu *iommu;
2374 int i, ret;
2375
2376 /*
2377 * for each drhd
2378 * allocate root
2379 * initialize and program root entry to not present
2380 * endfor
2381 */
2382 for_each_drhd_unit(drhd) {
2383 g_num_of_iommus++;
2384 /*
2385		 * lock not needed as this is only incremented in the
2386		 * single-threaded kernel __init code path; all other
2387		 * accesses are read-only
2388 */
2389 }
2390
2391 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2392 GFP_KERNEL);
2393 if (!g_iommus) {
2394 printk(KERN_ERR "Allocating global iommu array failed\n");
2395 ret = -ENOMEM;
2396 goto error;
2397 }
2398
2399 deferred_flush = kzalloc(g_num_of_iommus *
2400 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2401 if (!deferred_flush) {
2402 ret = -ENOMEM;
2403 goto error;
2404 }
2405
2406 for_each_drhd_unit(drhd) {
2407 if (drhd->ignored)
2408 continue;
2409
2410 iommu = drhd->iommu;
2411 g_iommus[iommu->seq_id] = iommu;
2412
2413 ret = iommu_init_domains(iommu);
2414 if (ret)
2415 goto error;
2416
2417 /*
2418 * TBD:
2419 * we could share the same root & context tables
2420		 * among all IOMMUs.  Need to split it later.
2421 */
2422 ret = iommu_alloc_root_entry(iommu);
2423 if (ret) {
2424 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2425 goto error;
2426 }
2427 if (!ecap_pass_through(iommu->ecap))
2428 hw_pass_through = 0;
2429 }
2430
2431 /*
2432	 * Start from a sane iommu hardware state.
2433 */
2434 for_each_drhd_unit(drhd) {
2435 if (drhd->ignored)
2436 continue;
2437
2438 iommu = drhd->iommu;
2439
2440 /*
2441 * If the queued invalidation is already initialized by us
2442 * (for example, while enabling interrupt-remapping) then
2443		 * things are already rolling from a sane state.
2444 */
2445 if (iommu->qi)
2446 continue;
2447
2448 /*
2449 * Clear any previous faults.
2450 */
2451 dmar_fault(-1, iommu);
2452 /*
2453 * Disable queued invalidation if supported and already enabled
2454 * before OS handover.
2455 */
2456 dmar_disable_qi(iommu);
2457 }
2458
2459 for_each_drhd_unit(drhd) {
2460 if (drhd->ignored)
2461 continue;
2462
2463 iommu = drhd->iommu;
2464
2465 if (dmar_enable_qi(iommu)) {
2466 /*
2467 * Queued Invalidate not enabled, use Register Based
2468 * Invalidate
2469 */
2470 iommu->flush.flush_context = __iommu_flush_context;
2471 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2472 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2473 "invalidation\n",
2474 iommu->seq_id,
2475 (unsigned long long)drhd->reg_base_addr);
2476 } else {
2477 iommu->flush.flush_context = qi_flush_context;
2478 iommu->flush.flush_iotlb = qi_flush_iotlb;
2479 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2480 "invalidation\n",
2481 iommu->seq_id,
2482 (unsigned long long)drhd->reg_base_addr);
2483 }
2484 }
2485
2486 if (iommu_pass_through)
2487 iommu_identity_mapping |= IDENTMAP_ALL;
2488
2489#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2490 iommu_identity_mapping |= IDENTMAP_GFX;
2491#endif
2492
2493 check_tylersburg_isoch();
2494
2495 /*
2496	 * If any form of identity mapping was requested, set up the static
2497	 * identity domain now and add every eligible device to it, using
2498	 * hardware pass-through context entries when available.
2499 */
2500 if (iommu_identity_mapping) {
2501 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2502 if (ret) {
2503 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2504 goto error;
2505 }
2506 }
2507 /*
2508 * For each rmrr
2509 * for each dev attached to rmrr
2510 * do
2511 * locate drhd for dev, alloc domain for dev
2512 * allocate free domain
2513 * allocate page table entries for rmrr
2514 * if context not allocated for bus
2515 * allocate and init context
2516 * set present in root table for this bus
2517 * init context with domain, translation etc
2518 * endfor
2519 * endfor
2520 */
2521 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2522 for_each_rmrr_units(rmrr) {
2523 for (i = 0; i < rmrr->devices_cnt; i++) {
2524 pdev = rmrr->devices[i];
2525 /*
2526			 * some BIOSes list non-existent devices in the
2527			 * DMAR table.
2528 */
2529 if (!pdev)
2530 continue;
2531 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2532 if (ret)
2533 printk(KERN_ERR
2534 "IOMMU: mapping reserved region failed\n");
2535 }
2536 }
2537
2538 iommu_prepare_isa();
2539
2540 /*
2541 * for each drhd
2542 * enable fault log
2543 * global invalidate context cache
2544 * global invalidate iotlb
2545 * enable translation
2546 */
2547 for_each_drhd_unit(drhd) {
2548 if (drhd->ignored) {
2549 /*
2550 * we always have to disable PMRs or DMA may fail on
2551 * this device
2552 */
2553 if (force_on)
2554 iommu_disable_protect_mem_regions(drhd->iommu);
2555 continue;
2556 }
2557 iommu = drhd->iommu;
2558
2559 iommu_flush_write_buffer(iommu);
2560
2561 ret = dmar_set_interrupt(iommu);
2562 if (ret)
2563 goto error;
2564
2565 iommu_set_root_entry(iommu);
2566
2567 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2568 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2569
2570 ret = iommu_enable_translation(iommu);
2571 if (ret)
2572 goto error;
2573
2574 iommu_disable_protect_mem_regions(iommu);
2575 }
2576
2577 return 0;
2578error:
2579 for_each_drhd_unit(drhd) {
2580 if (drhd->ignored)
2581 continue;
2582 iommu = drhd->iommu;
2583 free_iommu(iommu);
2584 }
2585 kfree(g_iommus);
2586 return ret;
2587}
2588
2589/* This takes a number of _MM_ pages, not VTD pages */
2590static struct iova *intel_alloc_iova(struct device *dev,
2591 struct dmar_domain *domain,
2592 unsigned long nrpages, uint64_t dma_mask)
2593{
2594 struct pci_dev *pdev = to_pci_dev(dev);
2595 struct iova *iova = NULL;
2596
2597 /* Restrict dma_mask to the width that the iommu can handle */
2598 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2599
2600 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2601 /*
2602 * First try to allocate an io virtual address in
2603 * DMA_BIT_MASK(32) and if that fails then try allocating
2604 * from higher range
2605 */
2606 iova = alloc_iova(&domain->iovad, nrpages,
2607 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2608 if (iova)
2609 return iova;
2610 }
2611 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2612 if (unlikely(!iova)) {
2613		printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2614 nrpages, pci_name(pdev));
2615 return NULL;
2616 }
2617
2618 return iova;
2619}
2620
2621static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2622{
2623 struct dmar_domain *domain;
2624 int ret;
2625
2626 domain = get_domain_for_dev(pdev,
2627 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2628 if (!domain) {
2629 printk(KERN_ERR
2630			"Allocating domain for %s failed\n", pci_name(pdev));
2631 return NULL;
2632 }
2633
2634 /* make sure context mapping is ok */
2635 if (unlikely(!domain_context_mapped(pdev))) {
2636 ret = domain_context_mapping(domain, pdev,
2637 CONTEXT_TT_MULTI_LEVEL);
2638 if (ret) {
2639 printk(KERN_ERR
2640				"Domain context map for %s failed\n",
2641 pci_name(pdev));
2642 return NULL;
2643 }
2644 }
2645
2646 return domain;
2647}
2648
2649static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2650{
2651 struct device_domain_info *info;
2652
2653 /* No lock here, assumes no domain exit in normal case */
2654 info = dev->dev.archdata.iommu;
2655 if (likely(info))
2656 return info->domain;
2657
2658 return __get_valid_domain_for_dev(dev);
2659}
2660
2661static int iommu_dummy(struct pci_dev *pdev)
2662{
2663 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2664}
2665
2666/* Check if the pdev needs to go through the non-identity map and unmap process. */
2667static int iommu_no_mapping(struct device *dev)
2668{
2669 struct pci_dev *pdev;
2670 int found;
2671
2672 if (unlikely(dev->bus != &pci_bus_type))
2673 return 1;
2674
2675 pdev = to_pci_dev(dev);
2676 if (iommu_dummy(pdev))
2677 return 1;
2678
2679 if (!iommu_identity_mapping)
2680 return 0;
2681
2682 found = identity_mapping(pdev);
2683 if (found) {
2684 if (iommu_should_identity_map(pdev, 0))
2685 return 1;
2686 else {
2687 /*
2688			 * 32 bit DMA device: remove it from si_domain and
2689			 * fall back to non-identity mapping.
2690 */
2691 domain_remove_one_dev_info(si_domain, pdev);
2692 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2693 pci_name(pdev));
2694 return 0;
2695 }
2696 } else {
2697 /*
2698			 * If a 64 bit DMA device has been detached from a vm,
2699			 * put it back into si_domain for identity mapping.
2700 */
2701 if (iommu_should_identity_map(pdev, 0)) {
2702 int ret;
2703 ret = domain_add_dev_info(si_domain, pdev,
2704 hw_pass_through ?
2705 CONTEXT_TT_PASS_THROUGH :
2706 CONTEXT_TT_MULTI_LEVEL);
2707 if (!ret) {
2708 printk(KERN_INFO "64bit %s uses identity mapping\n",
2709 pci_name(pdev));
2710 return 1;
2711 }
2712 }
2713 }
2714
2715 return 0;
2716}
2717
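/*
 * Map one physically contiguous buffer for DMA: allocate an IOVA range
 * sized (and page-aligned) for the request, build the page-table entries
 * with the protection implied by @dir, flush the IOTLB (caching mode) or
 * the write buffer, and return the resulting bus address with the original
 * page offset added back.
 */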
2718static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2719 size_t size, int dir, u64 dma_mask)
2720{
2721 struct pci_dev *pdev = to_pci_dev(hwdev);
2722 struct dmar_domain *domain;
2723 phys_addr_t start_paddr;
2724 struct iova *iova;
2725 int prot = 0;
2726 int ret;
2727 struct intel_iommu *iommu;
2728 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2729
2730 BUG_ON(dir == DMA_NONE);
2731
2732 if (iommu_no_mapping(hwdev))
2733 return paddr;
2734
2735 domain = get_valid_domain_for_dev(pdev);
2736 if (!domain)
2737 return 0;
2738
2739 iommu = domain_get_iommu(domain);
2740 size = aligned_nrpages(paddr, size);
2741
2742 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2743 if (!iova)
2744 goto error;
2745
2746 /*
2747 * Check if DMAR supports zero-length reads on write only
2748 * mappings..
2749 */
2750	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2751 !cap_zlr(iommu->cap))
2752 prot |= DMA_PTE_READ;
2753 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2754 prot |= DMA_PTE_WRITE;
2755 /*
2756	 * paddr to (paddr + size) might span a partial page, so we map the
2757	 * whole page.  Note: if two parts of one page are mapped separately,
2758	 * two guest addresses might map to the same host paddr, but this
2759	 * is not a big problem.
2760 */
2761 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2762 mm_to_dma_pfn(paddr_pfn), size, prot);
2763 if (ret)
2764 goto error;
2765
2766 /* it's a non-present to present mapping. Only flush if caching mode */
2767 if (cap_caching_mode(iommu->cap))
2768 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2769 else
2770 iommu_flush_write_buffer(iommu);
2771
2772 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2773 start_paddr += paddr & ~PAGE_MASK;
2774 return start_paddr;
2775
2776error:
2777 if (iova)
2778 __free_iova(&domain->iovad, iova);
2779	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2780 pci_name(pdev), size, (unsigned long long)paddr, dir);
2781 return 0;
2782}
2783
2784static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2785 unsigned long offset, size_t size,
2786 enum dma_data_direction dir,
2787 struct dma_attrs *attrs)
2788{
2789 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2790 dir, to_pci_dev(dev)->dma_mask);
2791}
2792
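/*
 * Drain the deferred-unmap tables: for every IOMMU with pending entries do
 * one global IOTLB flush plus a device-IOTLB flush per entry (or, in
 * caching mode, a page-selective flush per entry, which is cheaper under
 * emulation), then free the queued IOVAs.
 */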
2793static void flush_unmaps(void)
2794{
2795 int i, j;
2796
2797 timer_on = 0;
2798
2799 /* just flush them all */
2800 for (i = 0; i < g_num_of_iommus; i++) {
2801 struct intel_iommu *iommu = g_iommus[i];
2802 if (!iommu)
2803 continue;
2804
2805 if (!deferred_flush[i].next)
2806 continue;
2807
2808		/* In caching mode, global flushes make emulation expensive */
2809 if (!cap_caching_mode(iommu->cap))
2810 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2811 DMA_TLB_GLOBAL_FLUSH);
2812 for (j = 0; j < deferred_flush[i].next; j++) {
2813 unsigned long mask;
2814 struct iova *iova = deferred_flush[i].iova[j];
2815 struct dmar_domain *domain = deferred_flush[i].domain[j];
2816
2817 /* On real hardware multiple invalidations are expensive */
2818 if (cap_caching_mode(iommu->cap))
2819 iommu_flush_iotlb_psi(iommu, domain->id,
2820 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2821 else {
2822 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2823 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2824 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2825 }
2826 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2827 }
2828 deferred_flush[i].next = 0;
2829 }
2830
2831 list_size = 0;
2832}
2833
2834static void flush_unmaps_timeout(unsigned long data)
2835{
2836 unsigned long flags;
2837
2838 spin_lock_irqsave(&async_umap_flush_lock, flags);
2839 flush_unmaps();
2840 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2841}
2842
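/*
 * Queue an IOVA for deferred freeing on the domain's IOMMU.  The batch is
 * drained either when the high-water mark is hit or by the 10ms unmap
 * timer armed here.
 */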
2843static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2844{
2845 unsigned long flags;
2846 int next, iommu_id;
2847 struct intel_iommu *iommu;
2848
2849 spin_lock_irqsave(&async_umap_flush_lock, flags);
2850 if (list_size == HIGH_WATER_MARK)
2851 flush_unmaps();
2852
2853 iommu = domain_get_iommu(dom);
2854 iommu_id = iommu->seq_id;
2855
2856 next = deferred_flush[iommu_id].next;
2857 deferred_flush[iommu_id].domain[next] = dom;
2858 deferred_flush[iommu_id].iova[next] = iova;
2859 deferred_flush[iommu_id].next++;
2860
2861 if (!timer_on) {
2862 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2863 timer_on = 1;
2864 }
2865 list_size++;
2866 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2867}
2868
2869static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2870 size_t size, enum dma_data_direction dir,
2871 struct dma_attrs *attrs)
2872{
2873 struct pci_dev *pdev = to_pci_dev(dev);
2874 struct dmar_domain *domain;
2875 unsigned long start_pfn, last_pfn;
2876 struct iova *iova;
2877 struct intel_iommu *iommu;
2878
2879 if (iommu_no_mapping(dev))
2880 return;
2881
2882 domain = find_domain(pdev);
2883 BUG_ON(!domain);
2884
2885 iommu = domain_get_iommu(domain);
2886
2887 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2888 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2889 (unsigned long long)dev_addr))
2890 return;
2891
2892 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2893 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2894
2895 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2896 pci_name(pdev), start_pfn, last_pfn);
2897
2898 /* clear the whole page */
2899 dma_pte_clear_range(domain, start_pfn, last_pfn);
2900
2901 /* free page tables */
2902 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2903
2904 if (intel_iommu_strict) {
2905 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2906 last_pfn - start_pfn + 1, 0);
2907 /* free iova */
2908 __free_iova(&domain->iovad, iova);
2909 } else {
2910 add_unmap(domain, iova);
2911 /*
2912		 * queue up the release of the unmap to save the ~1/6th of
2913		 * the cpu time used up by the iotlb flush operation...
2914 */
2915 }
2916}
2917
2918static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2919 dma_addr_t *dma_handle, gfp_t flags)
2920{
2921 void *vaddr;
2922 int order;
2923
2924 size = PAGE_ALIGN(size);
2925 order = get_order(size);
2926
2927 if (!iommu_no_mapping(hwdev))
2928 flags &= ~(GFP_DMA | GFP_DMA32);
2929 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2930 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2931 flags |= GFP_DMA;
2932 else
2933 flags |= GFP_DMA32;
2934 }
2935
2936 vaddr = (void *)__get_free_pages(flags, order);
2937 if (!vaddr)
2938 return NULL;
2939 memset(vaddr, 0, size);
2940
2941 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2942 DMA_BIDIRECTIONAL,
2943 hwdev->coherent_dma_mask);
2944 if (*dma_handle)
2945 return vaddr;
2946 free_pages((unsigned long)vaddr, order);
2947 return NULL;
2948}
2949
2950static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2951 dma_addr_t dma_handle)
2952{
2953 int order;
2954
2955 size = PAGE_ALIGN(size);
2956 order = get_order(size);
2957
2958 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2959 free_pages((unsigned long)vaddr, order);
2960}
2961
2962static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2963 int nelems, enum dma_data_direction dir,
2964 struct dma_attrs *attrs)
2965{
2966 struct pci_dev *pdev = to_pci_dev(hwdev);
2967 struct dmar_domain *domain;
2968 unsigned long start_pfn, last_pfn;
2969 struct iova *iova;
2970 struct intel_iommu *iommu;
2971
2972 if (iommu_no_mapping(hwdev))
2973 return;
2974
2975 domain = find_domain(pdev);
2976 BUG_ON(!domain);
2977
2978 iommu = domain_get_iommu(domain);
2979
2980 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2981 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2982 (unsigned long long)sglist[0].dma_address))
2983 return;
2984
2985 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2986 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2987
2988 /* clear the whole page */
2989 dma_pte_clear_range(domain, start_pfn, last_pfn);
2990
2991 /* free page tables */
2992 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2993
2994 if (intel_iommu_strict) {
2995 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2996 last_pfn - start_pfn + 1, 0);
2997 /* free iova */
2998 __free_iova(&domain->iovad, iova);
2999 } else {
3000 add_unmap(domain, iova);
3001 /*
3002		 * queue up the release of the unmap to save the ~1/6th of
3003		 * the cpu time used up by the iotlb flush operation...
3004 */
3005 }
3006}
3007
3008static int intel_nontranslate_map_sg(struct device *hddev,
3009 struct scatterlist *sglist, int nelems, int dir)
3010{
3011 int i;
3012 struct scatterlist *sg;
3013
3014 for_each_sg(sglist, sg, nelems, i) {
3015 BUG_ON(!sg_page(sg));
3016 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3017 sg->dma_length = sg->length;
3018 }
3019 return nelems;
3020}
3021
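/*
 * Map a scatterlist: allocate a single IOVA range large enough for all
 * segments, build the page tables through domain_sg_mapping(), and flush
 * the IOTLB or write buffer as the caching mode dictates.  On failure the
 * partial mapping is torn down and 0 is returned.
 */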
3022static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3023 enum dma_data_direction dir, struct dma_attrs *attrs)
3024{
3025 int i;
3026 struct pci_dev *pdev = to_pci_dev(hwdev);
3027 struct dmar_domain *domain;
3028 size_t size = 0;
3029 int prot = 0;
3030 struct iova *iova = NULL;
3031 int ret;
3032 struct scatterlist *sg;
3033 unsigned long start_vpfn;
3034 struct intel_iommu *iommu;
3035
3036 BUG_ON(dir == DMA_NONE);
3037 if (iommu_no_mapping(hwdev))
3038 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3039
3040 domain = get_valid_domain_for_dev(pdev);
3041 if (!domain)
3042 return 0;
3043
3044 iommu = domain_get_iommu(domain);
3045
3046 for_each_sg(sglist, sg, nelems, i)
3047 size += aligned_nrpages(sg->offset, sg->length);
3048
3049 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3050 pdev->dma_mask);
3051 if (!iova) {
3052 sglist->dma_length = 0;
3053 return 0;
3054 }
3055
3056 /*
3057 * Check if DMAR supports zero-length reads on write only
3058 * mappings..
3059 */
3060	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3061 !cap_zlr(iommu->cap))
3062 prot |= DMA_PTE_READ;
3063 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3064 prot |= DMA_PTE_WRITE;
3065
3066 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3067
3068 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3069 if (unlikely(ret)) {
3070 /* clear the page */
3071 dma_pte_clear_range(domain, start_vpfn,
3072 start_vpfn + size - 1);
3073 /* free page tables */
3074 dma_pte_free_pagetable(domain, start_vpfn,
3075 start_vpfn + size - 1);
3076 /* free iova */
3077 __free_iova(&domain->iovad, iova);
3078 return 0;
3079 }
3080
3081 /* it's a non-present to present mapping. Only flush if caching mode */
3082 if (cap_caching_mode(iommu->cap))
3083 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3084 else
3085 iommu_flush_write_buffer(iommu);
3086
3087 return nelems;
3088}
3089
3090static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3091{
3092 return !dma_addr;
3093}
3094
3095struct dma_map_ops intel_dma_ops = {
3096 .alloc_coherent = intel_alloc_coherent,
3097 .free_coherent = intel_free_coherent,
3098 .map_sg = intel_map_sg,
3099 .unmap_sg = intel_unmap_sg,
3100 .map_page = intel_map_page,
3101 .unmap_page = intel_unmap_page,
3102 .mapping_error = intel_mapping_error,
3103};
3104
3105static inline int iommu_domain_cache_init(void)
3106{
3107 int ret = 0;
3108
3109 iommu_domain_cache = kmem_cache_create("iommu_domain",
3110 sizeof(struct dmar_domain),
3111 0,
3112 SLAB_HWCACHE_ALIGN,
3113
3114 NULL);
3115 if (!iommu_domain_cache) {
3116 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3117 ret = -ENOMEM;
3118 }
3119
3120 return ret;
3121}
3122
3123static inline int iommu_devinfo_cache_init(void)
3124{
3125 int ret = 0;
3126
3127 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3128 sizeof(struct device_domain_info),
3129 0,
3130 SLAB_HWCACHE_ALIGN,
3131 NULL);
3132 if (!iommu_devinfo_cache) {
3133 printk(KERN_ERR "Couldn't create devinfo cache\n");
3134 ret = -ENOMEM;
3135 }
3136
3137 return ret;
3138}
3139
3140static inline int iommu_iova_cache_init(void)
3141{
3142 int ret = 0;
3143
3144 iommu_iova_cache = kmem_cache_create("iommu_iova",
3145 sizeof(struct iova),
3146 0,
3147 SLAB_HWCACHE_ALIGN,
3148 NULL);
3149 if (!iommu_iova_cache) {
3150 printk(KERN_ERR "Couldn't create iova cache\n");
3151 ret = -ENOMEM;
3152 }
3153
3154 return ret;
3155}
3156
3157static int __init iommu_init_mempool(void)
3158{
3159 int ret;
3160 ret = iommu_iova_cache_init();
3161 if (ret)
3162 return ret;
3163
3164 ret = iommu_domain_cache_init();
3165 if (ret)
3166 goto domain_error;
3167
3168 ret = iommu_devinfo_cache_init();
3169 if (!ret)
3170 return ret;
3171
3172 kmem_cache_destroy(iommu_domain_cache);
3173domain_error:
3174 kmem_cache_destroy(iommu_iova_cache);
3175
3176 return -ENOMEM;
3177}
3178
3179static void __init iommu_exit_mempool(void)
3180{
3181 kmem_cache_destroy(iommu_devinfo_cache);
3182 kmem_cache_destroy(iommu_domain_cache);
3183 kmem_cache_destroy(iommu_iova_cache);
3184
3185}
3186
3187static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3188{
3189 struct dmar_drhd_unit *drhd;
3190 u32 vtbar;
3191 int rc;
3192
3193 /* We know that this device on this chipset has its own IOMMU.
3194 * If we find it under a different IOMMU, then the BIOS is lying
3195 * to us. Hope that the IOMMU for this device is actually
3196 * disabled, and it needs no translation...
3197 */
3198 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3199 if (rc) {
3200 /* "can't" happen */
3201 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3202 return;
3203 }
3204 vtbar &= 0xffff0000;
3205
3206	/* we know that this iommu should be at offset 0xa000 from vtbar */
3207 drhd = dmar_find_matched_drhd_unit(pdev);
3208 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3209 TAINT_FIRMWARE_WORKAROUND,
3210 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3211 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3212}
3213DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3214
3215static void __init init_no_remapping_devices(void)
3216{
3217 struct dmar_drhd_unit *drhd;
3218
3219 for_each_drhd_unit(drhd) {
3220 if (!drhd->include_all) {
3221 int i;
3222 for (i = 0; i < drhd->devices_cnt; i++)
3223 if (drhd->devices[i] != NULL)
3224 break;
3225 /* ignore DMAR unit if no pci devices exist */
3226 if (i == drhd->devices_cnt)
3227 drhd->ignored = 1;
3228 }
3229 }
3230
3231 for_each_drhd_unit(drhd) {
3232 int i;
3233 if (drhd->ignored || drhd->include_all)
3234 continue;
3235
3236 for (i = 0; i < drhd->devices_cnt; i++)
3237 if (drhd->devices[i] &&
3238 !IS_GFX_DEVICE(drhd->devices[i]))
3239 break;
3240
3241 if (i < drhd->devices_cnt)
3242 continue;
3243
3244 /* This IOMMU has *only* gfx devices. Either bypass it or
3245 set the gfx_mapped flag, as appropriate */
3246 if (dmar_map_gfx) {
3247 intel_iommu_gfx_mapped = 1;
3248 } else {
3249 drhd->ignored = 1;
3250 for (i = 0; i < drhd->devices_cnt; i++) {
3251 if (!drhd->devices[i])
3252 continue;
3253 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3254 }
3255 }
3256 }
3257}
3258
3259#ifdef CONFIG_SUSPEND
3260static int init_iommu_hw(void)
3261{
3262 struct dmar_drhd_unit *drhd;
3263 struct intel_iommu *iommu = NULL;
3264
3265 for_each_active_iommu(iommu, drhd)
3266 if (iommu->qi)
3267 dmar_reenable_qi(iommu);
3268
3269 for_each_iommu(iommu, drhd) {
3270 if (drhd->ignored) {
3271 /*
3272 * we always have to disable PMRs or DMA may fail on
3273 * this device
3274 */
3275 if (force_on)
3276 iommu_disable_protect_mem_regions(iommu);
3277 continue;
3278 }
3279
3280 iommu_flush_write_buffer(iommu);
3281
3282 iommu_set_root_entry(iommu);
3283
3284 iommu->flush.flush_context(iommu, 0, 0, 0,
3285 DMA_CCMD_GLOBAL_INVL);
3286 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3287 DMA_TLB_GLOBAL_FLUSH);
3288 if (iommu_enable_translation(iommu))
3289 return 1;
3290 iommu_disable_protect_mem_regions(iommu);
3291 }
3292
3293 return 0;
3294}
3295
3296static void iommu_flush_all(void)
3297{
3298 struct dmar_drhd_unit *drhd;
3299 struct intel_iommu *iommu;
3300
3301 for_each_active_iommu(iommu, drhd) {
3302 iommu->flush.flush_context(iommu, 0, 0, 0,
3303 DMA_CCMD_GLOBAL_INVL);
3304 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3305 DMA_TLB_GLOBAL_FLUSH);
3306 }
3307}
3308
3309static int iommu_suspend(void)
3310{
3311 struct dmar_drhd_unit *drhd;
3312 struct intel_iommu *iommu = NULL;
3313 unsigned long flag;
3314
3315 for_each_active_iommu(iommu, drhd) {
3316 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3317 GFP_ATOMIC);
3318 if (!iommu->iommu_state)
3319 goto nomem;
3320 }
3321
3322 iommu_flush_all();
3323
3324 for_each_active_iommu(iommu, drhd) {
3325 iommu_disable_translation(iommu);
3326
3327 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3328
3329 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3330 readl(iommu->reg + DMAR_FECTL_REG);
3331 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3332 readl(iommu->reg + DMAR_FEDATA_REG);
3333 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3334 readl(iommu->reg + DMAR_FEADDR_REG);
3335 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3336 readl(iommu->reg + DMAR_FEUADDR_REG);
3337
3338 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3339 }
3340 return 0;
3341
3342nomem:
3343 for_each_active_iommu(iommu, drhd)
3344 kfree(iommu->iommu_state);
3345
3346 return -ENOMEM;
3347}
3348
3349static void iommu_resume(void)
3350{
3351 struct dmar_drhd_unit *drhd;
3352 struct intel_iommu *iommu = NULL;
3353 unsigned long flag;
3354
3355 if (init_iommu_hw()) {
3356 if (force_on)
3357 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3358 else
3359 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3360 return;
3361 }
3362
3363 for_each_active_iommu(iommu, drhd) {
3364
3365 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3366
3367 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3368 iommu->reg + DMAR_FECTL_REG);
3369 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3370 iommu->reg + DMAR_FEDATA_REG);
3371 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3372 iommu->reg + DMAR_FEADDR_REG);
3373 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3374 iommu->reg + DMAR_FEUADDR_REG);
3375
3376 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3377 }
3378
3379 for_each_active_iommu(iommu, drhd)
3380 kfree(iommu->iommu_state);
3381}
3382
3383static struct syscore_ops iommu_syscore_ops = {
3384 .resume = iommu_resume,
3385 .suspend = iommu_suspend,
3386};
3387
3388static void __init init_iommu_pm_ops(void)
3389{
3390 register_syscore_ops(&iommu_syscore_ops);
3391}
3392
3393#else
3394static inline void init_iommu_pm_ops(void) {}
3395#endif	/* CONFIG_SUSPEND */
3396
3397LIST_HEAD(dmar_rmrr_units);
3398
3399static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3400{
3401 list_add(&rmrr->list, &dmar_rmrr_units);
3402}
3403
3404
3405int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3406{
3407 struct acpi_dmar_reserved_memory *rmrr;
3408 struct dmar_rmrr_unit *rmrru;
3409
3410 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3411 if (!rmrru)
3412 return -ENOMEM;
3413
3414 rmrru->hdr = header;
3415 rmrr = (struct acpi_dmar_reserved_memory *)header;
3416 rmrru->base_address = rmrr->base_address;
3417 rmrru->end_address = rmrr->end_address;
3418
3419 dmar_register_rmrr_unit(rmrru);
3420 return 0;
3421}
3422
3423static int __init
3424rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3425{
3426 struct acpi_dmar_reserved_memory *rmrr;
3427 int ret;
3428
3429 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3430 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3431 ((void *)rmrr) + rmrr->header.length,
3432 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3433
3434 if (ret || (rmrru->devices_cnt == 0)) {
3435 list_del(&rmrru->list);
3436 kfree(rmrru);
3437 }
3438 return ret;
3439}
3440
3441static LIST_HEAD(dmar_atsr_units);
3442
3443int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3444{
3445 struct acpi_dmar_atsr *atsr;
3446 struct dmar_atsr_unit *atsru;
3447
3448 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3449 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3450 if (!atsru)
3451 return -ENOMEM;
3452
3453 atsru->hdr = hdr;
3454 atsru->include_all = atsr->flags & 0x1;
3455
3456 list_add(&atsru->list, &dmar_atsr_units);
3457
3458 return 0;
3459}
3460
3461static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3462{
3463 int rc;
3464 struct acpi_dmar_atsr *atsr;
3465
3466 if (atsru->include_all)
3467 return 0;
3468
3469 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3470 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3471 (void *)atsr + atsr->header.length,
3472 &atsru->devices_cnt, &atsru->devices,
3473 atsr->segment);
3474 if (rc || !atsru->devices_cnt) {
3475 list_del(&atsru->list);
3476 kfree(atsru);
3477 }
3478
3479 return rc;
3480}
3481
3482int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3483{
3484 int i;
3485 struct pci_bus *bus;
3486 struct acpi_dmar_atsr *atsr;
3487 struct dmar_atsr_unit *atsru;
3488
3489 dev = pci_physfn(dev);
3490
3491 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3492 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3493 if (atsr->segment == pci_domain_nr(dev->bus))
3494 goto found;
3495 }
3496
3497 return 0;
3498
3499found:
3500 for (bus = dev->bus; bus; bus = bus->parent) {
3501 struct pci_dev *bridge = bus->self;
3502
3503 if (!bridge || !pci_is_pcie(bridge) ||
3504 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3505 return 0;
3506
3507 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3508 for (i = 0; i < atsru->devices_cnt; i++)
3509 if (atsru->devices[i] == bridge)
3510 return 1;
3511 break;
3512 }
3513 }
3514
3515 if (atsru->include_all)
3516 return 1;
3517
3518 return 0;
3519}
3520
3521int dmar_parse_rmrr_atsr_dev(void)
3522{
3523 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3524 struct dmar_atsr_unit *atsr, *atsr_n;
3525 int ret = 0;
3526
3527 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3528 ret = rmrr_parse_dev(rmrr);
3529 if (ret)
3530 return ret;
3531 }
3532
3533 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3534 ret = atsr_parse_dev(atsr);
3535 if (ret)
3536 return ret;
3537 }
3538
3539 return ret;
3540}
3541
3542/*
3543 * Here we only respond to a device being unbound from its driver.
3544 *
3545 * A newly added device is not attached to its DMAR domain here yet; that
3546 * happens when the device is first mapped to an iova.
3547 */
3548static int device_notifier(struct notifier_block *nb,
3549 unsigned long action, void *data)
3550{
3551 struct device *dev = data;
3552 struct pci_dev *pdev = to_pci_dev(dev);
3553 struct dmar_domain *domain;
3554
3555 if (iommu_no_mapping(dev))
3556 return 0;
3557
3558 domain = find_domain(pdev);
3559 if (!domain)
3560 return 0;
3561
3562 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3563 domain_remove_one_dev_info(domain, pdev);
3564
3565 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3566 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3567 list_empty(&domain->devices))
3568 domain_exit(domain);
3569 }
3570
3571 return 0;
3572}
3573
3574static struct notifier_block device_nb = {
3575 .notifier_call = device_notifier,
3576};
3577
3578int __init intel_iommu_init(void)
3579{
3580 int ret = 0;
3581
3582 /* VT-d is required for a TXT/tboot launch, so enforce that */
3583 force_on = tboot_force_iommu();
3584
3585 if (dmar_table_init()) {
3586 if (force_on)
3587 panic("tboot: Failed to initialize DMAR table\n");
3588 return -ENODEV;
3589 }
3590
3591 if (dmar_dev_scope_init() < 0) {
3592 if (force_on)
3593 panic("tboot: Failed to initialize DMAR device scope\n");
3594 return -ENODEV;
3595 }
3596
3597 if (no_iommu || dmar_disabled)
3598 return -ENODEV;
3599
3600 if (iommu_init_mempool()) {
3601 if (force_on)
3602 panic("tboot: Failed to initialize iommu memory\n");
3603 return -ENODEV;
3604 }
3605
3606 if (list_empty(&dmar_rmrr_units))
3607 printk(KERN_INFO "DMAR: No RMRR found\n");
3608
3609 if (list_empty(&dmar_atsr_units))
3610 printk(KERN_INFO "DMAR: No ATSR found\n");
3611
3612 if (dmar_init_reserved_ranges()) {
3613 if (force_on)
3614 panic("tboot: Failed to reserve iommu ranges\n");
3615 return -ENODEV;
3616 }
3617
3618 init_no_remapping_devices();
3619
3620 ret = init_dmars();
3621 if (ret) {
3622 if (force_on)
3623 panic("tboot: Failed to initialize DMARs\n");
3624 printk(KERN_ERR "IOMMU: dmar init failed\n");
3625 put_iova_domain(&reserved_iova_list);
3626 iommu_exit_mempool();
3627 return ret;
3628 }
3629 printk(KERN_INFO
3630 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3631
3632 init_timer(&unmap_timer);
3633#ifdef CONFIG_SWIOTLB
3634 swiotlb = 0;
3635#endif
3636 dma_ops = &intel_dma_ops;
3637
3638 init_iommu_pm_ops();
3639
3640 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3641
3642 bus_register_notifier(&pci_bus_type, &device_nb);
3643
3644 return 0;
3645}
3646
3647static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3648 struct pci_dev *pdev)
3649{
3650 struct pci_dev *tmp, *parent;
3651
3652 if (!iommu || !pdev)
3653 return;
3654
3655 /* dependent device detach */
3656 tmp = pci_find_upstream_pcie_bridge(pdev);
3657 /* Secondary interface's bus number and devfn 0 */
3658 if (tmp) {
3659 parent = pdev->bus->self;
3660 while (parent != tmp) {
3661 iommu_detach_dev(iommu, parent->bus->number,
3662 parent->devfn);
3663 parent = parent->bus->self;
3664 }
3665 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3666 iommu_detach_dev(iommu,
3667 tmp->subordinate->number, 0);
3668 else /* this is a legacy PCI bridge */
3669 iommu_detach_dev(iommu, tmp->bus->number,
3670 tmp->devfn);
3671 }
3672}
3673
3674static void domain_remove_one_dev_info(struct dmar_domain *domain,
3675 struct pci_dev *pdev)
3676{
3677 struct device_domain_info *info;
3678 struct intel_iommu *iommu;
3679 unsigned long flags;
3680 int found = 0;
3681 struct list_head *entry, *tmp;
3682
3683 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3684 pdev->devfn);
3685 if (!iommu)
3686 return;
3687
3688 spin_lock_irqsave(&device_domain_lock, flags);
3689 list_for_each_safe(entry, tmp, &domain->devices) {
3690 info = list_entry(entry, struct device_domain_info, link);
3691 if (info->segment == pci_domain_nr(pdev->bus) &&
3692 info->bus == pdev->bus->number &&
3693 info->devfn == pdev->devfn) {
3694 list_del(&info->link);
3695 list_del(&info->global);
3696 if (info->dev)
3697 info->dev->dev.archdata.iommu = NULL;
3698 spin_unlock_irqrestore(&device_domain_lock, flags);
3699
3700 iommu_disable_dev_iotlb(info);
3701 iommu_detach_dev(iommu, info->bus, info->devfn);
3702 iommu_detach_dependent_devices(iommu, pdev);
3703 free_devinfo_mem(info);
3704
3705 spin_lock_irqsave(&device_domain_lock, flags);
3706
3707 if (found)
3708 break;
3709 else
3710 continue;
3711 }
3712
3713 /* if there are no other devices under the same iommu
3714 * owned by this domain, clear this iommu in iommu_bmp and
3715 * update the iommu count and coherency
3716 */
3717 if (iommu == device_to_iommu(info->segment, info->bus,
3718 info->devfn))
3719 found = 1;
3720 }
3721
3722 spin_unlock_irqrestore(&device_domain_lock, flags);
3723
3724 if (found == 0) {
3725 unsigned long tmp_flags;
3726 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3727 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3728 domain->iommu_count--;
3729 domain_update_iommu_cap(domain);
3730 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3731
3732 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3733 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3734 spin_lock_irqsave(&iommu->lock, tmp_flags);
3735 clear_bit(domain->id, iommu->domain_ids);
3736 iommu->domains[domain->id] = NULL;
3737 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3738 }
3739 }
3740}
3741
3742static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3743{
3744 struct device_domain_info *info;
3745 struct intel_iommu *iommu;
3746 unsigned long flags1, flags2;
3747
3748 spin_lock_irqsave(&device_domain_lock, flags1);
3749 while (!list_empty(&domain->devices)) {
3750 info = list_entry(domain->devices.next,
3751 struct device_domain_info, link);
3752 list_del(&info->link);
3753 list_del(&info->global);
3754 if (info->dev)
3755 info->dev->dev.archdata.iommu = NULL;
3756
3757 spin_unlock_irqrestore(&device_domain_lock, flags1);
3758
3759 iommu_disable_dev_iotlb(info);
3760 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3761 iommu_detach_dev(iommu, info->bus, info->devfn);
3762 iommu_detach_dependent_devices(iommu, info->dev);
3763
3764 /* clear this iommu in iommu_bmp, update iommu count
3765 * and capabilities
3766 */
3767 spin_lock_irqsave(&domain->iommu_lock, flags2);
3768 if (test_and_clear_bit(iommu->seq_id,
3769 &domain->iommu_bmp)) {
3770 domain->iommu_count--;
3771 domain_update_iommu_cap(domain);
3772 }
3773 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3774
3775 free_devinfo_mem(info);
3776 spin_lock_irqsave(&device_domain_lock, flags1);
3777 }
3778 spin_unlock_irqrestore(&device_domain_lock, flags1);
3779}
3780
3781/* domain id for a virtual machine; it won't be set in the context entries */
3782static unsigned long vm_domid;
3783
3784static struct dmar_domain *iommu_alloc_vm_domain(void)
3785{
3786 struct dmar_domain *domain;
3787
3788 domain = alloc_domain_mem();
3789 if (!domain)
3790 return NULL;
3791
3792 domain->id = vm_domid++;
3793 domain->nid = -1;
3794 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3795 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3796
3797 return domain;
3798}
3799
3800static int md_domain_init(struct dmar_domain *domain, int guest_width)
3801{
3802 int adjust_width;
3803
3804 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3805 spin_lock_init(&domain->iommu_lock);
3806
3807 domain_reserve_special_ranges(domain);
3808
3809 /* calculate AGAW */
3810 domain->gaw = guest_width;
3811 adjust_width = guestwidth_to_adjustwidth(guest_width);
3812 domain->agaw = width_to_agaw(adjust_width);
3813
3814 INIT_LIST_HEAD(&domain->devices);
3815
3816 domain->iommu_count = 0;
3817 domain->iommu_coherency = 0;
3818 domain->iommu_snooping = 0;
3819 domain->iommu_superpage = 0;
3820 domain->max_addr = 0;
3821 domain->nid = -1;
3822
3823 /* always allocate the top pgd */
3824 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3825 if (!domain->pgd)
3826 return -ENOMEM;
3827 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3828 return 0;
3829}
3830
3831static void iommu_free_vm_domain(struct dmar_domain *domain)
3832{
3833 unsigned long flags;
3834 struct dmar_drhd_unit *drhd;
3835 struct intel_iommu *iommu;
3836 unsigned long i;
3837 unsigned long ndomains;
3838
3839 for_each_drhd_unit(drhd) {
3840 if (drhd->ignored)
3841 continue;
3842 iommu = drhd->iommu;
3843
3844 ndomains = cap_ndoms(iommu->cap);
3845 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3846 if (iommu->domains[i] == domain) {
3847 spin_lock_irqsave(&iommu->lock, flags);
3848 clear_bit(i, iommu->domain_ids);
3849 iommu->domains[i] = NULL;
3850 spin_unlock_irqrestore(&iommu->lock, flags);
3851 break;
3852 }
3853 }
3854 }
3855}
3856
3857static void vm_domain_exit(struct dmar_domain *domain)
3858{
3859 /* Domain 0 is reserved, so don't process it */
3860 if (!domain)
3861 return;
3862
3863 vm_domain_remove_all_dev_info(domain);
3864 /* destroy iovas */
3865 put_iova_domain(&domain->iovad);
3866
3867 /* clear ptes */
3868 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3869
3870 /* free page tables */
3871 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3872
3873 iommu_free_vm_domain(domain);
3874 free_domain_mem(domain);
3875}
3876
3877static int intel_iommu_domain_init(struct iommu_domain *domain)
3878{
3879 struct dmar_domain *dmar_domain;
3880
3881 dmar_domain = iommu_alloc_vm_domain();
3882 if (!dmar_domain) {
3883 printk(KERN_ERR
3884 "intel_iommu_domain_init: dmar_domain == NULL\n");
3885 return -ENOMEM;
3886 }
3887 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3888 printk(KERN_ERR
3889 "intel_iommu_domain_init() failed\n");
3890 vm_domain_exit(dmar_domain);
3891 return -ENOMEM;
3892 }
3893 domain_update_iommu_cap(dmar_domain);
3894 domain->priv = dmar_domain;
3895
3896 return 0;
3897}
3898
3899static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3900{
3901 struct dmar_domain *dmar_domain = domain->priv;
3902
3903 domain->priv = NULL;
3904 vm_domain_exit(dmar_domain);
3905}
3906
3907static int intel_iommu_attach_device(struct iommu_domain *domain,
3908 struct device *dev)
3909{
3910 struct dmar_domain *dmar_domain = domain->priv;
3911 struct pci_dev *pdev = to_pci_dev(dev);
3912 struct intel_iommu *iommu;
3913 int addr_width;
3914
3915 /* normally pdev is not mapped */
3916 if (unlikely(domain_context_mapped(pdev))) {
3917 struct dmar_domain *old_domain;
3918
3919 old_domain = find_domain(pdev);
3920 if (old_domain) {
3921 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3922 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3923 domain_remove_one_dev_info(old_domain, pdev);
3924 else
3925 domain_remove_dev_info(old_domain);
3926 }
3927 }
3928
3929 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3930 pdev->devfn);
3931 if (!iommu)
3932 return -ENODEV;
3933
3934 /* check if this iommu agaw is sufficient for max mapped address */
3935 addr_width = agaw_to_width(iommu->agaw);
3936 if (addr_width > cap_mgaw(iommu->cap))
3937 addr_width = cap_mgaw(iommu->cap);
3938
3939 if (dmar_domain->max_addr > (1LL << addr_width)) {
3940 printk(KERN_ERR "%s: iommu width (%d) is not "
3941 "sufficient for the mapped address (%llx)\n",
3942 __func__, addr_width, dmar_domain->max_addr);
3943 return -EFAULT;
3944 }
3945 dmar_domain->gaw = addr_width;
3946
3947 /*
3948 * Knock out extra levels of page tables if necessary
3949 */
3950 while (iommu->agaw < dmar_domain->agaw) {
3951 struct dma_pte *pte;
3952
3953 pte = dmar_domain->pgd;
3954 if (dma_pte_present(pte)) {
3955 dmar_domain->pgd = (struct dma_pte *)
3956 phys_to_virt(dma_pte_addr(pte));
3957 free_pgtable_page(pte);
3958 }
3959 dmar_domain->agaw--;
3960 }
3961
3962 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3963}
3964
3965static void intel_iommu_detach_device(struct iommu_domain *domain,
3966 struct device *dev)
3967{
3968 struct dmar_domain *dmar_domain = domain->priv;
3969 struct pci_dev *pdev = to_pci_dev(dev);
3970
3971 domain_remove_one_dev_info(dmar_domain, pdev);
3972}
3973
3974static int intel_iommu_map(struct iommu_domain *domain,
3975 unsigned long iova, phys_addr_t hpa,
3976 int gfp_order, int iommu_prot)
3977{
3978 struct dmar_domain *dmar_domain = domain->priv;
3979 u64 max_addr;
3980 int prot = 0;
3981 size_t size;
3982 int ret;
3983
3984 if (iommu_prot & IOMMU_READ)
3985 prot |= DMA_PTE_READ;
3986 if (iommu_prot & IOMMU_WRITE)
3987 prot |= DMA_PTE_WRITE;
3988 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3989 prot |= DMA_PTE_SNP;
3990
3991 size = PAGE_SIZE << gfp_order;
3992 max_addr = iova + size;
3993 if (dmar_domain->max_addr < max_addr) {
3994 u64 end;
3995
3996 /* check if minimum agaw is sufficient for mapped address */
3997 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3998 if (end < max_addr) {
3999 printk(KERN_ERR "%s: iommu width (%d) is not "
4000 "sufficient for the mapped address (%llx)\n",
4001 __func__, dmar_domain->gaw, max_addr);
4002 return -EFAULT;
4003 }
4004 dmar_domain->max_addr = max_addr;
4005 }
4006 /* Round up size to next multiple of PAGE_SIZE, if it and
4007 the low bits of hpa would take us onto the next page */
4008 size = aligned_nrpages(hpa, size);
4009 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4010 hpa >> VTD_PAGE_SHIFT, size, prot);
4011 return ret;
4012}
4013
4014static int intel_iommu_unmap(struct iommu_domain *domain,
4015 unsigned long iova, int gfp_order)
4016{
4017 struct dmar_domain *dmar_domain = domain->priv;
4018 size_t size = PAGE_SIZE << gfp_order;
4019 int order;
4020
4021 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4022 (iova + size - 1) >> VTD_PAGE_SHIFT);
4023
4024 if (dmar_domain->max_addr == iova + size)
4025 dmar_domain->max_addr = iova;
4026
4027 return order;
4028}
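/*
 * Editorial note (not part of this commit): both callbacks above take a
 * gfp_order rather than a byte count, and the affected size is always
 * PAGE_SIZE << gfp_order.  With 4KiB pages, for example, gfp_order 0 maps
 * one 4KiB page and gfp_order 9 maps a 2MiB region; iommu_map() in
 * drivers/iommu/iommu.c additionally requires iova and hpa to be aligned
 * to that size.
 */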
4029
4030static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4031 unsigned long iova)
4032{
4033 struct dmar_domain *dmar_domain = domain->priv;
4034 struct dma_pte *pte;
4035 u64 phys = 0;
4036
4037 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4038 if (pte)
4039 phys = dma_pte_addr(pte);
4040
4041 return phys;
4042}
4043
4044static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4045 unsigned long cap)
4046{
4047 struct dmar_domain *dmar_domain = domain->priv;
4048
4049 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4050 return dmar_domain->iommu_snooping;
4051 if (cap == IOMMU_CAP_INTR_REMAP)
4052 return intr_remapping_enabled;
4053
4054 return 0;
4055}
4056
4057static struct iommu_ops intel_iommu_ops = {
4058 .domain_init = intel_iommu_domain_init,
4059 .domain_destroy = intel_iommu_domain_destroy,
4060 .attach_dev = intel_iommu_attach_device,
4061 .detach_dev = intel_iommu_detach_device,
4062 .map = intel_iommu_map,
4063 .unmap = intel_iommu_unmap,
4064 .iova_to_phys = intel_iommu_iova_to_phys,
4065 .domain_has_cap = intel_iommu_domain_has_cap,
4066};
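/*
 * Editorial sketch (not part of this commit): these ops are reached only
 * through the thin wrappers in drivers/iommu/iommu.c once intel_iommu_init()
 * has called bus_set_iommu(&pci_bus_type, &intel_iommu_ops).  For a caller
 * the chain looks like:
 *
 *	iommu_domain_alloc(&pci_bus_type)    ->  intel_iommu_domain_init()
 *	iommu_attach_device(domain, dev)     ->  intel_iommu_attach_device()
 *	iommu_map(domain, iova, pa, 0, prot) ->  intel_iommu_map()
 *	iommu_unmap(domain, iova, 0)         ->  intel_iommu_unmap()
 *	iommu_domain_free(domain)            ->  intel_iommu_domain_destroy()
 */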
4067
4068static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4069{
4070 /*
4071 * Mobile 4 Series Chipset neglects to set RWBF capability,
4072 * but needs it:
4073 */
4074 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4075 rwbf_quirk = 1;
4076
4077 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
4078 if (dev->revision == 0x07) {
4079 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4080 dmar_map_gfx = 0;
4081 }
4082}
4083
4084DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4085
4086#define GGC 0x52
4087#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4088#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4089#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4090#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4091#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4092#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4093#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4094#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4095
4096static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4097{
4098 unsigned short ggc;
4099
4100 if (pci_read_config_word(dev, GGC, &ggc))
4101 return;
4102
4103 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4104 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4105 dmar_map_gfx = 0;
4106 } else if (dmar_map_gfx) {
4107 /* we have to ensure the gfx device is idle before we flush */
4108 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4109 intel_iommu_strict = 1;
4110 }
4111}
4112DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4113DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4114DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4115DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4116
4117/* On Tylersburg chipsets, some BIOSes have been known to enable the
4118 ISOCH DMAR unit for the Azalia sound device, but not give it any
4119 TLB entries, which causes it to deadlock. Check for that. We do
4120 this in a function called from init_dmars(), instead of in a PCI
4121 quirk, because we don't want to print the obnoxious "BIOS broken"
4122 message if VT-d is actually disabled.
4123*/
4124static void __init check_tylersburg_isoch(void)
4125{
4126 struct pci_dev *pdev;
4127 uint32_t vtisochctrl;
4128
4129 /* If there's no Azalia in the system anyway, forget it. */
4130 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4131 if (!pdev)
4132 return;
4133 pci_dev_put(pdev);
4134
4135 /* System Management Registers. Might be hidden, in which case
4136 we can't do the sanity check. But that's OK, because the
4137 known-broken BIOSes _don't_ actually hide it, so far. */
4138 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4139 if (!pdev)
4140 return;
4141
4142 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4143 pci_dev_put(pdev);
4144 return;
4145 }
4146
4147 pci_dev_put(pdev);
4148
4149 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4150 if (vtisochctrl & 1)
4151 return;
4152
4153 /* Drop all bits other than the number of TLB entries */
4154 vtisochctrl &= 0x1c;
4155
4156 /* If we have the recommended number of TLB entries (16), fine. */
4157 if (vtisochctrl == 0x10)
4158 return;
4159
4160 /* Zero TLB entries? You get to ride the short bus to school. */
4161 if (!vtisochctrl) {
4162 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4163 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4164 dmi_get_system_info(DMI_BIOS_VENDOR),
4165 dmi_get_system_info(DMI_BIOS_VERSION),
4166 dmi_get_system_info(DMI_PRODUCT_VERSION));
4167 iommu_identity_mapping |= IDENTMAP_AZALIA;
4168 return;
4169 }
4170
4171 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4172 vtisochctrl);
4173}
diff --git a/drivers/iommu/intr_remapping.c b/drivers/iommu/intr_remapping.c
new file mode 100644
index 000000000000..07c9f189f314
--- /dev/null
+++ b/drivers/iommu/intr_remapping.c
@@ -0,0 +1,834 @@
1#include <linux/interrupt.h>
2#include <linux/dmar.h>
3#include <linux/spinlock.h>
4#include <linux/slab.h>
5#include <linux/jiffies.h>
6#include <linux/hpet.h>
7#include <linux/pci.h>
8#include <linux/irq.h>
9#include <asm/io_apic.h>
10#include <asm/smp.h>
11#include <asm/cpu.h>
12#include <linux/intel-iommu.h>
13#include "intr_remapping.h"
14#include <acpi/acpi.h>
15#include <asm/pci-direct.h>
16
17static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
18static struct hpet_scope ir_hpet[MAX_HPET_TBS];
19static int ir_ioapic_num, ir_hpet_num;
20int intr_remapping_enabled;
21
22static int disable_intremap;
23static int disable_sourceid_checking;
24static int no_x2apic_optout;
25
26static __init int setup_nointremap(char *str)
27{
28 disable_intremap = 1;
29 return 0;
30}
31early_param("nointremap", setup_nointremap);
32
33static __init int setup_intremap(char *str)
34{
35 if (!str)
36 return -EINVAL;
37
38 while (*str) {
39 if (!strncmp(str, "on", 2))
40 disable_intremap = 0;
41 else if (!strncmp(str, "off", 3))
42 disable_intremap = 1;
43 else if (!strncmp(str, "nosid", 5))
44 disable_sourceid_checking = 1;
45 else if (!strncmp(str, "no_x2apic_optout", 16))
46 no_x2apic_optout = 1;
47
48 str += strcspn(str, ",");
49 while (*str == ',')
50 str++;
51 }
52
53 return 0;
54}
55early_param("intremap", setup_intremap);
56
57static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
58
59static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
60{
61 struct irq_cfg *cfg = irq_get_chip_data(irq);
62 return cfg ? &cfg->irq_2_iommu : NULL;
63}
64
65int get_irte(int irq, struct irte *entry)
66{
67 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
68 unsigned long flags;
69 int index;
70
71 if (!entry || !irq_iommu)
72 return -1;
73
74 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
75
76 index = irq_iommu->irte_index + irq_iommu->sub_handle;
77 *entry = *(irq_iommu->iommu->ir_table->base + index);
78
79 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
80 return 0;
81}
82
83int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
84{
85 struct ir_table *table = iommu->ir_table;
86 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
87 u16 index, start_index;
88 unsigned int mask = 0;
89 unsigned long flags;
90 int i;
91
92 if (!count || !irq_iommu)
93 return -1;
94
95 /*
96 * start the IRTE search from index 0.
97 */
98 index = start_index = 0;
99
100 if (count > 1) {
101 count = __roundup_pow_of_two(count);
102 mask = ilog2(count);
103 }
104
105 if (mask > ecap_max_handle_mask(iommu->ecap)) {
106 printk(KERN_ERR
107 "Requested mask %x exceeds the max invalidation handle"
108 " mask value %Lx\n", mask,
109 ecap_max_handle_mask(iommu->ecap));
110 return -1;
111 }
112
113 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
114 do {
115 for (i = index; i < index + count; i++)
116 if (table->base[i].present)
117 break;
118 /* empty index found */
119 if (i == index + count)
120 break;
121
122 index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
123
124 if (index == start_index) {
125 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
126 printk(KERN_ERR "can't allocate an IRTE\n");
127 return -1;
128 }
129 } while (1);
130
131 for (i = index; i < index + count; i++)
132 table->base[i].present = 1;
133
134 irq_iommu->iommu = iommu;
135 irq_iommu->irte_index = index;
136 irq_iommu->sub_handle = 0;
137 irq_iommu->irte_mask = mask;
138
139 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
140
141 return index;
142}
143
144static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
145{
146 struct qi_desc desc;
147
148 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
149 | QI_IEC_SELECTIVE;
150 desc.high = 0;
151
152 return qi_submit_sync(&desc, iommu);
153}
154
155int map_irq_to_irte_handle(int irq, u16 *sub_handle)
156{
157 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
158 unsigned long flags;
159 int index;
160
161 if (!irq_iommu)
162 return -1;
163
164 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
165 *sub_handle = irq_iommu->sub_handle;
166 index = irq_iommu->irte_index;
167 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
168 return index;
169}
170
171int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
172{
173 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
174 unsigned long flags;
175
176 if (!irq_iommu)
177 return -1;
178
179 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
180
181 irq_iommu->iommu = iommu;
182 irq_iommu->irte_index = index;
183 irq_iommu->sub_handle = subhandle;
184 irq_iommu->irte_mask = 0;
185
186 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
187
188 return 0;
189}
190
191int modify_irte(int irq, struct irte *irte_modified)
192{
193 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
194 struct intel_iommu *iommu;
195 unsigned long flags;
196 struct irte *irte;
197 int rc, index;
198
199 if (!irq_iommu)
200 return -1;
201
202 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
203
204 iommu = irq_iommu->iommu;
205
206 index = irq_iommu->irte_index + irq_iommu->sub_handle;
207 irte = &iommu->ir_table->base[index];
208
209 set_64bit(&irte->low, irte_modified->low);
210 set_64bit(&irte->high, irte_modified->high);
211 __iommu_flush_cache(iommu, irte, sizeof(*irte));
212
213 rc = qi_flush_iec(iommu, index, 0);
214 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
215
216 return rc;
217}
218
219struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
220{
221 int i;
222
223 for (i = 0; i < MAX_HPET_TBS; i++)
224 if (ir_hpet[i].id == hpet_id)
225 return ir_hpet[i].iommu;
226 return NULL;
227}
228
229struct intel_iommu *map_ioapic_to_ir(int apic)
230{
231 int i;
232
233 for (i = 0; i < MAX_IO_APICS; i++)
234 if (ir_ioapic[i].id == apic)
235 return ir_ioapic[i].iommu;
236 return NULL;
237}
238
239struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
240{
241 struct dmar_drhd_unit *drhd;
242
243 drhd = dmar_find_matched_drhd_unit(dev);
244 if (!drhd)
245 return NULL;
246
247 return drhd->iommu;
248}
249
250static int clear_entries(struct irq_2_iommu *irq_iommu)
251{
252 struct irte *start, *entry, *end;
253 struct intel_iommu *iommu;
254 int index;
255
256 if (irq_iommu->sub_handle)
257 return 0;
258
259 iommu = irq_iommu->iommu;
260 index = irq_iommu->irte_index + irq_iommu->sub_handle;
261
262 start = iommu->ir_table->base + index;
263 end = start + (1 << irq_iommu->irte_mask);
264
265 for (entry = start; entry < end; entry++) {
266 set_64bit(&entry->low, 0);
267 set_64bit(&entry->high, 0);
268 }
269
270 return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
271}
272
273int free_irte(int irq)
274{
275 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
276 unsigned long flags;
277 int rc;
278
279 if (!irq_iommu)
280 return -1;
281
282 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
283
284 rc = clear_entries(irq_iommu);
285
286 irq_iommu->iommu = NULL;
287 irq_iommu->irte_index = 0;
288 irq_iommu->sub_handle = 0;
289 irq_iommu->irte_mask = 0;
290
291 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
292
293 return rc;
294}
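/*
 * Editorial sketch, not part of this commit: the typical lifecycle of an
 * IRTE for a single MSI vector, using only helpers from this file.  The
 * device `pdev` and the `irq` number are hypothetical, and the vector/
 * destination fields normally programmed by the arch MSI code are elided.
 */
static int example_setup_msi_irte(struct pci_dev *pdev, int irq)
{
	struct intel_iommu *iommu = map_dev_to_ir(pdev);
	struct irte irte;
	int index;

	if (!iommu)
		return -ENODEV;

	index = alloc_irte(iommu, irq, 1);	/* reserve one table entry */
	if (index < 0)
		return -ENOSPC;

	memset(&irte, 0, sizeof(irte));
	irte.present = 1;
	/* ... arch code fills in vector, delivery mode and destination ... */
	set_msi_sid(&irte, pdev);	/* restrict the entry to this requester */

	return modify_irte(irq, &irte);	/* write the entry and flush the IEC */
	/* teardown path: free_irte(irq); */
}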
295
296/*
297 * source validation type
298 */
299#define SVT_NO_VERIFY 0x0 /* no verification is required */
300#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */
301#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */
302
303/*
304 * source-id qualifier
305 */
306#define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */
307#define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore
308 * the third least significant bit
309 */
310#define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore
311 * the second and third least significant bits
312 */
313#define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore
314 * the three least significant bits
315 */
316
317/*
318 * set SVT, SQ and SID fields of irte to verify
319 * source ids of interrupt requests
320 */
321static void set_irte_sid(struct irte *irte, unsigned int svt,
322 unsigned int sq, unsigned int sid)
323{
324 if (disable_sourceid_checking)
325 svt = SVT_NO_VERIFY;
326 irte->svt = svt;
327 irte->sq = sq;
328 irte->sid = sid;
329}
330
331int set_ioapic_sid(struct irte *irte, int apic)
332{
333 int i;
334 u16 sid = 0;
335
336 if (!irte)
337 return -1;
338
339 for (i = 0; i < MAX_IO_APICS; i++) {
340 if (ir_ioapic[i].id == apic) {
341 sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
342 break;
343 }
344 }
345
346 if (sid == 0) {
347 pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
348 return -1;
349 }
350
351 set_irte_sid(irte, 1, 0, sid);
352
353 return 0;
354}
355
356int set_hpet_sid(struct irte *irte, u8 id)
357{
358 int i;
359 u16 sid = 0;
360
361 if (!irte)
362 return -1;
363
364 for (i = 0; i < MAX_HPET_TBS; i++) {
365 if (ir_hpet[i].id == id) {
366 sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
367 break;
368 }
369 }
370
371 if (sid == 0) {
372 pr_warning("Failed to set source-id of HPET block (%d)\n", id);
373 return -1;
374 }
375
376 /*
377 * Should really use SQ_ALL_16. Some platforms are broken.
378 * While we figure out the right quirks for these broken platforms, use
379 * SQ_13_IGNORE_3 for now.
380 */
381 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
382
383 return 0;
384}
385
386int set_msi_sid(struct irte *irte, struct pci_dev *dev)
387{
388 struct pci_dev *bridge;
389
390 if (!irte || !dev)
391 return -1;
392
393 /* PCIe device or Root Complex integrated PCI device */
394 if (pci_is_pcie(dev) || !dev->bus->parent) {
395 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
396 (dev->bus->number << 8) | dev->devfn);
397 return 0;
398 }
399
400 bridge = pci_find_upstream_pcie_bridge(dev);
401 if (bridge) {
402 if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
403 set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
404 (bridge->bus->number << 8) | dev->bus->number);
405 else /* this is a legacy PCI bridge */
406 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
407 (bridge->bus->number << 8) | bridge->devfn);
408 }
409
410 return 0;
411}
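/*
 * Worked example (editorial, not from this commit): for a PCIe function at
 * bus 0x1a, devfn 0x00, set_msi_sid() programs SVT_VERIFY_SID_SQ with
 * SQ_ALL_16 and sid = (0x1a << 8) | 0x00 = 0x1a00, so only interrupt
 * requests whose requester-id matches all 16 bits are accepted.  Behind a
 * PCIe-to-PCI bridge the whole secondary bus is trusted instead:
 * SVT_VERIFY_BUS with sid = (bridge bus << 8) | device bus.
 */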
412
413static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
414{
415 u64 addr;
416 u32 sts;
417 unsigned long flags;
418
419 addr = virt_to_phys((void *)iommu->ir_table->base);
420
421 raw_spin_lock_irqsave(&iommu->register_lock, flags);
422
423 dmar_writeq(iommu->reg + DMAR_IRTA_REG,
424 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
425
426 /* Set interrupt-remapping table pointer */
427 iommu->gcmd |= DMA_GCMD_SIRTP;
428 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
429
430 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
431 readl, (sts & DMA_GSTS_IRTPS), sts);
432 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
433
434 /*
435 * global invalidation of interrupt entry cache before enabling
436 * interrupt-remapping.
437 */
438 qi_global_iec(iommu);
439
440 raw_spin_lock_irqsave(&iommu->register_lock, flags);
441
442 /* Enable interrupt-remapping */
443 iommu->gcmd |= DMA_GCMD_IRE;
444 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
445
446 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
447 readl, (sts & DMA_GSTS_IRES), sts);
448
449 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
450}
451
452
453static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
454{
455 struct ir_table *ir_table;
456 struct page *pages;
457
458 ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
459 GFP_ATOMIC);
460
461 if (!iommu->ir_table)
462 return -ENOMEM;
463
464 pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
465 INTR_REMAP_PAGE_ORDER);
466
467 if (!pages) {
468 printk(KERN_ERR "failed to allocate pages of order %d\n",
469 INTR_REMAP_PAGE_ORDER);
470 kfree(iommu->ir_table);
471 return -ENOMEM;
472 }
473
474 ir_table->base = page_address(pages);
475
476 iommu_set_intr_remapping(iommu, mode);
477 return 0;
478}
479
480/*
481 * Disable Interrupt Remapping.
482 */
483static void iommu_disable_intr_remapping(struct intel_iommu *iommu)
484{
485 unsigned long flags;
486 u32 sts;
487
488 if (!ecap_ir_support(iommu->ecap))
489 return;
490
491 /*
492 * global invalidation of interrupt entry cache before disabling
493 * interrupt-remapping.
494 */
495 qi_global_iec(iommu);
496
497 raw_spin_lock_irqsave(&iommu->register_lock, flags);
498
499 sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
500 if (!(sts & DMA_GSTS_IRES))
501 goto end;
502
503 iommu->gcmd &= ~DMA_GCMD_IRE;
504 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
505
506 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
507 readl, !(sts & DMA_GSTS_IRES), sts);
508
509end:
510 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
511}
512
513static int __init dmar_x2apic_optout(void)
514{
515 struct acpi_table_dmar *dmar;
516 dmar = (struct acpi_table_dmar *)dmar_tbl;
517 if (!dmar || no_x2apic_optout)
518 return 0;
519 return dmar->flags & DMAR_X2APIC_OPT_OUT;
520}
521
522int __init intr_remapping_supported(void)
523{
524 struct dmar_drhd_unit *drhd;
525
526 if (disable_intremap)
527 return 0;
528
529 if (!dmar_ir_support())
530 return 0;
531
532 for_each_drhd_unit(drhd) {
533 struct intel_iommu *iommu = drhd->iommu;
534
535 if (!ecap_ir_support(iommu->ecap))
536 return 0;
537 }
538
539 return 1;
540}
541
542int __init enable_intr_remapping(void)
543{
544 struct dmar_drhd_unit *drhd;
545 int setup = 0;
546 int eim = 0;
547
548 if (parse_ioapics_under_ir() != 1) {
549 printk(KERN_INFO "Not enable interrupt remapping\n");
550 return -1;
551 }
552
553 if (x2apic_supported()) {
554 eim = !dmar_x2apic_optout();
555 WARN(!eim, KERN_WARNING
556 "Your BIOS is broken and requested that x2apic be disabled\n"
557 "This will leave your machine vulnerable to irq-injection attacks\n"
558 "Use 'intremap=no_x2apic_optout' to override BIOS request\n");
559 }
560
561 for_each_drhd_unit(drhd) {
562 struct intel_iommu *iommu = drhd->iommu;
563
564 /*
565 * If the queued invalidation is already initialized,
566 * we shouldn't disable it.
567 */
568 if (iommu->qi)
569 continue;
570
571 /*
572 * Clear previous faults.
573 */
574 dmar_fault(-1, iommu);
575
576 /*
577 * Disable intr remapping and queued invalidation, if already
578 * enabled prior to OS handover.
579 */
580 iommu_disable_intr_remapping(iommu);
581
582 dmar_disable_qi(iommu);
583 }
584
585 /*
586 * check for the Interrupt-remapping support
587 */
588 for_each_drhd_unit(drhd) {
589 struct intel_iommu *iommu = drhd->iommu;
590
591 if (!ecap_ir_support(iommu->ecap))
592 continue;
593
594 if (eim && !ecap_eim_support(iommu->ecap)) {
595 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
596 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
597 return -1;
598 }
599 }
600
601 /*
602 * Enable queued invalidation for all the DRHD's.
603 */
604 for_each_drhd_unit(drhd) {
605 int ret;
606 struct intel_iommu *iommu = drhd->iommu;
607 ret = dmar_enable_qi(iommu);
608
609 if (ret) {
610 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
611 " invalidation, ecap %Lx, ret %d\n",
612 drhd->reg_base_addr, iommu->ecap, ret);
613 return -1;
614 }
615 }
616
617 /*
618 * Setup Interrupt-remapping for all the DRHD's now.
619 */
620 for_each_drhd_unit(drhd) {
621 struct intel_iommu *iommu = drhd->iommu;
622
623 if (!ecap_ir_support(iommu->ecap))
624 continue;
625
626 if (setup_intr_remapping(iommu, eim))
627 goto error;
628
629 setup = 1;
630 }
631
632 if (!setup)
633 goto error;
634
635 intr_remapping_enabled = 1;
636 pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
637
638 return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
639
640error:
641 /*
642 * handle error condition gracefully here!
643 */
644 return -1;
645}
646
647static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
648 struct intel_iommu *iommu)
649{
650 struct acpi_dmar_pci_path *path;
651 u8 bus;
652 int count;
653
654 bus = scope->bus;
655 path = (struct acpi_dmar_pci_path *)(scope + 1);
656 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
657 / sizeof(struct acpi_dmar_pci_path);
658
659 while (--count > 0) {
660 /*
661 * Access PCI directly because the PCI
662 * subsystem isn't initialized yet.
663 */
664 bus = read_pci_config_byte(bus, path->dev, path->fn,
665 PCI_SECONDARY_BUS);
666 path++;
667 }
668 ir_hpet[ir_hpet_num].bus = bus;
669 ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->dev, path->fn);
670 ir_hpet[ir_hpet_num].iommu = iommu;
671 ir_hpet[ir_hpet_num].id = scope->enumeration_id;
672 ir_hpet_num++;
673}
674
675static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
676 struct intel_iommu *iommu)
677{
678 struct acpi_dmar_pci_path *path;
679 u8 bus;
680 int count;
681
682 bus = scope->bus;
683 path = (struct acpi_dmar_pci_path *)(scope + 1);
684 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
685 / sizeof(struct acpi_dmar_pci_path);
686
687 while (--count > 0) {
688 /*
689 * Access PCI directly because the PCI
690 * subsystem isn't initialized yet.
691 */
692 bus = read_pci_config_byte(bus, path->dev, path->fn,
693 PCI_SECONDARY_BUS);
694 path++;
695 }
696
697 ir_ioapic[ir_ioapic_num].bus = bus;
698 ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn);
699 ir_ioapic[ir_ioapic_num].iommu = iommu;
700 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
701 ir_ioapic_num++;
702}
703
704static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
705 struct intel_iommu *iommu)
706{
707 struct acpi_dmar_hardware_unit *drhd;
708 struct acpi_dmar_device_scope *scope;
709 void *start, *end;
710
711 drhd = (struct acpi_dmar_hardware_unit *)header;
712
713 start = (void *)(drhd + 1);
714 end = ((void *)drhd) + header->length;
715
716 while (start < end) {
717 scope = start;
718 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
719 if (ir_ioapic_num == MAX_IO_APICS) {
720 printk(KERN_WARNING "Exceeded Max IO APICS\n");
721 return -1;
722 }
723
724 printk(KERN_INFO "IOAPIC id %d under DRHD base "
725 " 0x%Lx IOMMU %d\n", scope->enumeration_id,
726 drhd->address, iommu->seq_id);
727
728 ir_parse_one_ioapic_scope(scope, iommu);
729 } else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
730 if (ir_hpet_num == MAX_HPET_TBS) {
731 printk(KERN_WARNING "Exceeded Max HPET blocks\n");
732 return -1;
733 }
734
735 printk(KERN_INFO "HPET id %d under DRHD base"
736 " 0x%Lx\n", scope->enumeration_id,
737 drhd->address);
738
739 ir_parse_one_hpet_scope(scope, iommu);
740 }
741 start += scope->length;
742 }
743
744 return 0;
745}
746
747/*
748 * Finds the association between IOAPICs and their interrupt-remapping
749 * hardware units.
750 */
751int __init parse_ioapics_under_ir(void)
752{
753 struct dmar_drhd_unit *drhd;
754 int ir_supported = 0;
755
756 for_each_drhd_unit(drhd) {
757 struct intel_iommu *iommu = drhd->iommu;
758
759 if (ecap_ir_support(iommu->ecap)) {
760 if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
761 return -1;
762
763 ir_supported = 1;
764 }
765 }
766
767 if (ir_supported && ir_ioapic_num != nr_ioapics) {
768 printk(KERN_WARNING
769 "Not all IO-APIC's listed under remapping hardware\n");
770 return -1;
771 }
772
773 return ir_supported;
774}
775
776int ir_dev_scope_init(void)
777{
778 if (!intr_remapping_enabled)
779 return 0;
780
781 return dmar_dev_scope_init();
782}
783rootfs_initcall(ir_dev_scope_init);
784
785void disable_intr_remapping(void)
786{
787 struct dmar_drhd_unit *drhd;
788 struct intel_iommu *iommu = NULL;
789
790 /*
791 * Disable Interrupt-remapping for all the DRHD's now.
792 */
793 for_each_iommu(iommu, drhd) {
794 if (!ecap_ir_support(iommu->ecap))
795 continue;
796
797 iommu_disable_intr_remapping(iommu);
798 }
799}
800
801int reenable_intr_remapping(int eim)
802{
803 struct dmar_drhd_unit *drhd;
804 int setup = 0;
805 struct intel_iommu *iommu = NULL;
806
807 for_each_iommu(iommu, drhd)
808 if (iommu->qi)
809 dmar_reenable_qi(iommu);
810
811 /*
812 * Setup Interrupt-remapping for all the DRHD's now.
813 */
814 for_each_iommu(iommu, drhd) {
815 if (!ecap_ir_support(iommu->ecap))
816 continue;
817
818 /* Set up interrupt remapping for iommu.*/
819 iommu_set_intr_remapping(iommu, eim);
820 setup = 1;
821 }
822
823 if (!setup)
824 goto error;
825
826 return 0;
827
828error:
829 /*
830 * handle error condition gracefully here!
831 */
832 return -1;
833}
834
diff --git a/drivers/iommu/intr_remapping.h b/drivers/iommu/intr_remapping.h
new file mode 100644
index 000000000000..5662fecfee60
--- /dev/null
+++ b/drivers/iommu/intr_remapping.h
@@ -0,0 +1,17 @@
1#include <linux/intel-iommu.h>
2
3struct ioapic_scope {
4 struct intel_iommu *iommu;
5 unsigned int id;
6 unsigned int bus; /* PCI bus number */
7 unsigned int devfn; /* PCI devfn number */
8};
9
10struct hpet_scope {
11 struct intel_iommu *iommu;
12 u8 id;
13 unsigned int bus;
14 unsigned int devfn;
15};
16
17#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
new file mode 100644
index 000000000000..2fb2963df553
--- /dev/null
+++ b/drivers/iommu/iommu.c
@@ -0,0 +1,188 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/device.h>
20#include <linux/kernel.h>
21#include <linux/bug.h>
22#include <linux/types.h>
23#include <linux/module.h>
24#include <linux/slab.h>
25#include <linux/errno.h>
26#include <linux/iommu.h>
27
28static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
29{
30}
31
32/**
33 * bus_set_iommu - set iommu-callbacks for the bus
34 * @bus: bus.
35 * @ops: the callbacks provided by the iommu-driver
36 *
37 * This function is called by an iommu driver to set the iommu methods
38 * used for a particular bus. Drivers for devices on that bus can use
39 * the iommu-api after these ops are registered.
40 * This special function is needed because IOMMUs are usually devices on
41 * the bus itself, so the iommu drivers are not initialized when the bus
42 * is set up. With this function the iommu-driver can set the iommu-ops
43 * afterwards.
44 */
45int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
46{
47 if (bus->iommu_ops != NULL)
48 return -EBUSY;
49
50 bus->iommu_ops = ops;
51
52 /* Do IOMMU specific setup for this bus-type */
53 iommu_bus_init(bus, ops);
54
55 return 0;
56}
57EXPORT_SYMBOL_GPL(bus_set_iommu);
58
59bool iommu_present(struct bus_type *bus)
60{
61 return bus->iommu_ops != NULL;
62}
63EXPORT_SYMBOL_GPL(iommu_present);
64
65/**
66 * iommu_set_fault_handler() - set a fault handler for an iommu domain
67 * @domain: iommu domain
68 * @handler: fault handler
69 *
70 * This function should be used by IOMMU users which want to be notified
71 * whenever an IOMMU fault happens.
72 *
73 * The fault handler itself should return 0 on success, and an appropriate
74 * error code otherwise.
75 */
76void iommu_set_fault_handler(struct iommu_domain *domain,
77 iommu_fault_handler_t handler)
78{
79 BUG_ON(!domain);
80
81 domain->handler = handler;
82}
83EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
84
85struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
86{
87 struct iommu_domain *domain;
88 int ret;
89
90 if (bus == NULL || bus->iommu_ops == NULL)
91 return NULL;
92
93 domain = kmalloc(sizeof(*domain), GFP_KERNEL);
94 if (!domain)
95 return NULL;
96
97 domain->ops = bus->iommu_ops;
98
99 ret = domain->ops->domain_init(domain);
100 if (ret)
101 goto out_free;
102
103 return domain;
104
105out_free:
106 kfree(domain);
107
108 return NULL;
109}
110EXPORT_SYMBOL_GPL(iommu_domain_alloc);
111
112void iommu_domain_free(struct iommu_domain *domain)
113{
114 if (likely(domain->ops->domain_destroy != NULL))
115 domain->ops->domain_destroy(domain);
116
117 kfree(domain);
118}
119EXPORT_SYMBOL_GPL(iommu_domain_free);
120
121int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
122{
123 if (unlikely(domain->ops->attach_dev == NULL))
124 return -ENODEV;
125
126 return domain->ops->attach_dev(domain, dev);
127}
128EXPORT_SYMBOL_GPL(iommu_attach_device);
129
130void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
131{
132 if (unlikely(domain->ops->detach_dev == NULL))
133 return;
134
135 domain->ops->detach_dev(domain, dev);
136}
137EXPORT_SYMBOL_GPL(iommu_detach_device);
138
139phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
140 unsigned long iova)
141{
142 if (unlikely(domain->ops->iova_to_phys == NULL))
143 return 0;
144
145 return domain->ops->iova_to_phys(domain, iova);
146}
147EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
148
149int iommu_domain_has_cap(struct iommu_domain *domain,
150 unsigned long cap)
151{
152 if (unlikely(domain->ops->domain_has_cap == NULL))
153 return 0;
154
155 return domain->ops->domain_has_cap(domain, cap);
156}
157EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
158
159int iommu_map(struct iommu_domain *domain, unsigned long iova,
160 phys_addr_t paddr, int gfp_order, int prot)
161{
162 size_t size;
163
164 if (unlikely(domain->ops->map == NULL))
165 return -ENODEV;
166
167 size = PAGE_SIZE << gfp_order;
168
169 BUG_ON(!IS_ALIGNED(iova | paddr, size));
170
171 return domain->ops->map(domain, iova, paddr, gfp_order, prot);
172}
173EXPORT_SYMBOL_GPL(iommu_map);
174
175int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
176{
177 size_t size;
178
179 if (unlikely(domain->ops->unmap == NULL))
180 return -ENODEV;
181
182 size = PAGE_SIZE << gfp_order;
183
184 BUG_ON(!IS_ALIGNED(iova, size));
185
186 return domain->ops->unmap(domain, iova, gfp_order);
187}
188EXPORT_SYMBOL_GPL(iommu_unmap);
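/*
 * Editorial sketch, not part of this commit: a minimal user of the API
 * exported above, mapping one page for a PCI device.  `pdev`, `iova` and
 * `paddr` are hypothetical and must be page aligned; error handling is
 * kept to the bare minimum.
 */
#include <linux/iommu.h>
#include <linux/pci.h>

static int example_map_one_page(struct pci_dev *pdev, unsigned long iova,
				phys_addr_t paddr)
{
	struct iommu_domain *domain;
	int ret;

	if (!iommu_present(&pci_bus_type))
		return -ENODEV;

	domain = iommu_domain_alloc(&pci_bus_type);
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);
	if (ret)
		goto out_free;

	/* gfp_order 0 == one page */
	ret = iommu_map(domain, iova, paddr, 0, IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... DMA through the mapping, then tear it down ... */
	iommu_unmap(domain, iova, 0);
out_detach:
	iommu_detach_device(domain, &pdev->dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}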
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
new file mode 100644
index 000000000000..c5c274ab5c5a
--- /dev/null
+++ b/drivers/iommu/iova.c
@@ -0,0 +1,435 @@
1/*
2 * Copyright © 2006-2009, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
18 */
19
20#include <linux/iova.h>
21
22void
23init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
24{
25 spin_lock_init(&iovad->iova_rbtree_lock);
26 iovad->rbroot = RB_ROOT;
27 iovad->cached32_node = NULL;
28 iovad->dma_32bit_pfn = pfn_32bit;
29}
30
31static struct rb_node *
32__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
33{
34 if ((*limit_pfn != iovad->dma_32bit_pfn) ||
35 (iovad->cached32_node == NULL))
36 return rb_last(&iovad->rbroot);
37 else {
38 struct rb_node *prev_node = rb_prev(iovad->cached32_node);
39 struct iova *curr_iova =
40 container_of(iovad->cached32_node, struct iova, node);
41 *limit_pfn = curr_iova->pfn_lo - 1;
42 return prev_node;
43 }
44}
45
46static void
47__cached_rbnode_insert_update(struct iova_domain *iovad,
48 unsigned long limit_pfn, struct iova *new)
49{
50 if (limit_pfn != iovad->dma_32bit_pfn)
51 return;
52 iovad->cached32_node = &new->node;
53}
54
55static void
56__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
57{
58 struct iova *cached_iova;
59 struct rb_node *curr;
60
61 if (!iovad->cached32_node)
62 return;
63 curr = iovad->cached32_node;
64 cached_iova = container_of(curr, struct iova, node);
65
66 if (free->pfn_lo >= cached_iova->pfn_lo) {
67 struct rb_node *node = rb_next(&free->node);
68 struct iova *iova = container_of(node, struct iova, node);
69
70 /* only cache if it's below 32bit pfn */
71 if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
72 iovad->cached32_node = node;
73 else
74 iovad->cached32_node = NULL;
75 }
76}
77
78/* Computes the padding size required to make the
79 * start address naturally aligned on its size
80 */
81static int
82iova_get_pad_size(int size, unsigned int limit_pfn)
83{
84 unsigned int pad_size = 0;
85 unsigned int order = ilog2(size);
86
87 if (order)
88 pad_size = (limit_pfn + 1) % (1 << order);
89
90 return pad_size;
91}
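/*
 * Worked example (editorial): for size = 8 pfns (order 3) and
 * limit_pfn = 0x1005, pad_size = (0x1005 + 1) % 8 = 6.  The allocator
 * below then picks pfn_lo = limit_pfn - (size + pad_size) + 1 = 0xff8,
 * which is 8-aligned; the six padding pfns between pfn_hi and limit_pfn
 * simply stay free.
 */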
92
93static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
94 unsigned long size, unsigned long limit_pfn,
95 struct iova *new, bool size_aligned)
96{
97 struct rb_node *prev, *curr = NULL;
98 unsigned long flags;
99 unsigned long saved_pfn;
100 unsigned int pad_size = 0;
101
102 /* Walk the tree backwards */
103 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
104 saved_pfn = limit_pfn;
105 curr = __get_cached_rbnode(iovad, &limit_pfn);
106 prev = curr;
107 while (curr) {
108 struct iova *curr_iova = container_of(curr, struct iova, node);
109
110 if (limit_pfn < curr_iova->pfn_lo)
111 goto move_left;
112 else if (limit_pfn < curr_iova->pfn_hi)
113 goto adjust_limit_pfn;
114 else {
115 if (size_aligned)
116 pad_size = iova_get_pad_size(size, limit_pfn);
117 if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
118 break; /* found a free slot */
119 }
120adjust_limit_pfn:
121 limit_pfn = curr_iova->pfn_lo - 1;
122move_left:
123 prev = curr;
124 curr = rb_prev(curr);
125 }
126
127 if (!curr) {
128 if (size_aligned)
129 pad_size = iova_get_pad_size(size, limit_pfn);
130 if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
131 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
132 return -ENOMEM;
133 }
134 }
135
136 /* pfn_lo will point to size aligned address if size_aligned is set */
137 new->pfn_lo = limit_pfn - (size + pad_size) + 1;
138 new->pfn_hi = new->pfn_lo + size - 1;
139
140 /* Insert the new_iova into domain rbtree by holding writer lock */
141 /* Add new node and rebalance tree. */
142 {
143 struct rb_node **entry, *parent = NULL;
144
145 /* If we have 'prev', it's a valid place to start the
146 insertion. Otherwise, start from the root. */
147 if (prev)
148 entry = &prev;
149 else
150 entry = &iovad->rbroot.rb_node;
151
152 /* Figure out where to put new node */
153 while (*entry) {
154 struct iova *this = container_of(*entry,
155 struct iova, node);
156 parent = *entry;
157
158 if (new->pfn_lo < this->pfn_lo)
159 entry = &((*entry)->rb_left);
160 else if (new->pfn_lo > this->pfn_lo)
161 entry = &((*entry)->rb_right);
162 else
163 BUG(); /* this should not happen */
164 }
165
166 /* Add new node and rebalance tree. */
167 rb_link_node(&new->node, parent, entry);
168 rb_insert_color(&new->node, &iovad->rbroot);
169 }
170 __cached_rbnode_insert_update(iovad, saved_pfn, new);
171
172 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
173
174
175 return 0;
176}
177
178static void
179iova_insert_rbtree(struct rb_root *root, struct iova *iova)
180{
181 struct rb_node **new = &(root->rb_node), *parent = NULL;
182 /* Figure out where to put new node */
183 while (*new) {
184 struct iova *this = container_of(*new, struct iova, node);
185 parent = *new;
186
187 if (iova->pfn_lo < this->pfn_lo)
188 new = &((*new)->rb_left);
189 else if (iova->pfn_lo > this->pfn_lo)
190 new = &((*new)->rb_right);
191 else
192 BUG(); /* this should not happen */
193 }
194 /* Add new node and rebalance tree. */
195 rb_link_node(&iova->node, parent, new);
196 rb_insert_color(&iova->node, root);
197}
198
199/**
200 * alloc_iova - allocates an iova
201 * @iovad - iova domain in question
202 * @size - number of page frames to allocate
203 * @limit_pfn - max limit address
204 * @size_aligned - set if size_aligned address range is required
205 * This function allocates an iova in the range IOVA_START_PFN to limit_pfn,
206 * searching down from limit_pfn instead of up from IOVA_START_PFN. If the size_aligned
207 * flag is set then the allocated address iova->pfn_lo will be naturally
208 * aligned on roundup_power_of_two(size).
209 */
210struct iova *
211alloc_iova(struct iova_domain *iovad, unsigned long size,
212 unsigned long limit_pfn,
213 bool size_aligned)
214{
215 struct iova *new_iova;
216 int ret;
217
218 new_iova = alloc_iova_mem();
219 if (!new_iova)
220 return NULL;
221
222 /* If size_aligned is set then round the size up
223 * to the next power of two.
224 */
225 if (size_aligned)
226 size = __roundup_pow_of_two(size);
227
228 ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
229 new_iova, size_aligned);
230
231 if (ret) {
232 free_iova_mem(new_iova);
233 return NULL;
234 }
235
236 return new_iova;
237}
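/*
 * Editorial sketch, not part of this commit: how a caller such as
 * intel-iommu.c typically drives this allocator for one DMA mapping.
 * `iovad`, `nrpages` and `limit_pfn` are hypothetical inputs, and
 * alloc_iova_mem() is assumed to have its backing cache set up (as
 * iommu_init_mempool() does for the VT-d driver).
 */
static unsigned long example_alloc_dma_pfns(struct iova_domain *iovad,
					    unsigned long nrpages,
					    unsigned long limit_pfn)
{
	struct iova *iova;

	/* size-aligned allocation, searching down from limit_pfn */
	iova = alloc_iova(iovad, nrpages, limit_pfn, true);
	if (!iova)
		return 0;

	/* ... program the IOMMU page tables for pfn_lo..pfn_hi ... */

	return iova->pfn_lo;
	/* later: free_iova(iovad, pfn_lo) or __free_iova(iovad, iova) */
}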
238
239/**
240 * find_iova - finds an iova for a given pfn
241 * @iovad - iova domain in question.
242 * @pfn - page frame number
243 * This function finds and returns an iova belonging to the
244 * given domain which contains the given pfn.
245 */
246struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
247{
248 unsigned long flags;
249 struct rb_node *node;
250
251 /* Take the lock so that no other thread is manipulating the rbtree */
252 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
253 node = iovad->rbroot.rb_node;
254 while (node) {
255 struct iova *iova = container_of(node, struct iova, node);
256
257 /* If pfn falls within iova's range, return iova */
258 if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
259 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
260 /* We are not holding the lock while this iova
261 * is referenced by the caller as the same thread
262 * which called this function also calls __free_iova()
263 * and it is by design that only one thread can possibly
264 * reference a particular iova and hence no conflict.
265 */
266 return iova;
267 }
268
269 if (pfn < iova->pfn_lo)
270 node = node->rb_left;
271 else if (pfn > iova->pfn_lo)
272 node = node->rb_right;
273 }
274
275 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
276 return NULL;
277}
278
279/**
280 * __free_iova - frees the given iova
281 * @iovad: iova domain in question.
282 * @iova: iova in question.
283 * Frees the given iova belonging to the given domain
284 */
285void
286__free_iova(struct iova_domain *iovad, struct iova *iova)
287{
288 unsigned long flags;
289
290 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
291 __cached_rbnode_delete_update(iovad, iova);
292 rb_erase(&iova->node, &iovad->rbroot);
293 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
294 free_iova_mem(iova);
295}
296
297/**
298 * free_iova - finds and frees the iova for a given pfn
299 * @iovad: - iova domain in question.
300 * @pfn: - pfn that was allocated previously
301 * This function finds an iova for a given pfn and then
302 * frees the iova from that domain.
303 */
304void
305free_iova(struct iova_domain *iovad, unsigned long pfn)
306{
307 struct iova *iova = find_iova(iovad, pfn);
308 if (iova)
309 __free_iova(iovad, iova);
310
311}
312
313/**
314 * put_iova_domain - destroys the iova domain
315 * @iovad: - iova domain in question.
316 * All the iovas in that domain are destroyed.
317 */
318void put_iova_domain(struct iova_domain *iovad)
319{
320 struct rb_node *node;
321 unsigned long flags;
322
323 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
324 node = rb_first(&iovad->rbroot);
325 while (node) {
326 struct iova *iova = container_of(node, struct iova, node);
327 rb_erase(node, &iovad->rbroot);
328 free_iova_mem(iova);
329 node = rb_first(&iovad->rbroot);
330 }
331 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
332}
333
334static int
335__is_range_overlap(struct rb_node *node,
336 unsigned long pfn_lo, unsigned long pfn_hi)
337{
338 struct iova *iova = container_of(node, struct iova, node);
339
340 if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
341 return 1;
342 return 0;
343}
344
345static struct iova *
346__insert_new_range(struct iova_domain *iovad,
347 unsigned long pfn_lo, unsigned long pfn_hi)
348{
349 struct iova *iova;
350
351 iova = alloc_iova_mem();
352 if (!iova)
353 return iova;
354
355 iova->pfn_hi = pfn_hi;
356 iova->pfn_lo = pfn_lo;
357 iova_insert_rbtree(&iovad->rbroot, iova);
358 return iova;
359}
360
361static void
362__adjust_overlap_range(struct iova *iova,
363 unsigned long *pfn_lo, unsigned long *pfn_hi)
364{
365 if (*pfn_lo < iova->pfn_lo)
366 iova->pfn_lo = *pfn_lo;
367 if (*pfn_hi > iova->pfn_hi)
368 *pfn_lo = iova->pfn_hi + 1;
369}
370
371/**
372 * reserve_iova - reserves an iova in the given range
373 * @iovad: - iova domain pointer
374 * @pfn_lo: - lower page frame address
375 * @pfn_hi: - higher pfn address
376 * This function reserves the address range from pfn_lo to pfn_hi so
377 * that this address is not dished out as part of alloc_iova.
378 */
379struct iova *
380reserve_iova(struct iova_domain *iovad,
381 unsigned long pfn_lo, unsigned long pfn_hi)
382{
383 struct rb_node *node;
384 unsigned long flags;
385 struct iova *iova;
386 unsigned int overlap = 0;
387
388 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
389 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
390 if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
391 iova = container_of(node, struct iova, node);
392 __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
393 if ((pfn_lo >= iova->pfn_lo) &&
394 (pfn_hi <= iova->pfn_hi))
395 goto finish;
396 overlap = 1;
397
398 } else if (overlap)
399 break;
400 }
401
402 /* We are here either because this is the first reserved node
403 * or we need to insert the remaining non-overlapping address range
404 */
405 iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
406finish:
407
408 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
409 return iova;
410}
411
412/**
413 * copy_reserved_iova - copies the reserved iovas between domains
414 * @from: - source domain from which to copy
415 * @to: - destination domain to which to copy
416 * This function copies reserved iovas from one domain to
417 * the other.
418 */
419void
420copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
421{
422 unsigned long flags;
423 struct rb_node *node;
424
425 spin_lock_irqsave(&from->iova_rbtree_lock, flags);
426 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
427 struct iova *iova = container_of(node, struct iova, node);
428 struct iova *new_iova;
429 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
430 if (!new_iova)
431 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
432 iova->pfn_lo, iova->pfn_lo);
433 }
434 spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
435}
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
new file mode 100644
index 000000000000..5865dd2e28f9
--- /dev/null
+++ b/drivers/iommu/msm_iommu.c
@@ -0,0 +1,738 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/platform_device.h>
22#include <linux/errno.h>
23#include <linux/io.h>
24#include <linux/interrupt.h>
25#include <linux/list.h>
26#include <linux/spinlock.h>
27#include <linux/slab.h>
28#include <linux/iommu.h>
29#include <linux/clk.h>
30
31#include <asm/cacheflush.h>
32#include <asm/sizes.h>
33
34#include <mach/iommu_hw-8xxx.h>
35#include <mach/iommu.h>
36
37#define MRC(reg, processor, op1, crn, crm, op2) \
38__asm__ __volatile__ ( \
39" mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \
40: "=r" (reg))
41
42#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
43#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
44
45static int msm_iommu_tex_class[4];
46
47DEFINE_SPINLOCK(msm_iommu_lock);
48
49struct msm_priv {
50 unsigned long *pgtable;
51 struct list_head list_attached;
52};
53
54static int __enable_clocks(struct msm_iommu_drvdata *drvdata)
55{
56 int ret;
57
58 ret = clk_enable(drvdata->pclk);
59 if (ret)
60 goto fail;
61
62 if (drvdata->clk) {
63 ret = clk_enable(drvdata->clk);
64 if (ret)
65 clk_disable(drvdata->pclk);
66 }
67fail:
68 return ret;
69}
70
71static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
72{
73 if (drvdata->clk)
74 clk_disable(drvdata->clk);
75 clk_disable(drvdata->pclk);
76}
77
78static int __flush_iotlb(struct iommu_domain *domain)
79{
80 struct msm_priv *priv = domain->priv;
81 struct msm_iommu_drvdata *iommu_drvdata;
82 struct msm_iommu_ctx_drvdata *ctx_drvdata;
83 int ret = 0;
84#ifndef CONFIG_IOMMU_PGTABLES_L2
85 unsigned long *fl_table = priv->pgtable;
86 int i;
87
88 if (!list_empty(&priv->list_attached)) {
89 dmac_flush_range(fl_table, fl_table + SZ_16K);
90
91 for (i = 0; i < NUM_FL_PTE; i++)
92 if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
93 void *sl_table = __va(fl_table[i] &
94 FL_BASE_MASK);
95 dmac_flush_range(sl_table, sl_table + SZ_4K);
96 }
97 }
98#endif
99
100 list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
101 if (!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent)
102 BUG();
103
104 iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
105 BUG_ON(!iommu_drvdata);
106
107 ret = __enable_clocks(iommu_drvdata);
108 if (ret)
109 goto fail;
110
111 SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
112 __disable_clocks(iommu_drvdata);
113 }
114fail:
115 return ret;
116}
117
118static void __reset_context(void __iomem *base, int ctx)
119{
120 SET_BPRCOSH(base, ctx, 0);
121 SET_BPRCISH(base, ctx, 0);
122 SET_BPRCNSH(base, ctx, 0);
123 SET_BPSHCFG(base, ctx, 0);
124 SET_BPMTCFG(base, ctx, 0);
125 SET_ACTLR(base, ctx, 0);
126 SET_SCTLR(base, ctx, 0);
127 SET_FSRRESTORE(base, ctx, 0);
128 SET_TTBR0(base, ctx, 0);
129 SET_TTBR1(base, ctx, 0);
130 SET_TTBCR(base, ctx, 0);
131 SET_BFBCR(base, ctx, 0);
132 SET_PAR(base, ctx, 0);
133 SET_FAR(base, ctx, 0);
134 SET_CTX_TLBIALL(base, ctx, 0);
135 SET_TLBFLPTER(base, ctx, 0);
136 SET_TLBSLPTER(base, ctx, 0);
137 SET_TLBLKCR(base, ctx, 0);
138 SET_PRRR(base, ctx, 0);
139 SET_NMRR(base, ctx, 0);
140}
141
142static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
143{
144 unsigned int prrr, nmrr;
145 __reset_context(base, ctx);
146
147 /* Set up HTW mode */
148 /* TLB miss configuration: perform HTW on miss */
149 SET_TLBMCFG(base, ctx, 0x3);
150
151 /* V2P configuration: HTW for access */
152 SET_V2PCFG(base, ctx, 0x3);
153
154 SET_TTBCR(base, ctx, 0);
155 SET_TTBR0_PA(base, ctx, (pgtable >> 14));
156
157 /* Invalidate the TLB for this context */
158 SET_CTX_TLBIALL(base, ctx, 0);
159
160 /* Set interrupt number to "secure" interrupt */
161 SET_IRPTNDX(base, ctx, 0);
162
163 /* Enable context fault interrupt */
164 SET_CFEIE(base, ctx, 1);
165
166 /* Stall access on a context fault and let the handler deal with it */
167 SET_CFCFG(base, ctx, 1);
168
169 /* Redirect all cacheable requests to L2 slave port. */
170 SET_RCISH(base, ctx, 1);
171 SET_RCOSH(base, ctx, 1);
172 SET_RCNSH(base, ctx, 1);
173
174 /* Turn on TEX Remap */
175 SET_TRE(base, ctx, 1);
176
177 /* Set TEX remap attributes */
178 RCP15_PRRR(prrr);
179 RCP15_NMRR(nmrr);
180 SET_PRRR(base, ctx, prrr);
181 SET_NMRR(base, ctx, nmrr);
182
183 /* Turn on BFB prefetch */
184 SET_BFBDFE(base, ctx, 1);
185
186#ifdef CONFIG_IOMMU_PGTABLES_L2
187 /* Configure page tables as inner-cacheable and shareable to reduce
188 * the TLB miss penalty.
189 */
190 SET_TTBR0_SH(base, ctx, 1);
191 SET_TTBR1_SH(base, ctx, 1);
192
193 SET_TTBR0_NOS(base, ctx, 1);
194 SET_TTBR1_NOS(base, ctx, 1);
195
196 SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */
197 SET_TTBR0_IRGNL(base, ctx, 1);
198
199 SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */
200 SET_TTBR1_IRGNL(base, ctx, 1);
201
202 SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */
203 SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */
204#endif
205
206 /* Enable the MMU */
207 SET_M(base, ctx, 1);
208}
209
210static int msm_iommu_domain_init(struct iommu_domain *domain)
211{
212 struct msm_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
213
214 if (!priv)
215 goto fail_nomem;
216
217 INIT_LIST_HEAD(&priv->list_attached);
218 priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL,
219 get_order(SZ_16K));
220
221 if (!priv->pgtable)
222 goto fail_nomem;
223
224 memset(priv->pgtable, 0, SZ_16K);
225 domain->priv = priv;
226 return 0;
227
228fail_nomem:
229 kfree(priv);
230 return -ENOMEM;
231}
232
233static void msm_iommu_domain_destroy(struct iommu_domain *domain)
234{
235 struct msm_priv *priv;
236 unsigned long flags;
237 unsigned long *fl_table;
238 int i;
239
240 spin_lock_irqsave(&msm_iommu_lock, flags);
241 priv = domain->priv;
242 domain->priv = NULL;
243
244 if (priv) {
245 fl_table = priv->pgtable;
246
247 for (i = 0; i < NUM_FL_PTE; i++)
248 if ((fl_table[i] & 0x03) == FL_TYPE_TABLE)
249 free_page((unsigned long) __va(((fl_table[i]) &
250 FL_BASE_MASK)));
251
252 free_pages((unsigned long)priv->pgtable, get_order(SZ_16K));
253 priv->pgtable = NULL;
254 }
255
256 kfree(priv);
257 spin_unlock_irqrestore(&msm_iommu_lock, flags);
258}
259
260static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
261{
262 struct msm_priv *priv;
263 struct msm_iommu_ctx_dev *ctx_dev;
264 struct msm_iommu_drvdata *iommu_drvdata;
265 struct msm_iommu_ctx_drvdata *ctx_drvdata;
266 struct msm_iommu_ctx_drvdata *tmp_drvdata;
267 int ret = 0;
268 unsigned long flags;
269
270 spin_lock_irqsave(&msm_iommu_lock, flags);
271
272 priv = domain->priv;
273
274 if (!priv || !dev) {
275 ret = -EINVAL;
276 goto fail;
277 }
278
279 iommu_drvdata = dev_get_drvdata(dev->parent);
280 ctx_drvdata = dev_get_drvdata(dev);
281 ctx_dev = dev->platform_data;
282
283 if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) {
284 ret = -EINVAL;
285 goto fail;
286 }
287
288 if (!list_empty(&ctx_drvdata->attached_elm)) {
289 ret = -EBUSY;
290 goto fail;
291 }
292
293 list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm)
294 if (tmp_drvdata == ctx_drvdata) {
295 ret = -EBUSY;
296 goto fail;
297 }
298
299 ret = __enable_clocks(iommu_drvdata);
300 if (ret)
301 goto fail;
302
303 __program_context(iommu_drvdata->base, ctx_dev->num,
304 __pa(priv->pgtable));
305
306 __disable_clocks(iommu_drvdata);
307 list_add(&(ctx_drvdata->attached_elm), &priv->list_attached);
308 ret = __flush_iotlb(domain);
309
310fail:
311 spin_unlock_irqrestore(&msm_iommu_lock, flags);
312 return ret;
313}
314
315static void msm_iommu_detach_dev(struct iommu_domain *domain,
316 struct device *dev)
317{
318 struct msm_priv *priv;
319 struct msm_iommu_ctx_dev *ctx_dev;
320 struct msm_iommu_drvdata *iommu_drvdata;
321 struct msm_iommu_ctx_drvdata *ctx_drvdata;
322 unsigned long flags;
323 int ret;
324
325 spin_lock_irqsave(&msm_iommu_lock, flags);
326 priv = domain->priv;
327
328 if (!priv || !dev)
329 goto fail;
330
331 iommu_drvdata = dev_get_drvdata(dev->parent);
332 ctx_drvdata = dev_get_drvdata(dev);
333 ctx_dev = dev->platform_data;
334
335 if (!iommu_drvdata || !ctx_drvdata || !ctx_dev)
336 goto fail;
337
338 ret = __flush_iotlb(domain);
339 if (ret)
340 goto fail;
341
342 ret = __enable_clocks(iommu_drvdata);
343 if (ret)
344 goto fail;
345
346 __reset_context(iommu_drvdata->base, ctx_dev->num);
347 __disable_clocks(iommu_drvdata);
348 list_del_init(&ctx_drvdata->attached_elm);
349
350fail:
351 spin_unlock_irqrestore(&msm_iommu_lock, flags);
352}
353
354static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
355 phys_addr_t pa, int order, int prot)
356{
357 struct msm_priv *priv;
358 unsigned long flags;
359 unsigned long *fl_table;
360 unsigned long *fl_pte;
361 unsigned long fl_offset;
362 unsigned long *sl_table;
363 unsigned long *sl_pte;
364 unsigned long sl_offset;
365 unsigned int pgprot;
366 size_t len = 0x1000UL << order;
367 int ret = 0, tex, sh;
368
369 spin_lock_irqsave(&msm_iommu_lock, flags);
370
371 sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
372 tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK];
373
374 if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
375 ret = -EINVAL;
376 goto fail;
377 }
378
379 priv = domain->priv;
380 if (!priv) {
381 ret = -EINVAL;
382 goto fail;
383 }
384
385 fl_table = priv->pgtable;
386
387 if (len != SZ_16M && len != SZ_1M &&
388 len != SZ_64K && len != SZ_4K) {
389 pr_debug("Bad size: %zu\n", len);
390 ret = -EINVAL;
391 goto fail;
392 }
393
394 if (!fl_table) {
395 pr_debug("Null page table\n");
396 ret = -EINVAL;
397 goto fail;
398 }
399
400 if (len == SZ_16M || len == SZ_1M) {
401 pgprot = sh ? FL_SHARED : 0;
402 pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
403 pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
404 pgprot |= tex & 0x04 ? FL_TEX0 : 0;
405 } else {
406 pgprot = sh ? SL_SHARED : 0;
407 pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
408 pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
409 pgprot |= tex & 0x04 ? SL_TEX0 : 0;
410 }
411
412 fl_offset = FL_OFFSET(va); /* Upper 12 bits */
413 fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
414
415 if (len == SZ_16M) {
416 int i = 0;
417 for (i = 0; i < 16; i++)
418 *(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
419 FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
420 FL_SHARED | FL_NG | pgprot;
421 }
422
423 if (len == SZ_1M)
424 *fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG |
425 FL_TYPE_SECT | FL_SHARED | pgprot;
426
427 /* Need a 2nd level table */
428 if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
429 unsigned long *sl;
430 sl = (unsigned long *) __get_free_pages(GFP_ATOMIC,
431 get_order(SZ_4K));
432
433 if (!sl) {
434 pr_debug("Could not allocate second level table\n");
435 ret = -ENOMEM;
436 goto fail;
437 }
438
439 memset(sl, 0, SZ_4K);
440 *fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE);
441 }
442
443 sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
444 sl_offset = SL_OFFSET(va);
445 sl_pte = sl_table + sl_offset;
446
447
448 if (len == SZ_4K)
449 *sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG |
450 SL_SHARED | SL_TYPE_SMALL | pgprot;
451
452 if (len == SZ_64K) {
453 int i;
454
455 for (i = 0; i < 16; i++)
456 *(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
457 SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
458 }
459
460 ret = __flush_iotlb(domain);
461fail:
462 spin_unlock_irqrestore(&msm_iommu_lock, flags);
463 return ret;
464}
465
466static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
467 int order)
468{
469 struct msm_priv *priv;
470 unsigned long flags;
471 unsigned long *fl_table;
472 unsigned long *fl_pte;
473 unsigned long fl_offset;
474 unsigned long *sl_table;
475 unsigned long *sl_pte;
476 unsigned long sl_offset;
477 size_t len = 0x1000UL << order;
478 int i, ret = 0;
479
480 spin_lock_irqsave(&msm_iommu_lock, flags);
481
482 priv = domain->priv;
483
484 if (!priv) {
485 ret = -ENODEV;
486 goto fail;
487 }
488
489 fl_table = priv->pgtable;
490
491 if (len != SZ_16M && len != SZ_1M &&
492 len != SZ_64K && len != SZ_4K) {
493 pr_debug("Bad length: %zu\n", len);
494 ret = -EINVAL;
495 goto fail;
496 }
497
498 if (!fl_table) {
499 pr_debug("Null page table\n");
500 ret = -EINVAL;
501 goto fail;
502 }
503
504 fl_offset = FL_OFFSET(va); /* Upper 12 bits */
505 fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
506
507 if (*fl_pte == 0) {
508 pr_debug("First level PTE is 0\n");
509 ret = -ENODEV;
510 goto fail;
511 }
512
513 /* Unmap supersection */
514 if (len == SZ_16M)
515 for (i = 0; i < 16; i++)
516 *(fl_pte+i) = 0;
517
518 if (len == SZ_1M)
519 *fl_pte = 0;
520
521 sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
522 sl_offset = SL_OFFSET(va);
523 sl_pte = sl_table + sl_offset;
524
525 if (len == SZ_64K) {
526 for (i = 0; i < 16; i++)
527 *(sl_pte+i) = 0;
528 }
529
530 if (len == SZ_4K)
531 *sl_pte = 0;
532
533 if (len == SZ_4K || len == SZ_64K) {
534 int used = 0;
535
536 for (i = 0; i < NUM_SL_PTE; i++)
537 if (sl_table[i])
538 used = 1;
539 if (!used) {
540 free_page((unsigned long)sl_table);
541 *fl_pte = 0;
542 }
543 }
544
545 ret = __flush_iotlb(domain);
546
547 /*
548 * the IOMMU API requires us to return the order of the unmapped
549 * page (on success).
550 */
551 if (!ret)
552 ret = order;
553fail:
554 spin_unlock_irqrestore(&msm_iommu_lock, flags);
555 return ret;
556}
557
558static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
559 unsigned long va)
560{
561 struct msm_priv *priv;
562 struct msm_iommu_drvdata *iommu_drvdata;
563 struct msm_iommu_ctx_drvdata *ctx_drvdata;
564 unsigned int par;
565 unsigned long flags;
566 void __iomem *base;
567 phys_addr_t ret = 0;
568 int ctx;
569
570 spin_lock_irqsave(&msm_iommu_lock, flags);
571
572 priv = domain->priv;
573 if (list_empty(&priv->list_attached))
574 goto fail;
575
576 ctx_drvdata = list_entry(priv->list_attached.next,
577 struct msm_iommu_ctx_drvdata, attached_elm);
578 iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
579
580 base = iommu_drvdata->base;
581 ctx = ctx_drvdata->num;
582
583 ret = __enable_clocks(iommu_drvdata);
584 if (ret)
585 goto fail;
586
587 /* Invalidate context TLB */
588 SET_CTX_TLBIALL(base, ctx, 0);
589 SET_V2PPR(base, ctx, va & V2Pxx_VA);
590
591 par = GET_PAR(base, ctx);
592
593 /* We are dealing with a supersection */
594 if (GET_NOFAULT_SS(base, ctx))
595 ret = (par & 0xFF000000) | (va & 0x00FFFFFF);
596 else /* Upper 20 bits from PAR, lower 12 from VA */
597 ret = (par & 0xFFFFF000) | (va & 0x00000FFF);
598
599 if (GET_FAULT(base, ctx))
600 ret = 0;
601
602 __disable_clocks(iommu_drvdata);
603fail:
604 spin_unlock_irqrestore(&msm_iommu_lock, flags);
605 return ret;
606}
607
608static int msm_iommu_domain_has_cap(struct iommu_domain *domain,
609 unsigned long cap)
610{
611 return 0;
612}
613
614static void print_ctx_regs(void __iomem *base, int ctx)
615{
616 unsigned int fsr = GET_FSR(base, ctx);
617 pr_err("FAR = %08x PAR = %08x\n",
618 GET_FAR(base, ctx), GET_PAR(base, ctx));
619 pr_err("FSR = %08x [%s%s%s%s%s%s%s%s%s%s]\n", fsr,
620 (fsr & 0x02) ? "TF " : "",
621 (fsr & 0x04) ? "AFF " : "",
622 (fsr & 0x08) ? "APF " : "",
623 (fsr & 0x10) ? "TLBMF " : "",
624 (fsr & 0x20) ? "HTWDEEF " : "",
625 (fsr & 0x40) ? "HTWSEEF " : "",
626 (fsr & 0x80) ? "MHF " : "",
627 (fsr & 0x10000) ? "SL " : "",
628 (fsr & 0x40000000) ? "SS " : "",
629 (fsr & 0x80000000) ? "MULTI " : "");
630
631 pr_err("FSYNR0 = %08x FSYNR1 = %08x\n",
632 GET_FSYNR0(base, ctx), GET_FSYNR1(base, ctx));
633 pr_err("TTBR0 = %08x TTBR1 = %08x\n",
634 GET_TTBR0(base, ctx), GET_TTBR1(base, ctx));
635 pr_err("SCTLR = %08x ACTLR = %08x\n",
636 GET_SCTLR(base, ctx), GET_ACTLR(base, ctx));
637 pr_err("PRRR = %08x NMRR = %08x\n",
638 GET_PRRR(base, ctx), GET_NMRR(base, ctx));
639}
640
641irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
642{
643 struct msm_iommu_drvdata *drvdata = dev_id;
644 void __iomem *base;
645 unsigned int fsr;
646 int i, ret;
647
648 spin_lock(&msm_iommu_lock);
649
650 if (!drvdata) {
651 pr_err("Invalid device ID in context interrupt handler\n");
652 goto fail;
653 }
654
655 base = drvdata->base;
656
657 pr_err("Unexpected IOMMU page fault!\n");
658 pr_err("base = %08x\n", (unsigned int) base);
659
660 ret = __enable_clocks(drvdata);
661 if (ret)
662 goto fail;
663
664 for (i = 0; i < drvdata->ncb; i++) {
665 fsr = GET_FSR(base, i);
666 if (fsr) {
667 pr_err("Fault occurred in context %d.\n", i);
668 pr_err("Interesting registers:\n");
669 print_ctx_regs(base, i);
670 SET_FSR(base, i, 0x4000000F);
671 }
672 }
673 __disable_clocks(drvdata);
674fail:
675 spin_unlock(&msm_iommu_lock);
676 return 0;
677}
678
679static struct iommu_ops msm_iommu_ops = {
680 .domain_init = msm_iommu_domain_init,
681 .domain_destroy = msm_iommu_domain_destroy,
682 .attach_dev = msm_iommu_attach_dev,
683 .detach_dev = msm_iommu_detach_dev,
684 .map = msm_iommu_map,
685 .unmap = msm_iommu_unmap,
686 .iova_to_phys = msm_iommu_iova_to_phys,
687 .domain_has_cap = msm_iommu_domain_has_cap
688};
689
690static int __init get_tex_class(int icp, int ocp, int mt, int nos)
691{
692 int i = 0;
693 unsigned int prrr = 0;
694 unsigned int nmrr = 0;
695 int c_icp, c_ocp, c_mt, c_nos;
696
697 RCP15_PRRR(prrr);
698 RCP15_NMRR(nmrr);
699
700 for (i = 0; i < NUM_TEX_CLASS; i++) {
701 c_nos = PRRR_NOS(prrr, i);
702 c_mt = PRRR_MT(prrr, i);
703 c_icp = NMRR_ICP(nmrr, i);
704 c_ocp = NMRR_OCP(nmrr, i);
705
706 if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos)
707 return i;
708 }
709
710 return -ENODEV;
711}
712
713static void __init setup_iommu_tex_classes(void)
714{
715 msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] =
716 get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1);
717
718 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] =
719 get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1);
720
721 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] =
722 get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1);
723
724 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] =
725 get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1);
726}
727
728static int __init msm_iommu_init(void)
729{
730 setup_iommu_tex_classes();
731 bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
732 return 0;
733}
734
735subsys_initcall(msm_iommu_init);
736
737MODULE_LICENSE("GPL v2");
738MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
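
Because msm_iommu_init() registers msm_iommu_ops on the platform bus, clients use the generic IOMMU API rather than calling this driver directly. A minimal sketch, assuming a hypothetical context name "video_ctx" and treating the fourth argument of iommu_map() as a page order as this generation of the API does (0 maps a single 4K page); the prot value is decoded by msm_iommu_map() into a TEX class and shareability:

	/* Sketch only: map one 4K page through the generic IOMMU API. */
	static int example_map_one_page(unsigned long iova, phys_addr_t pa)
	{
		struct iommu_domain *domain;
		struct device *ctx;
		int ret;

		domain = iommu_domain_alloc(&platform_bus_type);
		if (!domain)
			return -ENOMEM;

		ctx = msm_iommu_get_ctx("video_ctx");	/* hypothetical name */
		if (!ctx) {
			ret = -ENODEV;
			goto out;
		}

		ret = iommu_attach_device(domain, ctx);
		if (ret)
			goto out;

		ret = iommu_map(domain, iova, pa, 0,
				MSM_IOMMU_ATTR_NONCACHED | MSM_IOMMU_ATTR_SH);
		if (ret)
			iommu_detach_device(domain, ctx);
	out:
		if (ret)
			iommu_domain_free(domain);
		/* A real caller keeps the domain handle for later unmapping. */
		return ret;
	}
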
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
new file mode 100644
index 000000000000..8e8fb079852d
--- /dev/null
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -0,0 +1,422 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/platform_device.h>
23#include <linux/io.h>
24#include <linux/clk.h>
25#include <linux/iommu.h>
26#include <linux/interrupt.h>
27#include <linux/err.h>
28#include <linux/slab.h>
29
30#include <mach/iommu_hw-8xxx.h>
31#include <mach/iommu.h>
32#include <mach/clk.h>
33
34struct iommu_ctx_iter_data {
35 /* input */
36 const char *name;
37
38 /* output */
39 struct device *dev;
40};
41
42static struct platform_device *msm_iommu_root_dev;
43
44static int each_iommu_ctx(struct device *dev, void *data)
45{
46 struct iommu_ctx_iter_data *res = data;
47 struct msm_iommu_ctx_dev *c = dev->platform_data;
48
49 if (!res || !c || !c->name || !res->name)
50 return -EINVAL;
51
52 if (!strcmp(res->name, c->name)) {
53 res->dev = dev;
54 return 1;
55 }
56 return 0;
57}
58
59static int each_iommu(struct device *dev, void *data)
60{
61 return device_for_each_child(dev, data, each_iommu_ctx);
62}
63
64struct device *msm_iommu_get_ctx(const char *ctx_name)
65{
66 struct iommu_ctx_iter_data r;
67 int found;
68
69 if (!msm_iommu_root_dev) {
70 pr_err("No root IOMMU device.\n");
71 goto fail;
72 }
73
74 r.name = ctx_name;
75 found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu);
76
77 if (!found) {
78 pr_err("Could not find context <%s>\n", ctx_name);
79 goto fail;
80 }
81
82 return r.dev;
83fail:
84 return NULL;
85}
86EXPORT_SYMBOL(msm_iommu_get_ctx);
87
88static void msm_iommu_reset(void __iomem *base, int ncb)
89{
90 int ctx;
91
92 SET_RPUE(base, 0);
93 SET_RPUEIE(base, 0);
94 SET_ESRRESTORE(base, 0);
95 SET_TBE(base, 0);
96 SET_CR(base, 0);
97 SET_SPDMBE(base, 0);
98 SET_TESTBUSCR(base, 0);
99 SET_TLBRSW(base, 0);
100 SET_GLOBAL_TLBIALL(base, 0);
101 SET_RPU_ACR(base, 0);
102 SET_TLBLKCRWE(base, 1);
103
104 for (ctx = 0; ctx < ncb; ctx++) {
105 SET_BPRCOSH(base, ctx, 0);
106 SET_BPRCISH(base, ctx, 0);
107 SET_BPRCNSH(base, ctx, 0);
108 SET_BPSHCFG(base, ctx, 0);
109 SET_BPMTCFG(base, ctx, 0);
110 SET_ACTLR(base, ctx, 0);
111 SET_SCTLR(base, ctx, 0);
112 SET_FSRRESTORE(base, ctx, 0);
113 SET_TTBR0(base, ctx, 0);
114 SET_TTBR1(base, ctx, 0);
115 SET_TTBCR(base, ctx, 0);
116 SET_BFBCR(base, ctx, 0);
117 SET_PAR(base, ctx, 0);
118 SET_FAR(base, ctx, 0);
119 SET_CTX_TLBIALL(base, ctx, 0);
120 SET_TLBFLPTER(base, ctx, 0);
121 SET_TLBSLPTER(base, ctx, 0);
122 SET_TLBLKCR(base, ctx, 0);
123 SET_PRRR(base, ctx, 0);
124 SET_NMRR(base, ctx, 0);
125 SET_CONTEXTIDR(base, ctx, 0);
126 }
127}
128
129static int msm_iommu_probe(struct platform_device *pdev)
130{
131 struct resource *r, *r2;
132 struct clk *iommu_clk;
133 struct clk *iommu_pclk;
134 struct msm_iommu_drvdata *drvdata;
135 struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;
136 void __iomem *regs_base;
137 resource_size_t len;
138 int ret, irq, par;
139
140 if (pdev->id == -1) {
141 msm_iommu_root_dev = pdev;
142 return 0;
143 }
144
145 drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL);
146
147 if (!drvdata) {
148 ret = -ENOMEM;
149 goto fail;
150 }
151
152 if (!iommu_dev) {
153 ret = -ENODEV;
154 goto fail;
155 }
156
157 iommu_pclk = clk_get(NULL, "smmu_pclk");
158 if (IS_ERR(iommu_pclk)) {
159 ret = -ENODEV;
160 goto fail;
161 }
162
163 ret = clk_enable(iommu_pclk);
164 if (ret)
165 goto fail_enable;
166
167 iommu_clk = clk_get(&pdev->dev, "iommu_clk");
168
169 if (!IS_ERR(iommu_clk)) {
170 if (clk_get_rate(iommu_clk) == 0)
171 clk_set_min_rate(iommu_clk, 1);
172
173 ret = clk_enable(iommu_clk);
174 if (ret) {
175 clk_put(iommu_clk);
176 goto fail_pclk;
177 }
178 } else
179 iommu_clk = NULL;
180
181 r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase");
182
183 if (!r) {
184 ret = -ENODEV;
185 goto fail_clk;
186 }
187
188 len = resource_size(r);
189
190 r2 = request_mem_region(r->start, len, r->name);
191 if (!r2) {
192 pr_err("Could not request memory region: start=%p, len=%d\n",
193 (void *) r->start, len);
194 ret = -EBUSY;
195 goto fail_clk;
196 }
197
198 regs_base = ioremap(r2->start, len);
199
200 if (!regs_base) {
201 pr_err("Could not ioremap: start=%p, len=%d\n",
202 (void *) r2->start, len);
203 ret = -EBUSY;
204 goto fail_mem;
205 }
206
207 irq = platform_get_irq_byname(pdev, "secure_irq");
208 if (irq < 0) {
209 ret = -ENODEV;
210 goto fail_io;
211 }
212
213 msm_iommu_reset(regs_base, iommu_dev->ncb);
214
215 SET_M(regs_base, 0, 1);
216 SET_PAR(regs_base, 0, 0);
217 SET_V2PCFG(regs_base, 0, 1);
218 SET_V2PPR(regs_base, 0, 0);
219 par = GET_PAR(regs_base, 0);
220 SET_V2PCFG(regs_base, 0, 0);
221 SET_M(regs_base, 0, 0);
222
223 if (!par) {
224 pr_err("%s: Invalid PAR value detected\n", iommu_dev->name);
225 ret = -ENODEV;
226 goto fail_io;
227 }
228
229 ret = request_irq(irq, msm_iommu_fault_handler, 0,
230 "msm_iommu_secure_irpt_handler", drvdata);
231 if (ret) {
232 pr_err("Request IRQ %d failed with ret=%d\n", irq, ret);
233 goto fail_io;
234 }
235
236
237 drvdata->pclk = iommu_pclk;
238 drvdata->clk = iommu_clk;
239 drvdata->base = regs_base;
240 drvdata->irq = irq;
241 drvdata->ncb = iommu_dev->ncb;
242
243 pr_info("device %s mapped at %p, irq %d with %d ctx banks\n",
244 iommu_dev->name, regs_base, irq, iommu_dev->ncb);
245
246 platform_set_drvdata(pdev, drvdata);
247
248 if (iommu_clk)
249 clk_disable(iommu_clk);
250
251 clk_disable(iommu_pclk);
252
253 return 0;
254fail_io:
255 iounmap(regs_base);
256fail_mem:
257 release_mem_region(r->start, len);
258fail_clk:
259 if (iommu_clk) {
260 clk_disable(iommu_clk);
261 clk_put(iommu_clk);
262 }
263fail_pclk:
264 clk_disable(iommu_pclk);
265fail_enable:
266 clk_put(iommu_pclk);
267fail:
268 kfree(drvdata);
269 return ret;
270}
271
272static int msm_iommu_remove(struct platform_device *pdev)
273{
274 struct msm_iommu_drvdata *drv = NULL;
275
276 drv = platform_get_drvdata(pdev);
277 if (drv) {
278 if (drv->clk)
279 clk_put(drv->clk);
280 clk_put(drv->pclk);
281 memset(drv, 0, sizeof(*drv));
282 kfree(drv);
283 platform_set_drvdata(pdev, NULL);
284 }
285 return 0;
286}
287
288static int msm_iommu_ctx_probe(struct platform_device *pdev)
289{
290 struct msm_iommu_ctx_dev *c = pdev->dev.platform_data;
291 struct msm_iommu_drvdata *drvdata;
292 struct msm_iommu_ctx_drvdata *ctx_drvdata = NULL;
293 int i, ret;
294 if (!c || !pdev->dev.parent) {
295 ret = -EINVAL;
296 goto fail;
297 }
298
299 drvdata = dev_get_drvdata(pdev->dev.parent);
300
301 if (!drvdata) {
302 ret = -ENODEV;
303 goto fail;
304 }
305
306 ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL);
307 if (!ctx_drvdata) {
308 ret = -ENOMEM;
309 goto fail;
310 }
311 ctx_drvdata->num = c->num;
312 ctx_drvdata->pdev = pdev;
313
314 INIT_LIST_HEAD(&ctx_drvdata->attached_elm);
315 platform_set_drvdata(pdev, ctx_drvdata);
316
317 ret = clk_enable(drvdata->pclk);
318 if (ret)
319 goto fail;
320
321 if (drvdata->clk) {
322 ret = clk_enable(drvdata->clk);
323 if (ret) {
324 clk_disable(drvdata->pclk);
325 goto fail;
326 }
327 }
328
329 /* Program the M2V tables for this context */
330 for (i = 0; i < MAX_NUM_MIDS; i++) {
331 int mid = c->mids[i];
332 if (mid == -1)
333 break;
334
335 SET_M2VCBR_N(drvdata->base, mid, 0);
336 SET_CBACR_N(drvdata->base, c->num, 0);
337
338 /* Set VMID = 0 */
339 SET_VMID(drvdata->base, mid, 0);
340
341 /* Set the context number for that MID to this context */
342 SET_CBNDX(drvdata->base, mid, c->num);
343
344 /* Set MID associated with this context bank to 0 */
345 SET_CBVMID(drvdata->base, c->num, 0);
346
347 /* Set the ASID for TLB tagging for this context */
348 SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num);
349
350 /* Set security bit override to be Non-secure */
351 SET_NSCFG(drvdata->base, mid, 3);
352 }
353
354 if (drvdata->clk)
355 clk_disable(drvdata->clk);
356 clk_disable(drvdata->pclk);
357
358 dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num);
359 return 0;
360fail:
361 kfree(ctx_drvdata);
362 return ret;
363}
364
365static int msm_iommu_ctx_remove(struct platform_device *pdev)
366{
367 struct msm_iommu_ctx_drvdata *drv = NULL;
368 drv = platform_get_drvdata(pdev);
369 if (drv) {
370 memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata));
371 kfree(drv);
372 platform_set_drvdata(pdev, NULL);
373 }
374 return 0;
375}
376
377static struct platform_driver msm_iommu_driver = {
378 .driver = {
379 .name = "msm_iommu",
380 },
381 .probe = msm_iommu_probe,
382 .remove = msm_iommu_remove,
383};
384
385static struct platform_driver msm_iommu_ctx_driver = {
386 .driver = {
387 .name = "msm_iommu_ctx",
388 },
389 .probe = msm_iommu_ctx_probe,
390 .remove = msm_iommu_ctx_remove,
391};
392
393static int __init msm_iommu_driver_init(void)
394{
395 int ret;
396 ret = platform_driver_register(&msm_iommu_driver);
397 if (ret != 0) {
398 pr_err("Failed to register IOMMU driver\n");
399 goto error;
400 }
401
402 ret = platform_driver_register(&msm_iommu_ctx_driver);
403 if (ret != 0) {
404 pr_err("Failed to register IOMMU context driver\n");
405 goto error;
406 }
407
408error:
409 return ret;
410}
411
412static void __exit msm_iommu_driver_exit(void)
413{
414 platform_driver_unregister(&msm_iommu_ctx_driver);
415 platform_driver_unregister(&msm_iommu_driver);
416}
417
418subsys_initcall(msm_iommu_driver_init);
419module_exit(msm_iommu_driver_exit);
420
421MODULE_LICENSE("GPL v2");
422MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
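
The probe functions above are driven entirely by platform data supplied by the board files. The shape below is inferred from how the fields are used in msm_iommu_probe() and msm_iommu_ctx_probe() (the structures themselves live in mach/iommu.h); every name and number here is a placeholder:

	/* Sketch only: platform data consumed by the probe functions above. */
	static struct msm_iommu_dev example_iommu_pdata = {
		.name = "example_iommu",
		.ncb  = 2,			/* number of context banks */
	};

	static struct msm_iommu_ctx_dev example_ctx_pdata = {
		.name = "example_ctx",		/* looked up via msm_iommu_get_ctx() */
		.num  = 0,			/* context bank used by this context */
		.mids = { 0, 1, -1 },		/* MIDs routed to the bank, -1 terminated */
	};

These would be attached as dev.platform_data of "msm_iommu" and "msm_iommu_ctx" platform devices, matching the driver names registered by msm_iommu_driver_init() above.
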
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
new file mode 100644
index 000000000000..288da5c1499d
--- /dev/null
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -0,0 +1,419 @@
1/*
2 * omap iommu: debugfs interface
3 *
4 * Copyright (C) 2008-2009 Nokia Corporation
5 *
6 * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/err.h>
15#include <linux/clk.h>
16#include <linux/io.h>
17#include <linux/slab.h>
18#include <linux/uaccess.h>
19#include <linux/platform_device.h>
20#include <linux/debugfs.h>
21
22#include <plat/iommu.h>
23#include <plat/iovmm.h>
24
25#include <plat/iopgtable.h>
26
27#define MAXCOLUMN 100 /* for short messages */
28
29static DEFINE_MUTEX(iommu_debug_lock);
30
31static struct dentry *iommu_debug_root;
32
33static ssize_t debug_read_ver(struct file *file, char __user *userbuf,
34 size_t count, loff_t *ppos)
35{
36 u32 ver = omap_iommu_arch_version();
37 char buf[MAXCOLUMN], *p = buf;
38
39 p += sprintf(p, "H/W version: %d.%d\n", (ver >> 4) & 0xf, ver & 0xf);
40
41 return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
42}
43
44static ssize_t debug_read_regs(struct file *file, char __user *userbuf,
45 size_t count, loff_t *ppos)
46{
47 struct omap_iommu *obj = file->private_data;
48 char *p, *buf;
49 ssize_t bytes;
50
51 buf = kmalloc(count, GFP_KERNEL);
52 if (!buf)
53 return -ENOMEM;
54 p = buf;
55
56 mutex_lock(&iommu_debug_lock);
57
58 bytes = omap_iommu_dump_ctx(obj, p, count);
59 bytes = simple_read_from_buffer(userbuf, count, ppos, buf, bytes);
60
61 mutex_unlock(&iommu_debug_lock);
62 kfree(buf);
63
64 return bytes;
65}
66
67static ssize_t debug_read_tlb(struct file *file, char __user *userbuf,
68 size_t count, loff_t *ppos)
69{
70 struct omap_iommu *obj = file->private_data;
71 char *p, *buf;
72 ssize_t bytes, rest;
73
74 buf = kmalloc(count, GFP_KERNEL);
75 if (!buf)
76 return -ENOMEM;
77 p = buf;
78
79 mutex_lock(&iommu_debug_lock);
80
81 p += sprintf(p, "%8s %8s\n", "cam:", "ram:");
82 p += sprintf(p, "-----------------------------------------\n");
83 rest = count - (p - buf);
84 p += omap_dump_tlb_entries(obj, p, rest);
85
86 bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
87
88 mutex_unlock(&iommu_debug_lock);
89 kfree(buf);
90
91 return bytes;
92}
93
94static ssize_t debug_write_pagetable(struct file *file,
95 const char __user *userbuf, size_t count, loff_t *ppos)
96{
97 struct iotlb_entry e;
98 struct cr_regs cr;
99 int err;
100 struct omap_iommu *obj = file->private_data;
101 char buf[MAXCOLUMN], *p = buf;
102
103 count = min(count, sizeof(buf));
104
105 mutex_lock(&iommu_debug_lock);
106 if (copy_from_user(p, userbuf, count)) {
107 mutex_unlock(&iommu_debug_lock);
108 return -EFAULT;
109 }
110
111 sscanf(p, "%x %x", &cr.cam, &cr.ram);
112 if (!cr.cam || !cr.ram) {
113 mutex_unlock(&iommu_debug_lock);
114 return -EINVAL;
115 }
116
117 omap_iotlb_cr_to_e(&cr, &e);
118 err = omap_iopgtable_store_entry(obj, &e);
119 if (err)
120 dev_err(obj->dev, "%s: fail to store cr\n", __func__);
121
122 mutex_unlock(&iommu_debug_lock);
123 return count;
124}
125
126#define dump_ioptable_entry_one(lv, da, val) \
127 ({ \
128 int __err = 0; \
129 ssize_t bytes; \
130 const int maxcol = 22; \
131 const char *str = "%d: %08x %08x\n"; \
132 bytes = snprintf(p, maxcol, str, lv, da, val); \
133 p += bytes; \
134 len -= bytes; \
135 if (len < maxcol) \
136 __err = -ENOMEM; \
137 __err; \
138 })
139
140static ssize_t dump_ioptable(struct omap_iommu *obj, char *buf, ssize_t len)
141{
142 int i;
143 u32 *iopgd;
144 char *p = buf;
145
146 spin_lock(&obj->page_table_lock);
147
148 iopgd = iopgd_offset(obj, 0);
149 for (i = 0; i < PTRS_PER_IOPGD; i++, iopgd++) {
150 int j, err;
151 u32 *iopte;
152 u32 da;
153
154 if (!*iopgd)
155 continue;
156
157 if (!(*iopgd & IOPGD_TABLE)) {
158 da = i << IOPGD_SHIFT;
159
160 err = dump_ioptable_entry_one(1, da, *iopgd);
161 if (err)
162 goto out;
163 continue;
164 }
165
166 iopte = iopte_offset(iopgd, 0);
167
168 for (j = 0; j < PTRS_PER_IOPTE; j++, iopte++) {
169 if (!*iopte)
170 continue;
171
172 da = (i << IOPGD_SHIFT) + (j << IOPTE_SHIFT);
173 err = dump_ioptable_entry_one(2, da, *iopte);
174 if (err)
175 goto out;
176 }
177 }
178out:
179 spin_unlock(&obj->page_table_lock);
180
181 return p - buf;
182}
183
184static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf,
185 size_t count, loff_t *ppos)
186{
187 struct omap_iommu *obj = file->private_data;
188 char *p, *buf;
189 size_t bytes;
190
191 buf = (char *)__get_free_page(GFP_KERNEL);
192 if (!buf)
193 return -ENOMEM;
194 p = buf;
195
196 p += sprintf(p, "L: %8s %8s\n", "da:", "pa:");
197 p += sprintf(p, "-----------------------------------------\n");
198
199 mutex_lock(&iommu_debug_lock);
200
201 bytes = PAGE_SIZE - (p - buf);
202 p += dump_ioptable(obj, p, bytes);
203
204 bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
205
206 mutex_unlock(&iommu_debug_lock);
207 free_page((unsigned long)buf);
208
209 return bytes;
210}
211
212static ssize_t debug_read_mmap(struct file *file, char __user *userbuf,
213 size_t count, loff_t *ppos)
214{
215 struct omap_iommu *obj = file->private_data;
216 char *p, *buf;
217 struct iovm_struct *tmp;
218 int uninitialized_var(i);
219 ssize_t bytes;
220
221 buf = (char *)__get_free_page(GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
224 p = buf;
225
226 p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n",
227 "No", "start", "end", "size", "flags");
228 p += sprintf(p, "-------------------------------------------------\n");
229
230 mutex_lock(&iommu_debug_lock);
231
232 list_for_each_entry(tmp, &obj->mmap, list) {
233 size_t len;
234 const char *str = "%3d %08x-%08x %6x %8x\n";
235 const int maxcol = 39;
236
237 len = tmp->da_end - tmp->da_start;
238 p += snprintf(p, maxcol, str,
239 i, tmp->da_start, tmp->da_end, len, tmp->flags);
240
241 if (PAGE_SIZE - (p - buf) < maxcol)
242 break;
243 i++;
244 }
245
246 bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
247
248 mutex_unlock(&iommu_debug_lock);
249 free_page((unsigned long)buf);
250
251 return bytes;
252}
253
254static ssize_t debug_read_mem(struct file *file, char __user *userbuf,
255 size_t count, loff_t *ppos)
256{
257 struct omap_iommu *obj = file->private_data;
258 char *p, *buf;
259 struct iovm_struct *area;
260 ssize_t bytes;
261
262 count = min_t(ssize_t, count, PAGE_SIZE);
263
264 buf = (char *)__get_free_page(GFP_KERNEL);
265 if (!buf)
266 return -ENOMEM;
267 p = buf;
268
269 mutex_lock(&iommu_debug_lock);
270
271 area = omap_find_iovm_area(obj, (u32)ppos);
272 if (IS_ERR(area)) {
273 bytes = -EINVAL;
274 goto err_out;
275 }
276 memcpy(p, area->va, count);
277 p += count;
278
279 bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
280err_out:
281 mutex_unlock(&iommu_debug_lock);
282 free_page((unsigned long)buf);
283
284 return bytes;
285}
286
287static ssize_t debug_write_mem(struct file *file, const char __user *userbuf,
288 size_t count, loff_t *ppos)
289{
290 struct omap_iommu *obj = file->private_data;
291 struct iovm_struct *area;
292 char *p, *buf;
293
294 count = min_t(size_t, count, PAGE_SIZE);
295
296 buf = (char *)__get_free_page(GFP_KERNEL);
297 if (!buf)
298 return -ENOMEM;
299 p = buf;
300
301 mutex_lock(&iommu_debug_lock);
302
303 if (copy_from_user(p, userbuf, count)) {
304 count = -EFAULT;
305 goto err_out;
306 }
307
308 area = omap_find_iovm_area(obj, (u32)ppos);
309 if (IS_ERR(area)) {
310 count = -EINVAL;
311 goto err_out;
312 }
313 memcpy(area->va, p, count);
314err_out:
315 mutex_unlock(&iommu_debug_lock);
316 free_page((unsigned long)buf);
317
318 return count;
319}
320
321static int debug_open_generic(struct inode *inode, struct file *file)
322{
323 file->private_data = inode->i_private;
324 return 0;
325}
326
327#define DEBUG_FOPS(name) \
328 static const struct file_operations debug_##name##_fops = { \
329 .open = debug_open_generic, \
330 .read = debug_read_##name, \
331 .write = debug_write_##name, \
332 .llseek = generic_file_llseek, \
333 };
334
335#define DEBUG_FOPS_RO(name) \
336 static const struct file_operations debug_##name##_fops = { \
337 .open = debug_open_generic, \
338 .read = debug_read_##name, \
339 .llseek = generic_file_llseek, \
340 };
341
342DEBUG_FOPS_RO(ver);
343DEBUG_FOPS_RO(regs);
344DEBUG_FOPS_RO(tlb);
345DEBUG_FOPS(pagetable);
346DEBUG_FOPS_RO(mmap);
347DEBUG_FOPS(mem);
348
349#define __DEBUG_ADD_FILE(attr, mode) \
350 { \
351 struct dentry *dent; \
352 dent = debugfs_create_file(#attr, mode, parent, \
353 obj, &debug_##attr##_fops); \
354 if (!dent) \
355 return -ENOMEM; \
356 }
357
358#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 0600)
359#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400)
360
361static int iommu_debug_register(struct device *dev, void *data)
362{
363 struct platform_device *pdev = to_platform_device(dev);
364 struct omap_iommu *obj = platform_get_drvdata(pdev);
365 struct dentry *d, *parent;
366
367 if (!obj || !obj->dev)
368 return -EINVAL;
369
370 d = debugfs_create_dir(obj->name, iommu_debug_root);
371 if (!d)
372 return -ENOMEM;
373 parent = d;
374
375 d = debugfs_create_u8("nr_tlb_entries", 0400, parent,
376 (u8 *)&obj->nr_tlb_entries);
377 if (!d)
378 return -ENOMEM;
379
380 DEBUG_ADD_FILE_RO(ver);
381 DEBUG_ADD_FILE_RO(regs);
382 DEBUG_ADD_FILE_RO(tlb);
383 DEBUG_ADD_FILE(pagetable);
384 DEBUG_ADD_FILE_RO(mmap);
385 DEBUG_ADD_FILE(mem);
386
387 return 0;
388}
389
390static int __init iommu_debug_init(void)
391{
392 struct dentry *d;
393 int err;
394
395 d = debugfs_create_dir("iommu", NULL);
396 if (!d)
397 return -ENOMEM;
398 iommu_debug_root = d;
399
400 err = omap_foreach_iommu_device(d, iommu_debug_register);
401 if (err)
402 goto err_out;
403 return 0;
404
405err_out:
406 debugfs_remove_recursive(iommu_debug_root);
407 return err;
408}
409module_init(iommu_debug_init)
410
411static void __exit iommu_debugfs_exit(void)
412{
413 debugfs_remove_recursive(iommu_debug_root);
414}
415module_exit(iommu_debugfs_exit)
416
417MODULE_DESCRIPTION("omap iommu: debugfs interface");
418MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
419MODULE_LICENSE("GPL v2");
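
The pagetable file created above accepts two hexadecimal words, cam then ram, parsed with sscanf("%x %x"); both must be non-zero. A userspace sketch, assuming the usual debugfs mount point, a hypothetical iommu instance name and placeholder register values:

	/* Sketch only: feed one cam/ram pair to the debugfs pagetable file. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/iommu/example/pagetable", "w");

		if (!f)
			return 1;
		/* cam word first, then ram word; the values are placeholders */
		fprintf(f, "%x %x\n", 0x40000008, 0x4800000c);
		fclose(f);
		return 0;
	}
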
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
new file mode 100644
index 000000000000..8f32b2bf7587
--- /dev/null
+++ b/drivers/iommu/omap-iommu.c
@@ -0,0 +1,1245 @@
1/*
2 * omap iommu: tlb and pagetable primitives
3 *
4 * Copyright (C) 2008-2010 Nokia Corporation
5 *
6 * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
7 * Paul Mundt and Toshihiro Kobayashi
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/err.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/interrupt.h>
18#include <linux/ioport.h>
19#include <linux/clk.h>
20#include <linux/platform_device.h>
21#include <linux/iommu.h>
22#include <linux/mutex.h>
23#include <linux/spinlock.h>
24
25#include <asm/cacheflush.h>
26
27#include <plat/iommu.h>
28
29#include <plat/iopgtable.h>
30
31#define for_each_iotlb_cr(obj, n, __i, cr) \
32 for (__i = 0; \
33 (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \
34 __i++)
35
36/**
37 * struct omap_iommu_domain - omap iommu domain
38 * @pgtable: the page table
39 * @iommu_dev: an omap iommu device attached to this domain. Only a single
40 * iommu device can be attached for now.
41 * @lock: domain lock, should be taken when attaching/detaching
42 */
43struct omap_iommu_domain {
44 u32 *pgtable;
45 struct omap_iommu *iommu_dev;
46 spinlock_t lock;
47};
48
49/* accommodate the difference between omap1 and omap2/3 */
50static const struct iommu_functions *arch_iommu;
51
52static struct platform_driver omap_iommu_driver;
53static struct kmem_cache *iopte_cachep;
54
55/**
56 * omap_install_iommu_arch - Install architecture specific iommu functions
57 * @ops: a pointer to architecture specific iommu functions
58 *
59 * There are several kinds of iommu algorithms (tlb, pagetable) among the
60 * omap series. This interface installs such an iommu algorithm.
61 **/
62int omap_install_iommu_arch(const struct iommu_functions *ops)
63{
64 if (arch_iommu)
65 return -EBUSY;
66
67 arch_iommu = ops;
68 return 0;
69}
70EXPORT_SYMBOL_GPL(omap_install_iommu_arch);
71
72/**
73 * omap_uninstall_iommu_arch - Uninstall architecture specific iommu functions
74 * @ops: a pointer to architecture specific iommu functions
75 *
76 * This interface uninstalls the iommu algorithm installed previously.
77 **/
78void omap_uninstall_iommu_arch(const struct iommu_functions *ops)
79{
80 if (arch_iommu != ops)
81 pr_err("%s: not your arch\n", __func__);
82
83 arch_iommu = NULL;
84}
85EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch);
86
87/**
88 * omap_iommu_save_ctx - Save registers for pm off-mode support
89 * @obj: target iommu
90 **/
91void omap_iommu_save_ctx(struct omap_iommu *obj)
92{
93 arch_iommu->save_ctx(obj);
94}
95EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
96
97/**
98 * omap_iommu_restore_ctx - Restore registers for pm off-mode support
99 * @obj: target iommu
100 **/
101void omap_iommu_restore_ctx(struct omap_iommu *obj)
102{
103 arch_iommu->restore_ctx(obj);
104}
105EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
106
107/**
108 * omap_iommu_arch_version - Return running iommu arch version
109 **/
110u32 omap_iommu_arch_version(void)
111{
112 return arch_iommu->version;
113}
114EXPORT_SYMBOL_GPL(omap_iommu_arch_version);
115
116static int iommu_enable(struct omap_iommu *obj)
117{
118 int err;
119
120 if (!obj)
121 return -EINVAL;
122
123 if (!arch_iommu)
124 return -ENODEV;
125
126 clk_enable(obj->clk);
127
128 err = arch_iommu->enable(obj);
129
130 clk_disable(obj->clk);
131 return err;
132}
133
134static void iommu_disable(struct omap_iommu *obj)
135{
136 if (!obj)
137 return;
138
139 clk_enable(obj->clk);
140
141 arch_iommu->disable(obj);
142
143 clk_disable(obj->clk);
144}
145
146/*
147 * TLB operations
148 */
149void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
150{
151 BUG_ON(!cr || !e);
152
153 arch_iommu->cr_to_e(cr, e);
154}
155EXPORT_SYMBOL_GPL(omap_iotlb_cr_to_e);
156
157static inline int iotlb_cr_valid(struct cr_regs *cr)
158{
159 if (!cr)
160 return -EINVAL;
161
162 return arch_iommu->cr_valid(cr);
163}
164
165static inline struct cr_regs *iotlb_alloc_cr(struct omap_iommu *obj,
166 struct iotlb_entry *e)
167{
168 if (!e)
169 return NULL;
170
171 return arch_iommu->alloc_cr(obj, e);
172}
173
174static u32 iotlb_cr_to_virt(struct cr_regs *cr)
175{
176 return arch_iommu->cr_to_virt(cr);
177}
178
179static u32 get_iopte_attr(struct iotlb_entry *e)
180{
181 return arch_iommu->get_pte_attr(e);
182}
183
184static u32 iommu_report_fault(struct omap_iommu *obj, u32 *da)
185{
186 return arch_iommu->fault_isr(obj, da);
187}
188
189static void iotlb_lock_get(struct omap_iommu *obj, struct iotlb_lock *l)
190{
191 u32 val;
192
193 val = iommu_read_reg(obj, MMU_LOCK);
194
195 l->base = MMU_LOCK_BASE(val);
196 l->vict = MMU_LOCK_VICT(val);
197
198}
199
200static void iotlb_lock_set(struct omap_iommu *obj, struct iotlb_lock *l)
201{
202 u32 val;
203
204 val = (l->base << MMU_LOCK_BASE_SHIFT);
205 val |= (l->vict << MMU_LOCK_VICT_SHIFT);
206
207 iommu_write_reg(obj, val, MMU_LOCK);
208}
209
210static void iotlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr)
211{
212 arch_iommu->tlb_read_cr(obj, cr);
213}
214
215static void iotlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr)
216{
217 arch_iommu->tlb_load_cr(obj, cr);
218
219 iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
220 iommu_write_reg(obj, 1, MMU_LD_TLB);
221}
222
223/**
224 * iotlb_dump_cr - Dump an iommu tlb entry into buf
225 * @obj: target iommu
226 * @cr: contents of cam and ram register
227 * @buf: output buffer
228 **/
229static inline ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr,
230 char *buf)
231{
232 BUG_ON(!cr || !buf);
233
234 return arch_iommu->dump_cr(obj, cr, buf);
235}
236
237/* only used in iotlb iteration for-loop */
238static struct cr_regs __iotlb_read_cr(struct omap_iommu *obj, int n)
239{
240 struct cr_regs cr;
241 struct iotlb_lock l;
242
243 iotlb_lock_get(obj, &l);
244 l.vict = n;
245 iotlb_lock_set(obj, &l);
246 iotlb_read_cr(obj, &cr);
247
248 return cr;
249}
250
251/**
252 * load_iotlb_entry - Set an iommu tlb entry
253 * @obj: target iommu
254 * @e: an iommu tlb entry info
255 **/
256#ifdef PREFETCH_IOTLB
257static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
258{
259 int err = 0;
260 struct iotlb_lock l;
261 struct cr_regs *cr;
262
263 if (!obj || !obj->nr_tlb_entries || !e)
264 return -EINVAL;
265
266 clk_enable(obj->clk);
267
268 iotlb_lock_get(obj, &l);
269 if (l.base == obj->nr_tlb_entries) {
270 dev_warn(obj->dev, "%s: preserve entries full\n", __func__);
271 err = -EBUSY;
272 goto out;
273 }
274 if (!e->prsvd) {
275 int i;
276 struct cr_regs tmp;
277
278 for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, tmp)
279 if (!iotlb_cr_valid(&tmp))
280 break;
281
282 if (i == obj->nr_tlb_entries) {
283 dev_dbg(obj->dev, "%s: full: no entry\n", __func__);
284 err = -EBUSY;
285 goto out;
286 }
287
288 iotlb_lock_get(obj, &l);
289 } else {
290 l.vict = l.base;
291 iotlb_lock_set(obj, &l);
292 }
293
294 cr = iotlb_alloc_cr(obj, e);
295 if (IS_ERR(cr)) {
296 clk_disable(obj->clk);
297 return PTR_ERR(cr);
298 }
299
300 iotlb_load_cr(obj, cr);
301 kfree(cr);
302
303 if (e->prsvd)
304 l.base++;
305 /* increment victim for next tlb load */
306 if (++l.vict == obj->nr_tlb_entries)
307 l.vict = l.base;
308 iotlb_lock_set(obj, &l);
309out:
310 clk_disable(obj->clk);
311 return err;
312}
313
314#else /* !PREFETCH_IOTLB */
315
316static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
317{
318 return 0;
319}
320
321#endif /* !PREFETCH_IOTLB */
322
323static int prefetch_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
324{
325 return load_iotlb_entry(obj, e);
326}
327
328/**
329 * flush_iotlb_page - Clear an iommu tlb entry
330 * @obj: target iommu
331 * @da: iommu device virtual address
332 *
333 * Clear an iommu tlb entry which includes 'da' address.
334 **/
335static void flush_iotlb_page(struct omap_iommu *obj, u32 da)
336{
337 int i;
338 struct cr_regs cr;
339
340 clk_enable(obj->clk);
341
342 for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) {
343 u32 start;
344 size_t bytes;
345
346 if (!iotlb_cr_valid(&cr))
347 continue;
348
349 start = iotlb_cr_to_virt(&cr);
350 bytes = iopgsz_to_bytes(cr.cam & 3);
351
352 if ((start <= da) && (da < start + bytes)) {
353 dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n",
354 __func__, start, da, bytes);
355 iotlb_load_cr(obj, &cr);
356 iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
357 }
358 }
359 clk_disable(obj->clk);
360
361 if (i == obj->nr_tlb_entries)
362 dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
363}
364
365/**
366 * flush_iotlb_all - Clear all iommu tlb entries
367 * @obj: target iommu
368 **/
369static void flush_iotlb_all(struct omap_iommu *obj)
370{
371 struct iotlb_lock l;
372
373 clk_enable(obj->clk);
374
375 l.base = 0;
376 l.vict = 0;
377 iotlb_lock_set(obj, &l);
378
379 iommu_write_reg(obj, 1, MMU_GFLUSH);
380
381 clk_disable(obj->clk);
382}
383
384#if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
385
386ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes)
387{
388 if (!obj || !buf)
389 return -EINVAL;
390
391 clk_enable(obj->clk);
392
393 bytes = arch_iommu->dump_ctx(obj, buf, bytes);
394
395 clk_disable(obj->clk);
396
397 return bytes;
398}
399EXPORT_SYMBOL_GPL(omap_iommu_dump_ctx);
400
401static int
402__dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
403{
404 int i;
405 struct iotlb_lock saved;
406 struct cr_regs tmp;
407 struct cr_regs *p = crs;
408
409 clk_enable(obj->clk);
410 iotlb_lock_get(obj, &saved);
411
412 for_each_iotlb_cr(obj, num, i, tmp) {
413 if (!iotlb_cr_valid(&tmp))
414 continue;
415 *p++ = tmp;
416 }
417
418 iotlb_lock_set(obj, &saved);
419 clk_disable(obj->clk);
420
421 return p - crs;
422}
423
424/**
425 * omap_dump_tlb_entries - dump cr arrays to given buffer
426 * @obj: target iommu
427 * @buf: output buffer
428 **/
429size_t omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t bytes)
430{
431 int i, num;
432 struct cr_regs *cr;
433 char *p = buf;
434
435 num = bytes / sizeof(*cr);
436 num = min(obj->nr_tlb_entries, num);
437
438 cr = kcalloc(num, sizeof(*cr), GFP_KERNEL);
439 if (!cr)
440 return 0;
441
442 num = __dump_tlb_entries(obj, cr, num);
443 for (i = 0; i < num; i++)
444 p += iotlb_dump_cr(obj, cr + i, p);
445 kfree(cr);
446
447 return p - buf;
448}
449EXPORT_SYMBOL_GPL(omap_dump_tlb_entries);
450
451int omap_foreach_iommu_device(void *data, int (*fn)(struct device *, void *))
452{
453 return driver_for_each_device(&omap_iommu_driver.driver,
454 NULL, data, fn);
455}
456EXPORT_SYMBOL_GPL(omap_foreach_iommu_device);
457
458#endif /* CONFIG_OMAP_IOMMU_DEBUG || CONFIG_OMAP_IOMMU_DEBUG_MODULE */
459
460/*
461 * H/W pagetable operations
462 */
463static void flush_iopgd_range(u32 *first, u32 *last)
464{
465 /* FIXME: L2 cache should be taken care of if it exists */
466 do {
467 asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pgd"
468 : : "r" (first));
469 first += L1_CACHE_BYTES / sizeof(*first);
470 } while (first <= last);
471}
472
473static void flush_iopte_range(u32 *first, u32 *last)
474{
475 /* FIXME: L2 cache should be taken care of if it exists */
476 do {
477 asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pte"
478 : : "r" (first));
479 first += L1_CACHE_BYTES / sizeof(*first);
480 } while (first <= last);
481}
482
483static void iopte_free(u32 *iopte)
484{
485 /* Note: freed ioptes must be clean and ready for re-use */
486 kmem_cache_free(iopte_cachep, iopte);
487}
488
489static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da)
490{
491 u32 *iopte;
492
493 /* a table already exists */
494 if (*iopgd)
495 goto pte_ready;
496
497 /*
498 * do the allocation outside the page table lock
499 */
500 spin_unlock(&obj->page_table_lock);
501 iopte = kmem_cache_zalloc(iopte_cachep, GFP_KERNEL);
502 spin_lock(&obj->page_table_lock);
503
504 if (!*iopgd) {
505 if (!iopte)
506 return ERR_PTR(-ENOMEM);
507
508 *iopgd = virt_to_phys(iopte) | IOPGD_TABLE;
509 flush_iopgd_range(iopgd, iopgd);
510
511 dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte);
512 } else {
513 /* We raced; free the redundant table */
514 iopte_free(iopte);
515 }
516
517pte_ready:
518 iopte = iopte_offset(iopgd, da);
519
520 dev_vdbg(obj->dev,
521 "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n",
522 __func__, da, iopgd, *iopgd, iopte, *iopte);
523
524 return iopte;
525}
526
527static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
528{
529 u32 *iopgd = iopgd_offset(obj, da);
530
531 if ((da | pa) & ~IOSECTION_MASK) {
532 dev_err(obj->dev, "%s: %08x:%08x should be aligned on %08lx\n",
533 __func__, da, pa, IOSECTION_SIZE);
534 return -EINVAL;
535 }
536
537 *iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION;
538 flush_iopgd_range(iopgd, iopgd);
539 return 0;
540}
541
542static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
543{
544 u32 *iopgd = iopgd_offset(obj, da);
545 int i;
546
547 if ((da | pa) & ~IOSUPER_MASK) {
548 dev_err(obj->dev, "%s: %08x:%08x should be aligned on %08lx\n",
549 __func__, da, pa, IOSUPER_SIZE);
550 return -EINVAL;
551 }
552
553 for (i = 0; i < 16; i++)
554 *(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER;
555 flush_iopgd_range(iopgd, iopgd + 15);
556 return 0;
557}
558
559static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
560{
561 u32 *iopgd = iopgd_offset(obj, da);
562 u32 *iopte = iopte_alloc(obj, iopgd, da);
563
564 if (IS_ERR(iopte))
565 return PTR_ERR(iopte);
566
567 *iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL;
568 flush_iopte_range(iopte, iopte);
569
570 dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n",
571 __func__, da, pa, iopte, *iopte);
572
573 return 0;
574}
575
576static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
577{
578 u32 *iopgd = iopgd_offset(obj, da);
579 u32 *iopte = iopte_alloc(obj, iopgd, da);
580 int i;
581
582 if ((da | pa) & ~IOLARGE_MASK) {
583 dev_err(obj->dev, "%s: %08x:%08x should be aligned on %08lx\n",
584 __func__, da, pa, IOLARGE_SIZE);
585 return -EINVAL;
586 }
587
588 if (IS_ERR(iopte))
589 return PTR_ERR(iopte);
590
591 for (i = 0; i < 16; i++)
592 *(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE;
593 flush_iopte_range(iopte, iopte + 15);
594 return 0;
595}
596
597static int
598iopgtable_store_entry_core(struct omap_iommu *obj, struct iotlb_entry *e)
599{
600 int (*fn)(struct omap_iommu *, u32, u32, u32);
601 u32 prot;
602 int err;
603
604 if (!obj || !e)
605 return -EINVAL;
606
607 switch (e->pgsz) {
608 case MMU_CAM_PGSZ_16M:
609 fn = iopgd_alloc_super;
610 break;
611 case MMU_CAM_PGSZ_1M:
612 fn = iopgd_alloc_section;
613 break;
614 case MMU_CAM_PGSZ_64K:
615 fn = iopte_alloc_large;
616 break;
617 case MMU_CAM_PGSZ_4K:
618 fn = iopte_alloc_page;
619 break;
620 default:
621 fn = NULL;
622 BUG();
623 break;
624 }
625
626 prot = get_iopte_attr(e);
627
628 spin_lock(&obj->page_table_lock);
629 err = fn(obj, e->da, e->pa, prot);
630 spin_unlock(&obj->page_table_lock);
631
632 return err;
633}
634
635/**
636 * omap_iopgtable_store_entry - Make an iommu pte entry
637 * @obj: target iommu
638 * @e: an iommu tlb entry info
639 **/
640int omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e)
641{
642 int err;
643
644 flush_iotlb_page(obj, e->da);
645 err = iopgtable_store_entry_core(obj, e);
646 if (!err)
647 prefetch_iotlb_entry(obj, e);
648 return err;
649}
650EXPORT_SYMBOL_GPL(omap_iopgtable_store_entry);
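
A minimal sketch of driving the page-table layer above: fill an iotlb_entry (only the fields consulted by iopgtable_store_entry_core() are set here; a real caller also fills the validity and memory-attribute fields declared in plat/iommu.h) and hand it to omap_iopgtable_store_entry(), which flushes any stale TLB entry for the address, updates the tables and, when PREFETCH_IOTLB is enabled, preloads the TLB:

	/* Sketch only: program one 4K mapping for an attached omap iommu. */
	static int example_store_4k(struct omap_iommu *obj, u32 da, u32 pa)
	{
		struct iotlb_entry e = {
			.da    = da,
			.pa    = pa,
			.pgsz  = MMU_CAM_PGSZ_4K,
			.prsvd = 0,		/* do not preserve (lock) a TLB entry */
		};

		return omap_iopgtable_store_entry(obj, &e);
	}
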
651
652/**
653 * iopgtable_lookup_entry - Lookup an iommu pte entry
654 * @obj: target iommu
655 * @da: iommu device virtual address
656 * @ppgd: iommu pgd entry pointer to be returned
657 * @ppte: iommu pte entry pointer to be returned
658 **/
659static void
660iopgtable_lookup_entry(struct omap_iommu *obj, u32 da, u32 **ppgd, u32 **ppte)
661{
662 u32 *iopgd, *iopte = NULL;
663
664 iopgd = iopgd_offset(obj, da);
665 if (!*iopgd)
666 goto out;
667
668 if (iopgd_is_table(*iopgd))
669 iopte = iopte_offset(iopgd, da);
670out:
671 *ppgd = iopgd;
672 *ppte = iopte;
673}
674
675static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da)
676{
677 size_t bytes;
678 u32 *iopgd = iopgd_offset(obj, da);
679 int nent = 1;
680
681 if (!*iopgd)
682 return 0;
683
684 if (iopgd_is_table(*iopgd)) {
685 int i;
686 u32 *iopte = iopte_offset(iopgd, da);
687
688 bytes = IOPTE_SIZE;
689 if (*iopte & IOPTE_LARGE) {
690 nent *= 16;
691 /* rewind to the 1st entry */
692 iopte = iopte_offset(iopgd, (da & IOLARGE_MASK));
693 }
694 bytes *= nent;
695 memset(iopte, 0, nent * sizeof(*iopte));
696 flush_iopte_range(iopte, iopte + nent - 1);
697
698 /*
699 * do table walk to check if this table is necessary or not
700 */
701 iopte = iopte_offset(iopgd, 0);
702 for (i = 0; i < PTRS_PER_IOPTE; i++)
703 if (iopte[i])
704 goto out;
705
706 iopte_free(iopte);
707 nent = 1; /* for the next L1 entry */
708 } else {
709 bytes = IOPGD_SIZE;
710 if ((*iopgd & IOPGD_SUPER) == IOPGD_SUPER) {
711 nent *= 16;
712 /* rewind to the 1st entry */
713 iopgd = iopgd_offset(obj, (da & IOSUPER_MASK));
714 }
715 bytes *= nent;
716 }
717 memset(iopgd, 0, nent * sizeof(*iopgd));
718 flush_iopgd_range(iopgd, iopgd + nent - 1);
719out:
720 return bytes;
721}
722
723/**
724 * iopgtable_clear_entry - Remove an iommu pte entry
725 * @obj: target iommu
726 * @da: iommu device virtual address
727 **/
728static size_t iopgtable_clear_entry(struct omap_iommu *obj, u32 da)
729{
730 size_t bytes;
731
732 spin_lock(&obj->page_table_lock);
733
734 bytes = iopgtable_clear_entry_core(obj, da);
735 flush_iotlb_page(obj, da);
736
737 spin_unlock(&obj->page_table_lock);
738
739 return bytes;
740}
741
742static void iopgtable_clear_entry_all(struct omap_iommu *obj)
743{
744 int i;
745
746 spin_lock(&obj->page_table_lock);
747
748 for (i = 0; i < PTRS_PER_IOPGD; i++) {
749 u32 da;
750 u32 *iopgd;
751
752 da = i << IOPGD_SHIFT;
753 iopgd = iopgd_offset(obj, da);
754
755 if (!*iopgd)
756 continue;
757
758 if (iopgd_is_table(*iopgd))
759 iopte_free(iopte_offset(iopgd, 0));
760
761 *iopgd = 0;
762 flush_iopgd_range(iopgd, iopgd);
763 }
764
765 flush_iotlb_all(obj);
766
767 spin_unlock(&obj->page_table_lock);
768}
769
770/*
771 * Device IOMMU generic operations
772 */
773static irqreturn_t iommu_fault_handler(int irq, void *data)
774{
775 u32 da, errs;
776 u32 *iopgd, *iopte;
777 struct omap_iommu *obj = data;
778 struct iommu_domain *domain = obj->domain;
779
780 if (!obj->refcount)
781 return IRQ_NONE;
782
783 clk_enable(obj->clk);
784 errs = iommu_report_fault(obj, &da);
785 clk_disable(obj->clk);
786 if (errs == 0)
787 return IRQ_HANDLED;
788
789 /* Fault callback or TLB/PTE Dynamic loading */
790 if (!report_iommu_fault(domain, obj->dev, da, 0))
791 return IRQ_HANDLED;
792
793 iommu_disable(obj);
794
795 iopgd = iopgd_offset(obj, da);
796
797 if (!iopgd_is_table(*iopgd)) {
798 dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p "
799			"*pgd:0x%08x\n", obj->name, errs, da, iopgd, *iopgd);
800 return IRQ_NONE;
801 }
802
803 iopte = iopte_offset(iopgd, da);
804
805 dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x "
806 "pte:0x%p *pte:0x%08x\n", obj->name, errs, da, iopgd, *iopgd,
807 iopte, *iopte);
808
809 return IRQ_NONE;
810}
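
/*
 * Illustrative sketch, not part of the original file: a client can be
 * notified before the fallback dump above runs by registering a fault
 * handler on its iommu_domain, assuming the generic
 * iommu_set_fault_handler()/report_iommu_fault() hooks of this kernel
 * generation. 'example_fault_cb' and its recovery policy are placeholders;
 * returning 0 tells the core the fault was handled.
 */
static int __maybe_unused example_fault_cb(struct iommu_domain *domain,
					   struct device *dev,
					   unsigned long iova, int flags)
{
	dev_err(dev, "iommu fault at 0x%08lx (flags 0x%x)\n", iova, flags);

	/* e.g. dynamically load a TLB/PTE entry here, then report success */
	return 0;
}

/* registration, somewhere after iommu_domain_alloc():
 *	iommu_set_fault_handler(domain, example_fault_cb);
 */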
811
812static int device_match_by_alias(struct device *dev, void *data)
813{
814 struct omap_iommu *obj = to_iommu(dev);
815 const char *name = data;
816
817 pr_debug("%s: %s %s\n", __func__, obj->name, name);
818
819 return strcmp(obj->name, name) == 0;
820}
821
822/**
823 * omap_find_iommu_device() - find an omap iommu device by name
824 * @name: name of the iommu device
825 *
826 * The generic iommu API requires the caller to provide the device
827 * it wishes to attach to a certain iommu domain.
828 *
829 * Drivers generally should not bother with this as it should just
830 * be taken care of by the DMA-API using dev_archdata.
831 *
832 * This function is provided as an interim solution until the latter
833 * materializes, and omap3isp is fully migrated to the DMA-API.
834 */
835struct device *omap_find_iommu_device(const char *name)
836{
837 return driver_find_device(&omap_iommu_driver.driver, NULL,
838 (void *)name,
839 device_match_by_alias);
840}
841EXPORT_SYMBOL_GPL(omap_find_iommu_device);
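
/*
 * Illustrative sketch, not part of the original file: the interim usage
 * this helper targets, e.g. a client such as omap3isp looking up its MMU
 * by name and attaching it to a freshly allocated domain via the generic
 * IOMMU API. The device name "isp" is an assumed example.
 */
static __maybe_unused struct iommu_domain *example_attach_by_name(void)
{
	struct device *dev;
	struct iommu_domain *domain;

	dev = omap_find_iommu_device("isp");
	if (!dev)
		return ERR_PTR(-ENODEV);

	domain = iommu_domain_alloc(&platform_bus_type);
	if (!domain)
		return ERR_PTR(-ENOMEM);

	if (iommu_attach_device(domain, dev)) {
		iommu_domain_free(domain);
		return ERR_PTR(-EBUSY);
	}

	return domain;
}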
842
843/**
844 * omap_iommu_attach() - attach iommu device to an iommu domain
845 * @dev: target omap iommu device
846 * @iopgd: page table
847 **/
848static struct omap_iommu *omap_iommu_attach(struct device *dev, u32 *iopgd)
849{
850 int err = -ENOMEM;
851 struct omap_iommu *obj = to_iommu(dev);
852
853 spin_lock(&obj->iommu_lock);
854
855 /* an iommu device can only be attached once */
856 if (++obj->refcount > 1) {
857 dev_err(dev, "%s: already attached!\n", obj->name);
858 err = -EBUSY;
859 goto err_enable;
860 }
861
862 obj->iopgd = iopgd;
863 err = iommu_enable(obj);
864 if (err)
865 goto err_enable;
866 flush_iotlb_all(obj);
867
868 if (!try_module_get(obj->owner))
869 goto err_module;
870
871 spin_unlock(&obj->iommu_lock);
872
873 dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
874 return obj;
875
876err_module:
877 if (obj->refcount == 1)
878 iommu_disable(obj);
879err_enable:
880 obj->refcount--;
881 spin_unlock(&obj->iommu_lock);
882 return ERR_PTR(err);
883}
884
885/**
886 * omap_iommu_detach - release iommu device
887 * @obj: target iommu
888 **/
889static void omap_iommu_detach(struct omap_iommu *obj)
890{
891 if (!obj || IS_ERR(obj))
892 return;
893
894 spin_lock(&obj->iommu_lock);
895
896 if (--obj->refcount == 0)
897 iommu_disable(obj);
898
899 module_put(obj->owner);
900
901 obj->iopgd = NULL;
902
903 spin_unlock(&obj->iommu_lock);
904
905 dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
906}
907
908/*
909 * OMAP Device MMU(IOMMU) detection
910 */
911static int __devinit omap_iommu_probe(struct platform_device *pdev)
912{
913 int err = -ENODEV;
914 int irq;
915 struct omap_iommu *obj;
916 struct resource *res;
917 struct iommu_platform_data *pdata = pdev->dev.platform_data;
918
919 if (pdev->num_resources != 2)
920 return -EINVAL;
921
922 obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
923 if (!obj)
924 return -ENOMEM;
925
926 obj->clk = clk_get(&pdev->dev, pdata->clk_name);
927 if (IS_ERR(obj->clk))
928 goto err_clk;
929
930 obj->nr_tlb_entries = pdata->nr_tlb_entries;
931 obj->name = pdata->name;
932 obj->dev = &pdev->dev;
933 obj->ctx = (void *)obj + sizeof(*obj);
934 obj->da_start = pdata->da_start;
935 obj->da_end = pdata->da_end;
936
937 spin_lock_init(&obj->iommu_lock);
938 mutex_init(&obj->mmap_lock);
939 spin_lock_init(&obj->page_table_lock);
940 INIT_LIST_HEAD(&obj->mmap);
941
942 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
943 if (!res) {
944 err = -ENODEV;
945 goto err_mem;
946 }
947
948 res = request_mem_region(res->start, resource_size(res),
949 dev_name(&pdev->dev));
950 if (!res) {
951 err = -EIO;
952 goto err_mem;
953 }
954
955 obj->regbase = ioremap(res->start, resource_size(res));
956 if (!obj->regbase) {
957 err = -ENOMEM;
958 goto err_ioremap;
959 }
960
961 irq = platform_get_irq(pdev, 0);
962 if (irq < 0) {
963 err = -ENODEV;
964 goto err_irq;
965 }
966 err = request_irq(irq, iommu_fault_handler, IRQF_SHARED,
967 dev_name(&pdev->dev), obj);
968 if (err < 0)
969 goto err_irq;
970 platform_set_drvdata(pdev, obj);
971
972 dev_info(&pdev->dev, "%s registered\n", obj->name);
973 return 0;
974
975err_irq:
976 iounmap(obj->regbase);
977err_ioremap:
978 release_mem_region(res->start, resource_size(res));
979err_mem:
980 clk_put(obj->clk);
981err_clk:
982 kfree(obj);
983 return err;
984}
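
/*
 * Illustrative sketch, not part of the original file: the probe above
 * expects SoC/board code to register an "omap-iommu" platform device with
 * one MMIO resource, one IRQ resource and iommu_platform_data describing
 * the instance. The concrete values below ("isp", "cam_ick", 8 TLB
 * entries, the da window) are assumed examples, not taken from any real
 * board file.
 */
static struct iommu_platform_data example_iommu_pdata __maybe_unused = {
	.name		= "isp",
	.clk_name	= "cam_ick",
	.nr_tlb_entries	= 8,
	.da_start	= 0x0,
	.da_end		= 0xfffff000,
};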
985
986static int __devexit omap_iommu_remove(struct platform_device *pdev)
987{
988 int irq;
989 struct resource *res;
990 struct omap_iommu *obj = platform_get_drvdata(pdev);
991
992 platform_set_drvdata(pdev, NULL);
993
994 iopgtable_clear_entry_all(obj);
995
996 irq = platform_get_irq(pdev, 0);
997 free_irq(irq, obj);
998 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
999 release_mem_region(res->start, resource_size(res));
1000 iounmap(obj->regbase);
1001
1002 clk_put(obj->clk);
1003 dev_info(&pdev->dev, "%s removed\n", obj->name);
1004 kfree(obj);
1005 return 0;
1006}
1007
1008static struct platform_driver omap_iommu_driver = {
1009 .probe = omap_iommu_probe,
1010 .remove = __devexit_p(omap_iommu_remove),
1011 .driver = {
1012 .name = "omap-iommu",
1013 },
1014};
1015
1016static void iopte_cachep_ctor(void *iopte)
1017{
1018 clean_dcache_area(iopte, IOPTE_TABLE_SIZE);
1019}
1020
1021static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
1022 phys_addr_t pa, int order, int prot)
1023{
1024 struct omap_iommu_domain *omap_domain = domain->priv;
1025 struct omap_iommu *oiommu = omap_domain->iommu_dev;
1026 struct device *dev = oiommu->dev;
1027 size_t bytes = PAGE_SIZE << order;
1028 struct iotlb_entry e;
1029 int omap_pgsz;
1030 u32 ret, flags;
1031
1032 /* we only support mapping a single iommu page for now */
1033 omap_pgsz = bytes_to_iopgsz(bytes);
1034 if (omap_pgsz < 0) {
1035		dev_err(dev, "invalid size to map: %zu\n", bytes);
1036 return -EINVAL;
1037 }
1038
1039 dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes);
1040
1041 flags = omap_pgsz | prot;
1042
1043 iotlb_init_entry(&e, da, pa, flags);
1044
1045 ret = omap_iopgtable_store_entry(oiommu, &e);
1046 if (ret)
1047 dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret);
1048
1049 return ret;
1050}
1051
1052static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
1053 int order)
1054{
1055 struct omap_iommu_domain *omap_domain = domain->priv;
1056 struct omap_iommu *oiommu = omap_domain->iommu_dev;
1057 struct device *dev = oiommu->dev;
1058 size_t unmap_size;
1059
1060 dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order);
1061
1062 unmap_size = iopgtable_clear_entry(oiommu, da);
1063
1064 return unmap_size ? get_order(unmap_size) : -EINVAL;
1065}
1066
1067static int
1068omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
1069{
1070 struct omap_iommu_domain *omap_domain = domain->priv;
1071 struct omap_iommu *oiommu;
1072 int ret = 0;
1073
1074 spin_lock(&omap_domain->lock);
1075
1076 /* only a single device is supported per domain for now */
1077 if (omap_domain->iommu_dev) {
1078 dev_err(dev, "iommu domain is already attached\n");
1079 ret = -EBUSY;
1080 goto out;
1081 }
1082
1083 /* get a handle to and enable the omap iommu */
1084 oiommu = omap_iommu_attach(dev, omap_domain->pgtable);
1085 if (IS_ERR(oiommu)) {
1086 ret = PTR_ERR(oiommu);
1087 dev_err(dev, "can't get omap iommu: %d\n", ret);
1088 goto out;
1089 }
1090
1091 omap_domain->iommu_dev = oiommu;
1092 oiommu->domain = domain;
1093
1094out:
1095 spin_unlock(&omap_domain->lock);
1096 return ret;
1097}
1098
1099static void omap_iommu_detach_dev(struct iommu_domain *domain,
1100 struct device *dev)
1101{
1102 struct omap_iommu_domain *omap_domain = domain->priv;
1103 struct omap_iommu *oiommu = to_iommu(dev);
1104
1105 spin_lock(&omap_domain->lock);
1106
1107 /* only a single device is supported per domain for now */
1108 if (omap_domain->iommu_dev != oiommu) {
1109 dev_err(dev, "invalid iommu device\n");
1110 goto out;
1111 }
1112
1113 iopgtable_clear_entry_all(oiommu);
1114
1115 omap_iommu_detach(oiommu);
1116
1117 omap_domain->iommu_dev = NULL;
1118
1119out:
1120 spin_unlock(&omap_domain->lock);
1121}
1122
1123static int omap_iommu_domain_init(struct iommu_domain *domain)
1124{
1125 struct omap_iommu_domain *omap_domain;
1126
1127 omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
1128 if (!omap_domain) {
1129 pr_err("kzalloc failed\n");
1130 goto out;
1131 }
1132
1133 omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL);
1134 if (!omap_domain->pgtable) {
1135 pr_err("kzalloc failed\n");
1136 goto fail_nomem;
1137 }
1138
1139 /*
1140 * should never fail, but please keep this around to ensure
1141 * we keep the hardware happy
1142 */
1143 BUG_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE));
1144
1145 clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE);
1146 spin_lock_init(&omap_domain->lock);
1147
1148 domain->priv = omap_domain;
1149
1150 return 0;
1151
1152fail_nomem:
1153 kfree(omap_domain);
1154out:
1155 return -ENOMEM;
1156}
1157
1158/* assume device was already detached */
1159static void omap_iommu_domain_destroy(struct iommu_domain *domain)
1160{
1161 struct omap_iommu_domain *omap_domain = domain->priv;
1162
1163 domain->priv = NULL;
1164
1165 kfree(omap_domain->pgtable);
1166 kfree(omap_domain);
1167}
1168
1169static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
1170 unsigned long da)
1171{
1172 struct omap_iommu_domain *omap_domain = domain->priv;
1173 struct omap_iommu *oiommu = omap_domain->iommu_dev;
1174 struct device *dev = oiommu->dev;
1175 u32 *pgd, *pte;
1176 phys_addr_t ret = 0;
1177
1178 iopgtable_lookup_entry(oiommu, da, &pgd, &pte);
1179
1180 if (pte) {
1181 if (iopte_is_small(*pte))
1182 ret = omap_iommu_translate(*pte, da, IOPTE_MASK);
1183 else if (iopte_is_large(*pte))
1184 ret = omap_iommu_translate(*pte, da, IOLARGE_MASK);
1185 else
1186 dev_err(dev, "bogus pte 0x%x", *pte);
1187 } else {
1188 if (iopgd_is_section(*pgd))
1189 ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK);
1190 else if (iopgd_is_super(*pgd))
1191 ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK);
1192 else
1193 dev_err(dev, "bogus pgd 0x%x", *pgd);
1194 }
1195
1196 return ret;
1197}
1198
1199static int omap_iommu_domain_has_cap(struct iommu_domain *domain,
1200 unsigned long cap)
1201{
1202 return 0;
1203}
1204
1205static struct iommu_ops omap_iommu_ops = {
1206 .domain_init = omap_iommu_domain_init,
1207 .domain_destroy = omap_iommu_domain_destroy,
1208 .attach_dev = omap_iommu_attach_dev,
1209 .detach_dev = omap_iommu_detach_dev,
1210 .map = omap_iommu_map,
1211 .unmap = omap_iommu_unmap,
1212 .iova_to_phys = omap_iommu_iova_to_phys,
1213 .domain_has_cap = omap_iommu_domain_has_cap,
1214};
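
/*
 * Illustrative sketch, not part of the original file: once these ops are
 * registered on the platform bus (see omap_iommu_init() below), a client
 * can drive the OMAP IOMMU purely through the generic API. 'client_dev',
 * 'da' and 'pa' are assumed placeholders, and the prot value
 * (IOMMU_READ | IOMMU_WRITE) is an assumption about what a caller would
 * pass down to omap_iommu_map().
 */
static int __maybe_unused example_generic_map(struct device *client_dev,
					      unsigned long da, phys_addr_t pa)
{
	struct iommu_domain *domain;
	int err;

	domain = iommu_domain_alloc(&platform_bus_type);
	if (!domain)
		return -ENOMEM;

	err = iommu_attach_device(domain, client_dev);
	if (err)
		goto free_domain;

	/* map one 1MB section; the order argument is in PAGE_SIZE pages */
	err = iommu_map(domain, da, pa, get_order(SZ_1M),
			IOMMU_READ | IOMMU_WRITE);
	if (err)
		goto detach;

	dev_dbg(client_dev, "da 0x%lx -> pa 0x%08x\n", da,
		(u32)iommu_iova_to_phys(domain, da));

	iommu_unmap(domain, da, get_order(SZ_1M));
detach:
	iommu_detach_device(domain, client_dev);
free_domain:
	iommu_domain_free(domain);
	return err;
}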
1215
1216static int __init omap_iommu_init(void)
1217{
1218 struct kmem_cache *p;
1219 const unsigned long flags = SLAB_HWCACHE_ALIGN;
1220	size_t align = 1 << 10; /* L2 pagetable alignment */
1221
1222 p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags,
1223 iopte_cachep_ctor);
1224 if (!p)
1225 return -ENOMEM;
1226 iopte_cachep = p;
1227
1228 bus_set_iommu(&platform_bus_type, &omap_iommu_ops);
1229
1230 return platform_driver_register(&omap_iommu_driver);
1231}
1232module_init(omap_iommu_init);
1233
1234static void __exit omap_iommu_exit(void)
1235{
1236 kmem_cache_destroy(iopte_cachep);
1237
1238 platform_driver_unregister(&omap_iommu_driver);
1239}
1240module_exit(omap_iommu_exit);
1241
1242MODULE_DESCRIPTION("omap iommu: tlb and pagetable primitives");
1243MODULE_ALIAS("platform:omap-iommu");
1244MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi");
1245MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
new file mode 100644
index 000000000000..46be456fcc00
--- /dev/null
+++ b/drivers/iommu/omap-iovmm.c
@@ -0,0 +1,743 @@
1/*
2 * omap iommu: simple virtual address space management
3 *
4 * Copyright (C) 2008-2009 Nokia Corporation
5 *
6 * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/err.h>
15#include <linux/slab.h>
16#include <linux/vmalloc.h>
17#include <linux/device.h>
18#include <linux/scatterlist.h>
19#include <linux/iommu.h>
20
21#include <asm/cacheflush.h>
22#include <asm/mach/map.h>
23
24#include <plat/iommu.h>
25#include <plat/iovmm.h>
26
27#include <plat/iopgtable.h>
28
29static struct kmem_cache *iovm_area_cachep;
30
31/* return the offset of the first scatterlist entry in a sg table */
32static unsigned int sgtable_offset(const struct sg_table *sgt)
33{
34 if (!sgt || !sgt->nents)
35 return 0;
36
37 return sgt->sgl->offset;
38}
39
40/* return total bytes of sg buffers */
41static size_t sgtable_len(const struct sg_table *sgt)
42{
43 unsigned int i, total = 0;
44 struct scatterlist *sg;
45
46 if (!sgt)
47 return 0;
48
49 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
50 size_t bytes;
51
52 bytes = sg->length + sg->offset;
53
54 if (!iopgsz_ok(bytes)) {
55 pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n",
56 __func__, i, bytes, sg->offset);
57 return 0;
58 }
59
60 if (i && sg->offset) {
61 pr_err("%s: sg[%d] offset not allowed in internal "
62 "entries\n", __func__, i);
63 return 0;
64 }
65
66 total += bytes;
67 }
68
69 return total;
70}
71#define sgtable_ok(x) (!!sgtable_len(x))
72
73static unsigned max_alignment(u32 addr)
74{
75 int i;
76 unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, };
77 for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++)
78 ;
79 return (i < ARRAY_SIZE(pagesize)) ? pagesize[i] : 0;
80}
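
/*
 * Worked example (illustrative): max_alignment() returns the largest
 * supported OMAP page size the address is aligned to, e.g.
 * 0x41000000 -> SZ_16M, 0x40100000 -> SZ_1M, 0x40010000 -> SZ_64K,
 * 0x40001000 -> SZ_4K, and 0 for an address that is not even 4K aligned.
 */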
81
82/*
83 * calculate the optimal number of sg elements from total bytes based on
84 * iommu superpages
85 */
86static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa)
87{
88 unsigned nr_entries = 0, ent_sz;
89
90 if (!IS_ALIGNED(bytes, PAGE_SIZE)) {
91		pr_err("%s: wrong size %08zx\n", __func__, bytes);
92 return 0;
93 }
94
95 while (bytes) {
96 ent_sz = max_alignment(da | pa);
97 ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes));
98 nr_entries++;
99 da += ent_sz;
100 pa += ent_sz;
101 bytes -= ent_sz;
102 }
103
104 return nr_entries;
105}
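
/*
 * Worked example (illustrative): for a 1MB buffer whose da and pa are only
 * 64KB aligned, every iteration picks ent_sz = SZ_64K and the function
 * returns 16 entries; if da and pa are 1MB aligned, the whole buffer fits
 * a single SZ_1M entry and the function returns 1.
 */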
106
107/* allocate and initialize sg_table header (a kind of 'superblock') */
108static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags,
109 u32 da, u32 pa)
110{
111 unsigned int nr_entries;
112 int err;
113 struct sg_table *sgt;
114
115 if (!bytes)
116 return ERR_PTR(-EINVAL);
117
118 if (!IS_ALIGNED(bytes, PAGE_SIZE))
119 return ERR_PTR(-EINVAL);
120
121 if (flags & IOVMF_LINEAR) {
122 nr_entries = sgtable_nents(bytes, da, pa);
123 if (!nr_entries)
124 return ERR_PTR(-EINVAL);
125 } else
126 nr_entries = bytes / PAGE_SIZE;
127
128 sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
129 if (!sgt)
130 return ERR_PTR(-ENOMEM);
131
132 err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL);
133 if (err) {
134 kfree(sgt);
135 return ERR_PTR(err);
136 }
137
138 pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries);
139
140 return sgt;
141}
142
143/* free sg_table header (a kind of superblock) */
144static void sgtable_free(struct sg_table *sgt)
145{
146 if (!sgt)
147 return;
148
149 sg_free_table(sgt);
150 kfree(sgt);
151
152 pr_debug("%s: sgt:%p\n", __func__, sgt);
153}
154
155/* map 'sglist' to a contiguous mpu virtual area and return 'va' */
156static void *vmap_sg(const struct sg_table *sgt)
157{
158 u32 va;
159 size_t total;
160 unsigned int i;
161 struct scatterlist *sg;
162 struct vm_struct *new;
163 const struct mem_type *mtype;
164
165 mtype = get_mem_type(MT_DEVICE);
166 if (!mtype)
167 return ERR_PTR(-EINVAL);
168
169 total = sgtable_len(sgt);
170 if (!total)
171 return ERR_PTR(-EINVAL);
172
173 new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END);
174 if (!new)
175 return ERR_PTR(-ENOMEM);
176 va = (u32)new->addr;
177
178 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
179 size_t bytes;
180 u32 pa;
181 int err;
182
183 pa = sg_phys(sg) - sg->offset;
184 bytes = sg->length + sg->offset;
185
186 BUG_ON(bytes != PAGE_SIZE);
187
188 err = ioremap_page(va, pa, mtype);
189 if (err)
190 goto err_out;
191
192 va += bytes;
193 }
194
195 flush_cache_vmap((unsigned long)new->addr,
196 (unsigned long)(new->addr + total));
197 return new->addr;
198
199err_out:
200 WARN_ON(1); /* FIXME: cleanup some mpu mappings */
201 vunmap(new->addr);
202 return ERR_PTR(-EAGAIN);
203}
204
205static inline void vunmap_sg(const void *va)
206{
207 vunmap(va);
208}
209
210static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj,
211 const u32 da)
212{
213 struct iovm_struct *tmp;
214
215 list_for_each_entry(tmp, &obj->mmap, list) {
216 if ((da >= tmp->da_start) && (da < tmp->da_end)) {
217 size_t len;
218
219 len = tmp->da_end - tmp->da_start;
220
221 dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n",
222 __func__, tmp->da_start, da, tmp->da_end, len,
223 tmp->flags);
224
225 return tmp;
226 }
227 }
228
229 return NULL;
230}
231
232/**
233 * omap_find_iovm_area - find iovma which includes @da
234 * @da: iommu device virtual address
235 *
236 * Find the existing iovma that includes @da
237 */
238struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da)
239{
240 struct iovm_struct *area;
241
242 mutex_lock(&obj->mmap_lock);
243 area = __find_iovm_area(obj, da);
244 mutex_unlock(&obj->mmap_lock);
245
246 return area;
247}
248EXPORT_SYMBOL_GPL(omap_find_iovm_area);
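
/*
 * Illustrative sketch, not part of the original file: inspecting the iovma
 * that covers a given device address. 'my_obj' and 'my_da' are assumed
 * placeholders.
 */
static void __maybe_unused example_dump_iovma(struct omap_iommu *my_obj,
					      u32 my_da)
{
	struct iovm_struct *area;

	area = omap_find_iovm_area(my_obj, my_da);
	if (!area) {
		dev_dbg(my_obj->dev, "no iovma covers 0x%08x\n", my_da);
		return;
	}

	dev_dbg(my_obj->dev, "iovma 0x%08x-0x%08x flags 0x%08x\n",
		area->da_start, area->da_end, area->flags);
}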
249
250/*
251 * This finds the hole (area) which fits the requested address and len
252 * in the iovma mmap list, and returns the newly allocated iovma.
253 */
254static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da,
255 size_t bytes, u32 flags)
256{
257 struct iovm_struct *new, *tmp;
258 u32 start, prev_end, alignment;
259
260 if (!obj || !bytes)
261 return ERR_PTR(-EINVAL);
262
263 start = da;
264 alignment = PAGE_SIZE;
265
266 if (~flags & IOVMF_DA_FIXED) {
267 /* Don't map address 0 */
268 start = obj->da_start ? obj->da_start : alignment;
269
270 if (flags & IOVMF_LINEAR)
271 alignment = iopgsz_max(bytes);
272 start = roundup(start, alignment);
273 } else if (start < obj->da_start || start > obj->da_end ||
274 obj->da_end - start < bytes) {
275 return ERR_PTR(-EINVAL);
276 }
277
278 tmp = NULL;
279 if (list_empty(&obj->mmap))
280 goto found;
281
282 prev_end = 0;
283 list_for_each_entry(tmp, &obj->mmap, list) {
284
285 if (prev_end > start)
286 break;
287
288 if (tmp->da_start > start && (tmp->da_start - start) >= bytes)
289 goto found;
290
291 if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED)
292 start = roundup(tmp->da_end + 1, alignment);
293
294 prev_end = tmp->da_end;
295 }
296
297 if ((start >= prev_end) && (obj->da_end - start >= bytes))
298 goto found;
299
300 dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n",
301 __func__, da, bytes, flags);
302
303 return ERR_PTR(-EINVAL);
304
305found:
306 new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL);
307 if (!new)
308 return ERR_PTR(-ENOMEM);
309
310 new->iommu = obj;
311 new->da_start = start;
312 new->da_end = start + bytes;
313 new->flags = flags;
314
315 /*
316 * keep ascending order of iovmas
317 */
318 if (tmp)
319 list_add_tail(&new->list, &tmp->list);
320 else
321 list_add(&new->list, &obj->mmap);
322
323 dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n",
324 __func__, new->da_start, start, new->da_end, bytes, flags);
325
326 return new;
327}
328
329static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area)
330{
331 size_t bytes;
332
333 BUG_ON(!obj || !area);
334
335 bytes = area->da_end - area->da_start;
336
337 dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n",
338 __func__, area->da_start, area->da_end, bytes, area->flags);
339
340 list_del(&area->list);
341 kmem_cache_free(iovm_area_cachep, area);
342}
343
344/**
345 * omap_da_to_va - convert (d) to (v)
346 * @obj:	target iommu
347 * @da: iommu device virtual address
348 * @va: mpu virtual address
349 *
350 * Returns mpu virtual addr which corresponds to a given device virtual addr
351 */
352void *omap_da_to_va(struct omap_iommu *obj, u32 da)
353{
354 void *va = NULL;
355 struct iovm_struct *area;
356
357 mutex_lock(&obj->mmap_lock);
358
359 area = __find_iovm_area(obj, da);
360 if (!area) {
361 dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
362 goto out;
363 }
364 va = area->va;
365out:
366 mutex_unlock(&obj->mmap_lock);
367
368 return va;
369}
370EXPORT_SYMBOL_GPL(omap_da_to_va);
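
/*
 * Illustrative sketch, not part of the original file: a client that mapped
 * a buffer with the iovmm helpers can recover the MPU virtual address for
 * CPU access. 'my_obj' and 'my_da' are assumed placeholders, and clearing
 * one page is just an arbitrary example access.
 */
static void __maybe_unused example_cpu_access(struct omap_iommu *my_obj,
					      u32 my_da)
{
	void *va = omap_da_to_va(my_obj, my_da);

	if (!va)
		return;

	/* the CPU can now read/write the buffer through 'va' */
	memset(va, 0, PAGE_SIZE);
}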
371
372static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va)
373{
374 unsigned int i;
375 struct scatterlist *sg;
376 void *va = _va;
377 void *va_end;
378
379 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
380 struct page *pg;
381 const size_t bytes = PAGE_SIZE;
382
383 /*
384 * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()'
385 */
386 pg = vmalloc_to_page(va);
387 BUG_ON(!pg);
388 sg_set_page(sg, pg, bytes, 0);
389
390 va += bytes;
391 }
392
393 va_end = _va + PAGE_SIZE * i;
394}
395
396static inline void sgtable_drain_vmalloc(struct sg_table *sgt)
397{
398 /*
399	 * Actually this is not necessary at all; it just exists for
400	 * consistency and code readability.
401 */
402 BUG_ON(!sgt);
403}
404
405/* create 'da' <-> 'pa' mapping from 'sgt' */
406static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
407 const struct sg_table *sgt, u32 flags)
408{
409 int err;
410 unsigned int i, j;
411 struct scatterlist *sg;
412 u32 da = new->da_start;
413 int order;
414
415 if (!domain || !sgt)
416 return -EINVAL;
417
418 BUG_ON(!sgtable_ok(sgt));
419
420 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
421 u32 pa;
422 size_t bytes;
423
424 pa = sg_phys(sg) - sg->offset;
425 bytes = sg->length + sg->offset;
426
427 flags &= ~IOVMF_PGSZ_MASK;
428
429 if (bytes_to_iopgsz(bytes) < 0)
430 goto err_out;
431
432 order = get_order(bytes);
433
434 pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
435 i, da, pa, bytes);
436
437 err = iommu_map(domain, da, pa, order, flags);
438 if (err)
439 goto err_out;
440
441 da += bytes;
442 }
443 return 0;
444
445err_out:
446 da = new->da_start;
447
448 for_each_sg(sgt->sgl, sg, i, j) {
449 size_t bytes;
450
451 bytes = sg->length + sg->offset;
452 order = get_order(bytes);
453
454 /* ignore failures.. we're already handling one */
455 iommu_unmap(domain, da, order);
456
457 da += bytes;
458 }
459 return err;
460}
461
462/* release 'da' <-> 'pa' mapping */
463static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
464 struct iovm_struct *area)
465{
466 u32 start;
467 size_t total = area->da_end - area->da_start;
468 const struct sg_table *sgt = area->sgt;
469 struct scatterlist *sg;
470 int i, err;
471
472 BUG_ON(!sgtable_ok(sgt));
473 BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
474
475 start = area->da_start;
476 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
477 size_t bytes;
478 int order;
479
480 bytes = sg->length + sg->offset;
481 order = get_order(bytes);
482
483 err = iommu_unmap(domain, start, order);
484 if (err < 0)
485 break;
486
487 dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
488 __func__, start, bytes, area->flags);
489
490 BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
491
492 total -= bytes;
493 start += bytes;
494 }
495 BUG_ON(total);
496}
497
498/* template function for all unmapping */
499static struct sg_table *unmap_vm_area(struct iommu_domain *domain,
500 struct omap_iommu *obj, const u32 da,
501 void (*fn)(const void *), u32 flags)
502{
503 struct sg_table *sgt = NULL;
504 struct iovm_struct *area;
505
506 if (!IS_ALIGNED(da, PAGE_SIZE)) {
507 dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da);
508 return NULL;
509 }
510
511 mutex_lock(&obj->mmap_lock);
512
513 area = __find_iovm_area(obj, da);
514 if (!area) {
515 dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
516 goto out;
517 }
518
519 if ((area->flags & flags) != flags) {
520 dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__,
521 area->flags);
522 goto out;
523 }
524 sgt = (struct sg_table *)area->sgt;
525
526 unmap_iovm_area(domain, obj, area);
527
528 fn(area->va);
529
530 dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__,
531 area->da_start, da, area->da_end,
532 area->da_end - area->da_start, area->flags);
533
534 free_iovm_area(obj, area);
535out:
536 mutex_unlock(&obj->mmap_lock);
537
538 return sgt;
539}
540
541static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj,
542 u32 da, const struct sg_table *sgt, void *va,
543 size_t bytes, u32 flags)
544{
545 int err = -ENOMEM;
546 struct iovm_struct *new;
547
548 mutex_lock(&obj->mmap_lock);
549
550 new = alloc_iovm_area(obj, da, bytes, flags);
551 if (IS_ERR(new)) {
552 err = PTR_ERR(new);
553 goto err_alloc_iovma;
554 }
555 new->va = va;
556 new->sgt = sgt;
557
558 if (map_iovm_area(domain, new, sgt, new->flags))
559 goto err_map;
560
561 mutex_unlock(&obj->mmap_lock);
562
563 dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n",
564 __func__, new->da_start, bytes, new->flags, va);
565
566 return new->da_start;
567
568err_map:
569 free_iovm_area(obj, new);
570err_alloc_iovma:
571 mutex_unlock(&obj->mmap_lock);
572 return err;
573}
574
575static inline u32
576__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj,
577 u32 da, const struct sg_table *sgt,
578 void *va, size_t bytes, u32 flags)
579{
580 return map_iommu_region(domain, obj, da, sgt, va, bytes, flags);
581}
582
583/**
584 * omap_iommu_vmap - (d)-(p)-(v) address mapper
585 * @obj:	target iommu
586 * @sgt: address of scatter gather table
587 * @flags: iovma and page property
588 *
589 * Creates a 1-n-1 mapping with the given @sgt and returns @da.
590 * All @sgt elements must be io page size aligned.
591 */
592u32 omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
593 const struct sg_table *sgt, u32 flags)
594{
595 size_t bytes;
596 void *va = NULL;
597
598 if (!obj || !obj->dev || !sgt)
599 return -EINVAL;
600
601 bytes = sgtable_len(sgt);
602 if (!bytes)
603 return -EINVAL;
604 bytes = PAGE_ALIGN(bytes);
605
606 if (flags & IOVMF_MMIO) {
607 va = vmap_sg(sgt);
608 if (IS_ERR(va))
609 return PTR_ERR(va);
610 }
611
612 flags |= IOVMF_DISCONT;
613 flags |= IOVMF_MMIO;
614
615 da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags);
616 if (IS_ERR_VALUE(da))
617 vunmap_sg(va);
618
619 return da + sgtable_offset(sgt);
620}
621EXPORT_SYMBOL_GPL(omap_iommu_vmap);
622
623/**
624 * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()'
625 * @obj:	target iommu
626 * @da: iommu device virtual address
627 *
628 * Free the iommu virtually contiguous memory area starting at
629 * @da, which was returned by 'omap_iommu_vmap()'.
630 */
631struct sg_table *
632omap_iommu_vunmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da)
633{
634 struct sg_table *sgt;
635 /*
636	 * 'sgt' is allocated by the caller before 'omap_iommu_vmap()' is called.
637	 * Just return 'sgt' to the caller to free.
638 */
639 da &= PAGE_MASK;
640 sgt = unmap_vm_area(domain, obj, da, vunmap_sg,
641 IOVMF_DISCONT | IOVMF_MMIO);
642 if (!sgt)
643 dev_dbg(obj->dev, "%s: No sgt\n", __func__);
644 return sgt;
645}
646EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
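
/*
 * Illustrative sketch, not part of the original file: mapping a
 * caller-provided sg_table into the device address space and tearing the
 * mapping down again. 'domain', 'obj' and 'sgt' are assumed placeholders;
 * passing a da of 0 without IOVMF_DA_FIXED lets the allocator pick the
 * device address, and IOVMF_MMIO requests an MPU-side vmap of the buffer.
 */
static int __maybe_unused example_vmap_cycle(struct iommu_domain *domain,
					     struct omap_iommu *obj,
					     struct sg_table *sgt)
{
	u32 da;

	da = omap_iommu_vmap(domain, obj, 0, sgt, IOVMF_MMIO);
	if (IS_ERR_VALUE(da))
		return (int)da;

	/* ... program the device with 'da' and run the transfer ... */

	sgt = omap_iommu_vunmap(domain, obj, da);
	return sgt ? 0 : -EINVAL;
}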
647
648/**
649 * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper
650 * @obj:	target iommu
651 * @da: contiguous iommu virtual memory
652 * @bytes: allocation size
653 * @flags: iovma and page property
654 *
655 * Allocates @bytes linearly, creates a 1-n-1 mapping and returns
656 * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
657 */
658u32
659omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
660 size_t bytes, u32 flags)
661{
662 void *va;
663 struct sg_table *sgt;
664
665 if (!obj || !obj->dev || !bytes)
666 return -EINVAL;
667
668 bytes = PAGE_ALIGN(bytes);
669
670 va = vmalloc(bytes);
671 if (!va)
672 return -ENOMEM;
673
674 flags |= IOVMF_DISCONT;
675 flags |= IOVMF_ALLOC;
676
677 sgt = sgtable_alloc(bytes, flags, da, 0);
678 if (IS_ERR(sgt)) {
679 da = PTR_ERR(sgt);
680 goto err_sgt_alloc;
681 }
682 sgtable_fill_vmalloc(sgt, va);
683
684 da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags);
685 if (IS_ERR_VALUE(da))
686 goto err_iommu_vmap;
687
688 return da;
689
690err_iommu_vmap:
691 sgtable_drain_vmalloc(sgt);
692 sgtable_free(sgt);
693err_sgt_alloc:
694 vfree(va);
695 return da;
696}
697EXPORT_SYMBOL_GPL(omap_iommu_vmalloc);
698
699/**
700 * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()'
701 * @obj:	target iommu
702 * @da: iommu device virtual address
703 *
704 * Frees the iommu virtually contiguous memory area starting at
705 * @da, as obtained from 'omap_iommu_vmalloc()'.
706 */
707void omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
708 const u32 da)
709{
710 struct sg_table *sgt;
711
712 sgt = unmap_vm_area(domain, obj, da, vfree,
713 IOVMF_DISCONT | IOVMF_ALLOC);
714 if (!sgt)
715 dev_dbg(obj->dev, "%s: No sgt\n", __func__);
716 sgtable_free(sgt);
717}
718EXPORT_SYMBOL_GPL(omap_iommu_vfree);
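
/*
 * Illustrative sketch, not part of the original file: allocating a
 * device-visible buffer with omap_iommu_vmalloc() and releasing it again.
 * 'domain' and 'obj' are assumed placeholders, as is the flags value of 0;
 * a caller that needs a specific device address would pass it in 'da'
 * together with IOVMF_DA_FIXED.
 */
static int __maybe_unused example_vmalloc_cycle(struct iommu_domain *domain,
						struct omap_iommu *obj)
{
	u32 da;

	da = omap_iommu_vmalloc(domain, obj, 0, SZ_64K, 0);
	if (IS_ERR_VALUE(da))
		return (int)da;

	/* omap_da_to_va(obj, da) gives the CPU-side view if needed */

	omap_iommu_vfree(domain, obj, da);
	return 0;
}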
719
720static int __init iovmm_init(void)
721{
722 const unsigned long flags = SLAB_HWCACHE_ALIGN;
723 struct kmem_cache *p;
724
725 p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0,
726 flags, NULL);
727 if (!p)
728 return -ENOMEM;
729 iovm_area_cachep = p;
730
731 return 0;
732}
733module_init(iovmm_init);
734
735static void __exit iovmm_exit(void)
736{
737 kmem_cache_destroy(iovm_area_cachep);
738}
739module_exit(iovmm_exit);
740
741MODULE_DESCRIPTION("omap iommu: simple virtual address space management");
742MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
743MODULE_LICENSE("GPL v2");