author    Ingo Molnar <mingo@elte.hu>  2011-07-07 06:56:44 -0400
committer Ingo Molnar <mingo@elte.hu>  2011-07-07 06:58:28 -0400
commit    b395fb36d59e17b9335805c10fa30fc51c8a94c6 (patch)
tree      b96bc8eee33753e2a1b1181c62a028d7f89643d7 /drivers
parent    fe0d42203cb5616eeff68b14576a0f7e2dd56625 (diff)
parent    6b385b46ee17d7e1a68d3411b8cdb2342e0f0445 (diff)
Merge branch 'iommu-3.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu into core/iommu
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Kconfig                                                         2
-rw-r--r--  drivers/Makefile                                                        1
-rw-r--r--  drivers/base/Makefile                                                   1
-rw-r--r--  drivers/iommu/Kconfig                                                 110
-rw-r--r--  drivers/iommu/Makefile                                                  5
-rw-r--r--  drivers/iommu/amd_iommu.c                                            2824
-rw-r--r--  drivers/iommu/amd_iommu_init.c                                       1574
-rw-r--r--  drivers/iommu/amd_iommu_proto.h                                        54
-rw-r--r--  drivers/iommu/amd_iommu_types.h                                       585
-rw-r--r--  drivers/iommu/dmar.c (renamed from drivers/pci/dmar.c)                  0
-rw-r--r--  drivers/iommu/intel-iommu.c (renamed from drivers/pci/intel-iommu.c)    1
-rw-r--r--  drivers/iommu/intr_remapping.c (renamed from drivers/pci/intr_remapping.c)  1
-rw-r--r--  drivers/iommu/intr_remapping.h (renamed from drivers/pci/intr_remapping.h)  0
-rw-r--r--  drivers/iommu/iommu.c (renamed from drivers/base/iommu.c)               0
-rw-r--r--  drivers/iommu/iova.c (renamed from drivers/pci/iova.c)                  0
-rw-r--r--  drivers/iommu/msm_iommu.c                                             731
-rw-r--r--  drivers/iommu/msm_iommu_dev.c                                         422
-rw-r--r--  drivers/pci/Makefile                                                    5
-rw-r--r--  drivers/pci/pci.h                                                       2
19 files changed, 6308 insertions, 10 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3bb154d8c8cc..9d513188b47a 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -126,4 +126,6 @@ source "drivers/hwspinlock/Kconfig"
 
 source "drivers/clocksource/Kconfig"
 
+source "drivers/iommu/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 09f3232bcdcd..2b551e971726 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -122,3 +122,4 @@ obj-y += ieee802154/
 obj-y += clk/
 
 obj-$(CONFIG_HWSPINLOCK) += hwspinlock/
+obj-$(CONFIG_IOMMU_SUPPORT) += iommu/
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 4c5701c15f53..5ab0d07c4578 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o
 obj-$(CONFIG_NUMA) += node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP) += topology.o
-obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES) += module.o
 endif
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
new file mode 100644
index 000000000000..b57b3fa492f3
--- /dev/null
+++ b/drivers/iommu/Kconfig
@@ -0,0 +1,110 @@
1# IOMMU_API always gets selected by whoever wants it.
2config IOMMU_API
3 bool
4
5menuconfig IOMMU_SUPPORT
6 bool "IOMMU Hardware Support"
7 default y
8 ---help---
9 Say Y here if you want to compile device drivers for IO Memory
10 Management Units into the kernel. These devices usually allow
11 remapping of DMA requests and/or interrupts from other devices on
12 the system.
13
14if IOMMU_SUPPORT
15
16# MSM IOMMU support
17config MSM_IOMMU
18 bool "MSM IOMMU Support"
19 depends on ARCH_MSM8X60 || ARCH_MSM8960
20 select IOMMU_API
21 help
22 Support for the IOMMUs found on certain Qualcomm SOCs.
23 These IOMMUs allow virtualization of the address space used by most
24 cores within the multimedia subsystem.
25
26 If unsure, say N here.
27
28config IOMMU_PGTABLES_L2
29 def_bool y
30 depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n
31
32# AMD IOMMU support
33config AMD_IOMMU
34 bool "AMD IOMMU support"
35 select SWIOTLB
36 select PCI_MSI
37 select PCI_IOV
38 select IOMMU_API
39 depends on X86_64 && PCI && ACPI
40 ---help---
41 With this option you can enable support for AMD IOMMU hardware in
42 your system. An IOMMU is a hardware component which provides
43 remapping of DMA memory accesses from devices. With an AMD IOMMU you
44 can isolate the DMA memory of different devices and protect the
45 system from misbehaving device drivers or hardware.
46
47 You can find out if your system has an AMD IOMMU if you look into
48 your BIOS for an option to enable it or if you have an IVRS ACPI
49 table.
50
51config AMD_IOMMU_STATS
52 bool "Export AMD IOMMU statistics to debugfs"
53 depends on AMD_IOMMU
54 select DEBUG_FS
55 ---help---
56 This option enables code in the AMD IOMMU driver to collect various
57 statistics about what's happening in the driver and exports that
58 information to userspace via debugfs.
59 If unsure, say N.
60
61# Intel IOMMU support
62config DMAR
63 bool "Support for DMA Remapping Devices"
64 depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
65 select IOMMU_API
66 help
67 DMA remapping (DMAR) device support enables independent address
68 translations for Direct Memory Access (DMA) from devices.
69 These DMA remapping devices are reported via ACPI tables
70 and include PCI device scope covered by these DMA
71 remapping devices.
72
73config DMAR_DEFAULT_ON
74 def_bool y
75 prompt "Enable DMA Remapping Devices by default"
76 depends on DMAR
77 help
78 Selecting this option will enable a DMAR device at boot time if
79 one is found. If this option is not selected, DMAR support can
80 be enabled by passing intel_iommu=on to the kernel.
81
82config DMAR_BROKEN_GFX_WA
83 bool "Workaround broken graphics drivers (going away soon)"
84 depends on DMAR && BROKEN && X86
85 ---help---
86 Current graphics drivers tend to use physical addresses
87 for DMA and avoid using DMA APIs. Setting this config
88 option permits the IOMMU driver to set a unity map for
89 all the OS-visible memory. Hence the driver can continue
90 to use physical addresses for DMA, at least until this
91 option is removed in the 2.6.32 kernel.
92
93config DMAR_FLOPPY_WA
94 def_bool y
95 depends on DMAR && X86
96 ---help---
97 Floppy disk drivers are known to bypass DMA API calls,
98 thereby failing to work when an IOMMU is enabled. This
99 workaround will set up a 1:1 mapping for the first
100 16MiB to make floppy (an ISA device) work.
101
102config INTR_REMAP
103 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
104 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
105 ---help---
106 Supports Interrupt remapping for IO-APIC and MSI devices.
107 To use x2apic mode in CPUs which support x2APIC enhancements or
108 to support platforms with CPUs having > 8 bit APIC IDs, say Y.
109
110endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
new file mode 100644
index 000000000000..4d4d77df7cac
--- /dev/null
+++ b/drivers/iommu/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_IOMMU_API) += iommu.o
2obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
3obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
4obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
5obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
new file mode 100644
index 000000000000..a14f8dc23462
--- /dev/null
+++ b/drivers/iommu/amd_iommu.c
@@ -0,0 +1,2824 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/pci-ats.h>
22#include <linux/bitmap.h>
23#include <linux/slab.h>
24#include <linux/debugfs.h>
25#include <linux/scatterlist.h>
26#include <linux/dma-mapping.h>
27#include <linux/iommu-helper.h>
28#include <linux/iommu.h>
29#include <linux/delay.h>
30#include <linux/amd-iommu.h>
31#include <asm/msidef.h>
32#include <asm/proto.h>
33#include <asm/iommu.h>
34#include <asm/gart.h>
35#include <asm/dma.h>
36
37#include "amd_iommu_proto.h"
38#include "amd_iommu_types.h"
39
40#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
41
42#define LOOP_TIMEOUT 100000
43
44static DEFINE_RWLOCK(amd_iommu_devtable_lock);
45
46/* A list of preallocated protection domains */
47static LIST_HEAD(iommu_pd_list);
48static DEFINE_SPINLOCK(iommu_pd_list_lock);
49
50/* List of all available dev_data structures */
51static LIST_HEAD(dev_data_list);
52static DEFINE_SPINLOCK(dev_data_list_lock);
53
54/*
55 * Domain for untranslated devices - only allocated
56 * if iommu=pt passed on kernel cmd line.
57 */
58static struct protection_domain *pt_domain;
59
60static struct iommu_ops amd_iommu_ops;
61
62/*
63 * general struct to manage commands sent to an IOMMU
64 */
65struct iommu_cmd {
66 u32 data[4];
67};
68
69static void update_domain(struct protection_domain *domain);
70
71/****************************************************************************
72 *
73 * Helper functions
74 *
75 ****************************************************************************/
76
77static struct iommu_dev_data *alloc_dev_data(u16 devid)
78{
79 struct iommu_dev_data *dev_data;
80 unsigned long flags;
81
82 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
83 if (!dev_data)
84 return NULL;
85
86 dev_data->devid = devid;
87 atomic_set(&dev_data->bind, 0);
88
89 spin_lock_irqsave(&dev_data_list_lock, flags);
90 list_add_tail(&dev_data->dev_data_list, &dev_data_list);
91 spin_unlock_irqrestore(&dev_data_list_lock, flags);
92
93 return dev_data;
94}
95
96static void free_dev_data(struct iommu_dev_data *dev_data)
97{
98 unsigned long flags;
99
100 spin_lock_irqsave(&dev_data_list_lock, flags);
101 list_del(&dev_data->dev_data_list);
102 spin_unlock_irqrestore(&dev_data_list_lock, flags);
103
104 kfree(dev_data);
105}
106
107static struct iommu_dev_data *search_dev_data(u16 devid)
108{
109 struct iommu_dev_data *dev_data;
110 unsigned long flags;
111
112 spin_lock_irqsave(&dev_data_list_lock, flags);
113 list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
114 if (dev_data->devid == devid)
115 goto out_unlock;
116 }
117
118 dev_data = NULL;
119
120out_unlock:
121 spin_unlock_irqrestore(&dev_data_list_lock, flags);
122
123 return dev_data;
124}
125
126static struct iommu_dev_data *find_dev_data(u16 devid)
127{
128 struct iommu_dev_data *dev_data;
129
130 dev_data = search_dev_data(devid);
131
132 if (dev_data == NULL)
133 dev_data = alloc_dev_data(devid);
134
135 return dev_data;
136}
137
138static inline u16 get_device_id(struct device *dev)
139{
140 struct pci_dev *pdev = to_pci_dev(dev);
141
142 return calc_devid(pdev->bus->number, pdev->devfn);
143}
144
145static struct iommu_dev_data *get_dev_data(struct device *dev)
146{
147 return dev->archdata.iommu;
148}
149
150/*
151 * In this function the list of preallocated protection domains is traversed to
152 * find the domain for a specific device
153 */
154static struct dma_ops_domain *find_protection_domain(u16 devid)
155{
156 struct dma_ops_domain *entry, *ret = NULL;
157 unsigned long flags;
158 u16 alias = amd_iommu_alias_table[devid];
159
160 if (list_empty(&iommu_pd_list))
161 return NULL;
162
163 spin_lock_irqsave(&iommu_pd_list_lock, flags);
164
165 list_for_each_entry(entry, &iommu_pd_list, list) {
166 if (entry->target_dev == devid ||
167 entry->target_dev == alias) {
168 ret = entry;
169 break;
170 }
171 }
172
173 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
174
175 return ret;
176}
177
178/*
179 * This function checks if the driver got a valid device from the caller to
180 * avoid dereferencing invalid pointers.
181 */
182static bool check_device(struct device *dev)
183{
184 u16 devid;
185
186 if (!dev || !dev->dma_mask)
187 return false;
188
189 /* No device or no PCI device */
190 if (dev->bus != &pci_bus_type)
191 return false;
192
193 devid = get_device_id(dev);
194
195 /* Out of our scope? */
196 if (devid > amd_iommu_last_bdf)
197 return false;
198
199 if (amd_iommu_rlookup_table[devid] == NULL)
200 return false;
201
202 return true;
203}
204
205static int iommu_init_device(struct device *dev)
206{
207 struct iommu_dev_data *dev_data;
208 u16 alias;
209
210 if (dev->archdata.iommu)
211 return 0;
212
213 dev_data = find_dev_data(get_device_id(dev));
214 if (!dev_data)
215 return -ENOMEM;
216
217 alias = amd_iommu_alias_table[dev_data->devid];
218 if (alias != dev_data->devid) {
219 struct iommu_dev_data *alias_data;
220
221 alias_data = find_dev_data(alias);
222 if (alias_data == NULL) {
223 pr_err("AMD-Vi: Warning: Unhandled device %s\n",
224 dev_name(dev));
225 free_dev_data(dev_data);
226 return -ENOTSUPP;
227 }
228 dev_data->alias_data = alias_data;
229 }
230
231 dev->archdata.iommu = dev_data;
232
233 return 0;
234}
235
236static void iommu_ignore_device(struct device *dev)
237{
238 u16 devid, alias;
239
240 devid = get_device_id(dev);
241 alias = amd_iommu_alias_table[devid];
242
243 memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
244 memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
245
246 amd_iommu_rlookup_table[devid] = NULL;
247 amd_iommu_rlookup_table[alias] = NULL;
248}
249
250static void iommu_uninit_device(struct device *dev)
251{
252 /*
253 * Nothing to do here - we keep dev_data around for unplugged devices
254 * and reuse it when the device is re-plugged - not doing so would
255 * introduce a ton of races.
256 */
257}
258
259void __init amd_iommu_uninit_devices(void)
260{
261 struct iommu_dev_data *dev_data, *n;
262 struct pci_dev *pdev = NULL;
263
264 for_each_pci_dev(pdev) {
265
266 if (!check_device(&pdev->dev))
267 continue;
268
269 iommu_uninit_device(&pdev->dev);
270 }
271
272 /* Free all of our dev_data structures */
273 list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list)
274 free_dev_data(dev_data);
275}
276
277int __init amd_iommu_init_devices(void)
278{
279 struct pci_dev *pdev = NULL;
280 int ret = 0;
281
282 for_each_pci_dev(pdev) {
283
284 if (!check_device(&pdev->dev))
285 continue;
286
287 ret = iommu_init_device(&pdev->dev);
288 if (ret == -ENOTSUPP)
289 iommu_ignore_device(&pdev->dev);
290 else if (ret)
291 goto out_free;
292 }
293
294 return 0;
295
296out_free:
297
298 amd_iommu_uninit_devices();
299
300 return ret;
301}
302#ifdef CONFIG_AMD_IOMMU_STATS
303
304/*
305 * Initialization code for statistics collection
306 */
307
308DECLARE_STATS_COUNTER(compl_wait);
309DECLARE_STATS_COUNTER(cnt_map_single);
310DECLARE_STATS_COUNTER(cnt_unmap_single);
311DECLARE_STATS_COUNTER(cnt_map_sg);
312DECLARE_STATS_COUNTER(cnt_unmap_sg);
313DECLARE_STATS_COUNTER(cnt_alloc_coherent);
314DECLARE_STATS_COUNTER(cnt_free_coherent);
315DECLARE_STATS_COUNTER(cross_page);
316DECLARE_STATS_COUNTER(domain_flush_single);
317DECLARE_STATS_COUNTER(domain_flush_all);
318DECLARE_STATS_COUNTER(alloced_io_mem);
319DECLARE_STATS_COUNTER(total_map_requests);
320
321static struct dentry *stats_dir;
322static struct dentry *de_fflush;
323
324static void amd_iommu_stats_add(struct __iommu_counter *cnt)
325{
326 if (stats_dir == NULL)
327 return;
328
329 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
330 &cnt->value);
331}
332
333static void amd_iommu_stats_init(void)
334{
335 stats_dir = debugfs_create_dir("amd-iommu", NULL);
336 if (stats_dir == NULL)
337 return;
338
339 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
340 (u32 *)&amd_iommu_unmap_flush);
341
342 amd_iommu_stats_add(&compl_wait);
343 amd_iommu_stats_add(&cnt_map_single);
344 amd_iommu_stats_add(&cnt_unmap_single);
345 amd_iommu_stats_add(&cnt_map_sg);
346 amd_iommu_stats_add(&cnt_unmap_sg);
347 amd_iommu_stats_add(&cnt_alloc_coherent);
348 amd_iommu_stats_add(&cnt_free_coherent);
349 amd_iommu_stats_add(&cross_page);
350 amd_iommu_stats_add(&domain_flush_single);
351 amd_iommu_stats_add(&domain_flush_all);
352 amd_iommu_stats_add(&alloced_io_mem);
353 amd_iommu_stats_add(&total_map_requests);
354}
355
356#endif
357
358/****************************************************************************
359 *
360 * Interrupt handling functions
361 *
362 ****************************************************************************/
363
364static void dump_dte_entry(u16 devid)
365{
366 int i;
367
368 for (i = 0; i < 8; ++i)
369 pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
370 amd_iommu_dev_table[devid].data[i]);
371}
372
373static void dump_command(unsigned long phys_addr)
374{
375 struct iommu_cmd *cmd = phys_to_virt(phys_addr);
376 int i;
377
378 for (i = 0; i < 4; ++i)
379 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
380}
381
382static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
383{
384 u32 *event = __evt;
385 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
386 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
387 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
388 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
389 u64 address = (u64)(((u64)event[3]) << 32) | event[2];
390
391 printk(KERN_ERR "AMD-Vi: Event logged [");
392
393 switch (type) {
394 case EVENT_TYPE_ILL_DEV:
395 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
396 "address=0x%016llx flags=0x%04x]\n",
397 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
398 address, flags);
399 dump_dte_entry(devid);
400 break;
401 case EVENT_TYPE_IO_FAULT:
402 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
403 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
404 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
405 domid, address, flags);
406 break;
407 case EVENT_TYPE_DEV_TAB_ERR:
408 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
409 "address=0x%016llx flags=0x%04x]\n",
410 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
411 address, flags);
412 break;
413 case EVENT_TYPE_PAGE_TAB_ERR:
414 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
415 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
416 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
417 domid, address, flags);
418 break;
419 case EVENT_TYPE_ILL_CMD:
420 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
421 dump_command(address);
422 break;
423 case EVENT_TYPE_CMD_HARD_ERR:
424 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
425 "flags=0x%04x]\n", address, flags);
426 break;
427 case EVENT_TYPE_IOTLB_INV_TO:
428 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
429 "address=0x%016llx]\n",
430 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
431 address);
432 break;
433 case EVENT_TYPE_INV_DEV_REQ:
434 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
435 "address=0x%016llx flags=0x%04x]\n",
436 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
437 address, flags);
438 break;
439 default:
440 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
441 }
442}
443
444static void iommu_poll_events(struct amd_iommu *iommu)
445{
446 u32 head, tail;
447 unsigned long flags;
448
449 spin_lock_irqsave(&iommu->lock, flags);
450
451 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
452 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
453
454 while (head != tail) {
455 iommu_print_event(iommu, iommu->evt_buf + head);
456 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
457 }
458
459 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
460
461 spin_unlock_irqrestore(&iommu->lock, flags);
462}
463
464irqreturn_t amd_iommu_int_thread(int irq, void *data)
465{
466 struct amd_iommu *iommu;
467
468 for_each_iommu(iommu)
469 iommu_poll_events(iommu);
470
471 return IRQ_HANDLED;
472}
473
474irqreturn_t amd_iommu_int_handler(int irq, void *data)
475{
476 return IRQ_WAKE_THREAD;
477}
478
479/****************************************************************************
480 *
481 * IOMMU command queuing functions
482 *
483 ****************************************************************************/
484
485static int wait_on_sem(volatile u64 *sem)
486{
487 int i = 0;
488
489 while (*sem == 0 && i < LOOP_TIMEOUT) {
490 udelay(1);
491 i += 1;
492 }
493
494 if (i == LOOP_TIMEOUT) {
495 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
496 return -EIO;
497 }
498
499 return 0;
500}
501
502static void copy_cmd_to_buffer(struct amd_iommu *iommu,
503 struct iommu_cmd *cmd,
504 u32 tail)
505{
506 u8 *target;
507
508 target = iommu->cmd_buf + tail;
509 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
510
511 /* Copy command to buffer */
512 memcpy(target, cmd, sizeof(*cmd));
513
514 /* Tell the IOMMU about it */
515 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
516}
517
518static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
519{
520 WARN_ON(address & 0x7ULL);
521
522 memset(cmd, 0, sizeof(*cmd));
523 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
524 cmd->data[1] = upper_32_bits(__pa(address));
525 cmd->data[2] = 1;
526 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
527}
528
529static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
530{
531 memset(cmd, 0, sizeof(*cmd));
532 cmd->data[0] = devid;
533 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
534}
535
536static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
537 size_t size, u16 domid, int pde)
538{
539 u64 pages;
540 int s;
541
542 pages = iommu_num_pages(address, size, PAGE_SIZE);
543 s = 0;
544
545 if (pages > 1) {
546 /*
547 * If we have to flush more than one page, flush all
548 * TLB entries for this domain
549 */
550 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
551 s = 1;
552 }
553
554 address &= PAGE_MASK;
555
556 memset(cmd, 0, sizeof(*cmd));
557 cmd->data[1] |= domid;
558 cmd->data[2] = lower_32_bits(address);
559 cmd->data[3] = upper_32_bits(address);
560 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
561 if (s) /* size bit - we flush more than one 4kb page */
562 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
563 if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
564 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
565}
566
567static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
568 u64 address, size_t size)
569{
570 u64 pages;
571 int s;
572
573 pages = iommu_num_pages(address, size, PAGE_SIZE);
574 s = 0;
575
576 if (pages > 1) {
577 /*
578 * If we have to flush more than one page, flush all
579 * TLB entries for this domain
580 */
581 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
582 s = 1;
583 }
584
585 address &= PAGE_MASK;
586
587 memset(cmd, 0, sizeof(*cmd));
588 cmd->data[0] = devid;
589 cmd->data[0] |= (qdep & 0xff) << 24;
590 cmd->data[1] = devid;
591 cmd->data[2] = lower_32_bits(address);
592 cmd->data[3] = upper_32_bits(address);
593 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
594 if (s)
595 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
596}
597
598static void build_inv_all(struct iommu_cmd *cmd)
599{
600 memset(cmd, 0, sizeof(*cmd));
601 CMD_SET_TYPE(cmd, CMD_INV_ALL);
602}
603
604/*
605 * Writes the command to the IOMMU's command buffer and informs the
606 * hardware about the new command.
607 */
608static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
609{
610 u32 left, tail, head, next_tail;
611 unsigned long flags;
612
613 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
614
615again:
616 spin_lock_irqsave(&iommu->lock, flags);
617
618 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
619 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
620 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
621 left = (head - next_tail) % iommu->cmd_buf_size;
622
623 if (left <= 2) {
624 struct iommu_cmd sync_cmd;
625 volatile u64 sem = 0;
626 int ret;
627
628 build_completion_wait(&sync_cmd, (u64)&sem);
629 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
630
631 spin_unlock_irqrestore(&iommu->lock, flags);
632
633 if ((ret = wait_on_sem(&sem)) != 0)
634 return ret;
635
636 goto again;
637 }
638
639 copy_cmd_to_buffer(iommu, cmd, tail);
640
641 /* We need to sync now to make sure all commands are processed */
642 iommu->need_sync = true;
643
644 spin_unlock_irqrestore(&iommu->lock, flags);
645
646 return 0;
647}
648
649/*
650 * This function queues a completion wait command into the command
651 * buffer of an IOMMU
652 */
653static int iommu_completion_wait(struct amd_iommu *iommu)
654{
655 struct iommu_cmd cmd;
656 volatile u64 sem = 0;
657 int ret;
658
659 if (!iommu->need_sync)
660 return 0;
661
662 build_completion_wait(&cmd, (u64)&sem);
663
664 ret = iommu_queue_command(iommu, &cmd);
665 if (ret)
666 return ret;
667
668 return wait_on_sem(&sem);
669}
670
671static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
672{
673 struct iommu_cmd cmd;
674
675 build_inv_dte(&cmd, devid);
676
677 return iommu_queue_command(iommu, &cmd);
678}
679
680static void iommu_flush_dte_all(struct amd_iommu *iommu)
681{
682 u32 devid;
683
684 for (devid = 0; devid <= 0xffff; ++devid)
685 iommu_flush_dte(iommu, devid);
686
687 iommu_completion_wait(iommu);
688}
689
690/*
691 * This function uses heavy locking and may disable irqs for some time. But
692 * this is no issue because it is only called during resume.
693 */
694static void iommu_flush_tlb_all(struct amd_iommu *iommu)
695{
696 u32 dom_id;
697
698 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
699 struct iommu_cmd cmd;
700 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
701 dom_id, 1);
702 iommu_queue_command(iommu, &cmd);
703 }
704
705 iommu_completion_wait(iommu);
706}
707
708static void iommu_flush_all(struct amd_iommu *iommu)
709{
710 struct iommu_cmd cmd;
711
712 build_inv_all(&cmd);
713
714 iommu_queue_command(iommu, &cmd);
715 iommu_completion_wait(iommu);
716}
717
718void iommu_flush_all_caches(struct amd_iommu *iommu)
719{
720 if (iommu_feature(iommu, FEATURE_IA)) {
721 iommu_flush_all(iommu);
722 } else {
723 iommu_flush_dte_all(iommu);
724 iommu_flush_tlb_all(iommu);
725 }
726}
727
728/*
729 * Command send function for flushing on-device TLB
730 */
731static int device_flush_iotlb(struct iommu_dev_data *dev_data,
732 u64 address, size_t size)
733{
734 struct amd_iommu *iommu;
735 struct iommu_cmd cmd;
736 int qdep;
737
738 qdep = dev_data->ats.qdep;
739 iommu = amd_iommu_rlookup_table[dev_data->devid];
740
741 build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);
742
743 return iommu_queue_command(iommu, &cmd);
744}
745
746/*
747 * Command send function for invalidating a device table entry
748 */
749static int device_flush_dte(struct iommu_dev_data *dev_data)
750{
751 struct amd_iommu *iommu;
752 int ret;
753
754 iommu = amd_iommu_rlookup_table[dev_data->devid];
755
756 ret = iommu_flush_dte(iommu, dev_data->devid);
757 if (ret)
758 return ret;
759
760 if (dev_data->ats.enabled)
761 ret = device_flush_iotlb(dev_data, 0, ~0UL);
762
763 return ret;
764}
765
766/*
767 * TLB invalidation function which is called from the mapping functions.
768 * It invalidates a single PTE if the range to flush is within a single
769 * page. Otherwise it flushes the whole TLB of the IOMMU.
770 */
771static void __domain_flush_pages(struct protection_domain *domain,
772 u64 address, size_t size, int pde)
773{
774 struct iommu_dev_data *dev_data;
775 struct iommu_cmd cmd;
776 int ret = 0, i;
777
778 build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
779
780 for (i = 0; i < amd_iommus_present; ++i) {
781 if (!domain->dev_iommu[i])
782 continue;
783
784 /*
785 * Devices of this domain are behind this IOMMU
786 * We need a TLB flush
787 */
788 ret |= iommu_queue_command(amd_iommus[i], &cmd);
789 }
790
791 list_for_each_entry(dev_data, &domain->dev_list, list) {
792
793 if (!dev_data->ats.enabled)
794 continue;
795
796 ret |= device_flush_iotlb(dev_data, address, size);
797 }
798
799 WARN_ON(ret);
800}
801
802static void domain_flush_pages(struct protection_domain *domain,
803 u64 address, size_t size)
804{
805 __domain_flush_pages(domain, address, size, 0);
806}
807
808/* Flush the whole IO/TLB for a given protection domain */
809static void domain_flush_tlb(struct protection_domain *domain)
810{
811 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
812}
813
814/* Flush the whole IO/TLB for a given protection domain - including PDE */
815static void domain_flush_tlb_pde(struct protection_domain *domain)
816{
817 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
818}
819
820static void domain_flush_complete(struct protection_domain *domain)
821{
822 int i;
823
824 for (i = 0; i < amd_iommus_present; ++i) {
825 if (!domain->dev_iommu[i])
826 continue;
827
828 /*
829 * Devices of this domain are behind this IOMMU
830 * We need to wait for completion of all commands.
831 */
832 iommu_completion_wait(amd_iommus[i]);
833 }
834}
835
836
837/*
838 * This function flushes the DTEs for all devices in the domain
839 */
840static void domain_flush_devices(struct protection_domain *domain)
841{
842 struct iommu_dev_data *dev_data;
843 unsigned long flags;
844
845 spin_lock_irqsave(&domain->lock, flags);
846
847 list_for_each_entry(dev_data, &domain->dev_list, list)
848 device_flush_dte(dev_data);
849
850 spin_unlock_irqrestore(&domain->lock, flags);
851}
852
853/****************************************************************************
854 *
855 * The functions below are used to create the page table mappings for
856 * unity mapped regions.
857 *
858 ****************************************************************************/
859
860/*
861 * This function is used to add another level to an IO page table. Adding
862 * another level increases the size of the address space by 9 bits to a size up
863 * to 64 bits.
864 */
865static bool increase_address_space(struct protection_domain *domain,
866 gfp_t gfp)
867{
868 u64 *pte;
869
870 if (domain->mode == PAGE_MODE_6_LEVEL)
871 /* address space already 64 bit large */
872 return false;
873
874 pte = (void *)get_zeroed_page(gfp);
875 if (!pte)
876 return false;
877
878 *pte = PM_LEVEL_PDE(domain->mode,
879 virt_to_phys(domain->pt_root));
880 domain->pt_root = pte;
881 domain->mode += 1;
882 domain->updated = true;
883
884 return true;
885}
886
887static u64 *alloc_pte(struct protection_domain *domain,
888 unsigned long address,
889 unsigned long page_size,
890 u64 **pte_page,
891 gfp_t gfp)
892{
893 int level, end_lvl;
894 u64 *pte, *page;
895
896 BUG_ON(!is_power_of_2(page_size));
897
898 while (address > PM_LEVEL_SIZE(domain->mode))
899 increase_address_space(domain, gfp);
900
901 level = domain->mode - 1;
902 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
903 address = PAGE_SIZE_ALIGN(address, page_size);
904 end_lvl = PAGE_SIZE_LEVEL(page_size);
905
906 while (level > end_lvl) {
907 if (!IOMMU_PTE_PRESENT(*pte)) {
908 page = (u64 *)get_zeroed_page(gfp);
909 if (!page)
910 return NULL;
911 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
912 }
913
914 /* No level skipping support yet */
915 if (PM_PTE_LEVEL(*pte) != level)
916 return NULL;
917
918 level -= 1;
919
920 pte = IOMMU_PTE_PAGE(*pte);
921
922 if (pte_page && level == end_lvl)
923 *pte_page = pte;
924
925 pte = &pte[PM_LEVEL_INDEX(level, address)];
926 }
927
928 return pte;
929}
930
931/*
932 * This function checks if there is a PTE for a given dma address. If
933 * there is one, it returns the pointer to it.
934 */
935static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
936{
937 int level;
938 u64 *pte;
939
940 if (address > PM_LEVEL_SIZE(domain->mode))
941 return NULL;
942
943 level = domain->mode - 1;
944 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
945
946 while (level > 0) {
947
948 /* Not Present */
949 if (!IOMMU_PTE_PRESENT(*pte))
950 return NULL;
951
952 /* Large PTE */
953 if (PM_PTE_LEVEL(*pte) == 0x07) {
954 unsigned long pte_mask, __pte;
955
956 /*
957 * If we have a series of large PTEs, make
958 * sure to return a pointer to the first one.
959 */
960 pte_mask = PTE_PAGE_SIZE(*pte);
961 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
962 __pte = ((unsigned long)pte) & pte_mask;
963
964 return (u64 *)__pte;
965 }
966
967 /* No level skipping support yet */
968 if (PM_PTE_LEVEL(*pte) != level)
969 return NULL;
970
971 level -= 1;
972
973 /* Walk to the next level */
974 pte = IOMMU_PTE_PAGE(*pte);
975 pte = &pte[PM_LEVEL_INDEX(level, address)];
976 }
977
978 return pte;
979}
980
981/*
982 * Generic mapping function. It maps a physical address into a DMA
983 * address space. It allocates the page table pages if necessary.
984 * In the future it can be extended to a generic mapping function
985 * supporting all features of AMD IOMMU page tables like level skipping
986 * and full 64 bit address spaces.
987 */
988static int iommu_map_page(struct protection_domain *dom,
989 unsigned long bus_addr,
990 unsigned long phys_addr,
991 int prot,
992 unsigned long page_size)
993{
994 u64 __pte, *pte;
995 int i, count;
996
997 if (!(prot & IOMMU_PROT_MASK))
998 return -EINVAL;
999
1000 bus_addr = PAGE_ALIGN(bus_addr);
1001 phys_addr = PAGE_ALIGN(phys_addr);
1002 count = PAGE_SIZE_PTE_COUNT(page_size);
1003 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
1004
1005 for (i = 0; i < count; ++i)
1006 if (IOMMU_PTE_PRESENT(pte[i]))
1007 return -EBUSY;
1008
1009 if (page_size > PAGE_SIZE) {
1010 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
1011 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
1012 } else
1013 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
1014
1015 if (prot & IOMMU_PROT_IR)
1016 __pte |= IOMMU_PTE_IR;
1017 if (prot & IOMMU_PROT_IW)
1018 __pte |= IOMMU_PTE_IW;
1019
1020 for (i = 0; i < count; ++i)
1021 pte[i] = __pte;
1022
1023 update_domain(dom);
1024
1025 return 0;
1026}
1027
1028static unsigned long iommu_unmap_page(struct protection_domain *dom,
1029 unsigned long bus_addr,
1030 unsigned long page_size)
1031{
1032 unsigned long long unmap_size, unmapped;
1033 u64 *pte;
1034
1035 BUG_ON(!is_power_of_2(page_size));
1036
1037 unmapped = 0;
1038
1039 while (unmapped < page_size) {
1040
1041 pte = fetch_pte(dom, bus_addr);
1042
1043 if (!pte) {
1044 /*
1045 * No PTE for this address
1046 * move forward in 4kb steps
1047 */
1048 unmap_size = PAGE_SIZE;
1049 } else if (PM_PTE_LEVEL(*pte) == 0) {
1050 /* 4kb PTE found for this address */
1051 unmap_size = PAGE_SIZE;
1052 *pte = 0ULL;
1053 } else {
1054 int count, i;
1055
1056 /* Large PTE found which maps this address */
1057 unmap_size = PTE_PAGE_SIZE(*pte);
1058 count = PAGE_SIZE_PTE_COUNT(unmap_size);
1059 for (i = 0; i < count; i++)
1060 pte[i] = 0ULL;
1061 }
1062
1063 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
1064 unmapped += unmap_size;
1065 }
1066
1067 BUG_ON(!is_power_of_2(unmapped));
1068
1069 return unmapped;
1070}
1071
1072/*
1073 * This function checks if a specific unity mapping entry is needed for
1074 * this specific IOMMU.
1075 */
1076static int iommu_for_unity_map(struct amd_iommu *iommu,
1077 struct unity_map_entry *entry)
1078{
1079 u16 bdf, i;
1080
1081 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1082 bdf = amd_iommu_alias_table[i];
1083 if (amd_iommu_rlookup_table[bdf] == iommu)
1084 return 1;
1085 }
1086
1087 return 0;
1088}
1089
1090/*
1091 * This function actually applies the mapping to the page table of the
1092 * dma_ops domain.
1093 */
1094static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1095 struct unity_map_entry *e)
1096{
1097 u64 addr;
1098 int ret;
1099
1100 for (addr = e->address_start; addr < e->address_end;
1101 addr += PAGE_SIZE) {
1102 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1103 PAGE_SIZE);
1104 if (ret)
1105 return ret;
1106 /*
1107 * if unity mapping is in aperture range mark the page
1108 * as allocated in the aperture
1109 */
1110 if (addr < dma_dom->aperture_size)
1111 __set_bit(addr >> PAGE_SHIFT,
1112 dma_dom->aperture[0]->bitmap);
1113 }
1114
1115 return 0;
1116}
1117
1118/*
1119 * Init the unity mappings for a specific IOMMU in the system
1120 *
1121 * Basically iterates over all unity mapping entries and applies them to
1122 * the default DMA domain of that IOMMU if necessary.
1123 */
1124static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1125{
1126 struct unity_map_entry *entry;
1127 int ret;
1128
1129 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1130 if (!iommu_for_unity_map(iommu, entry))
1131 continue;
1132 ret = dma_ops_unity_map(iommu->default_dom, entry);
1133 if (ret)
1134 return ret;
1135 }
1136
1137 return 0;
1138}
1139
1140/*
1141 * Inits the unity mappings required for a specific device
1142 */
1143static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1144 u16 devid)
1145{
1146 struct unity_map_entry *e;
1147 int ret;
1148
1149 list_for_each_entry(e, &amd_iommu_unity_map, list) {
1150 if (!(devid >= e->devid_start && devid <= e->devid_end))
1151 continue;
1152 ret = dma_ops_unity_map(dma_dom, e);
1153 if (ret)
1154 return ret;
1155 }
1156
1157 return 0;
1158}
1159
1160/****************************************************************************
1161 *
1162 * The next functions belong to the address allocator for the dma_ops
1163 * interface functions. They work like the allocators in the other IOMMU
1164 * drivers. It's basically a bitmap which marks the allocated pages in
1165 * the aperture. Maybe it could be enhanced in the future to a more
1166 * efficient allocator.
1167 *
1168 ****************************************************************************/
1169
1170/*
1171 * The address allocator core functions.
1172 *
1173 * called with domain->lock held
1174 */
1175
1176/*
1177 * Used to reserve address ranges in the aperture (e.g. for exclusion
1178 * ranges.
1179 */
1180static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1181 unsigned long start_page,
1182 unsigned int pages)
1183{
1184 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1185
1186 if (start_page + pages > last_page)
1187 pages = last_page - start_page;
1188
1189 for (i = start_page; i < start_page + pages; ++i) {
1190 int index = i / APERTURE_RANGE_PAGES;
1191 int page = i % APERTURE_RANGE_PAGES;
1192 __set_bit(page, dom->aperture[index]->bitmap);
1193 }
1194}
1195
1196/*
1197 * This function is used to add a new aperture range to an existing
1198 * aperture in case of dma_ops domain allocation or address allocation
1199 * failure.
1200 */
1201static int alloc_new_range(struct dma_ops_domain *dma_dom,
1202 bool populate, gfp_t gfp)
1203{
1204 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
1205 struct amd_iommu *iommu;
1206 unsigned long i, old_size;
1207
1208#ifdef CONFIG_IOMMU_STRESS
1209 populate = false;
1210#endif
1211
1212 if (index >= APERTURE_MAX_RANGES)
1213 return -ENOMEM;
1214
1215 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
1216 if (!dma_dom->aperture[index])
1217 return -ENOMEM;
1218
1219 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
1220 if (!dma_dom->aperture[index]->bitmap)
1221 goto out_free;
1222
1223 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
1224
1225 if (populate) {
1226 unsigned long address = dma_dom->aperture_size;
1227 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
1228 u64 *pte, *pte_page;
1229
1230 for (i = 0; i < num_ptes; ++i) {
1231 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1232 &pte_page, gfp);
1233 if (!pte)
1234 goto out_free;
1235
1236 dma_dom->aperture[index]->pte_pages[i] = pte_page;
1237
1238 address += APERTURE_RANGE_SIZE / 64;
1239 }
1240 }
1241
1242 old_size = dma_dom->aperture_size;
1243 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
1244
1245 /* Reserve address range used for MSI messages */
1246 if (old_size < MSI_ADDR_BASE_LO &&
1247 dma_dom->aperture_size > MSI_ADDR_BASE_LO) {
1248 unsigned long spage;
1249 int pages;
1250
1251 pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE);
1252 spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT;
1253
1254 dma_ops_reserve_addresses(dma_dom, spage, pages);
1255 }
1256
1257 /* Initialize the exclusion range if necessary */
1258 for_each_iommu(iommu) {
1259 if (iommu->exclusion_start &&
1260 iommu->exclusion_start >= dma_dom->aperture[index]->offset
1261 && iommu->exclusion_start < dma_dom->aperture_size) {
1262 unsigned long startpage;
1263 int pages = iommu_num_pages(iommu->exclusion_start,
1264 iommu->exclusion_length,
1265 PAGE_SIZE);
1266 startpage = iommu->exclusion_start >> PAGE_SHIFT;
1267 dma_ops_reserve_addresses(dma_dom, startpage, pages);
1268 }
1269 }
1270
1271 /*
1272 * Check for areas already mapped as present in the new aperture
1273 * range and mark those pages as reserved in the allocator. Such
1274 * mappings may already exist as a result of requested unity
1275 * mappings for devices.
1276 */
1277 for (i = dma_dom->aperture[index]->offset;
1278 i < dma_dom->aperture_size;
1279 i += PAGE_SIZE) {
1280 u64 *pte = fetch_pte(&dma_dom->domain, i);
1281 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1282 continue;
1283
1284 dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
1285 }
1286
1287 update_domain(&dma_dom->domain);
1288
1289 return 0;
1290
1291out_free:
1292 update_domain(&dma_dom->domain);
1293
1294 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
1295
1296 kfree(dma_dom->aperture[index]);
1297 dma_dom->aperture[index] = NULL;
1298
1299 return -ENOMEM;
1300}
1301
1302static unsigned long dma_ops_area_alloc(struct device *dev,
1303 struct dma_ops_domain *dom,
1304 unsigned int pages,
1305 unsigned long align_mask,
1306 u64 dma_mask,
1307 unsigned long start)
1308{
1309 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1310 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1311 int i = start >> APERTURE_RANGE_SHIFT;
1312 unsigned long boundary_size;
1313 unsigned long address = -1;
1314 unsigned long limit;
1315
1316 next_bit >>= PAGE_SHIFT;
1317
1318 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1319 PAGE_SIZE) >> PAGE_SHIFT;
1320
1321 for (;i < max_index; ++i) {
1322 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
1323
1324 if (dom->aperture[i]->offset >= dma_mask)
1325 break;
1326
1327 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
1328 dma_mask >> PAGE_SHIFT);
1329
1330 address = iommu_area_alloc(dom->aperture[i]->bitmap,
1331 limit, next_bit, pages, 0,
1332 boundary_size, align_mask);
1333 if (address != -1) {
1334 address = dom->aperture[i]->offset +
1335 (address << PAGE_SHIFT);
1336 dom->next_address = address + (pages << PAGE_SHIFT);
1337 break;
1338 }
1339
1340 next_bit = 0;
1341 }
1342
1343 return address;
1344}
1345
1346static unsigned long dma_ops_alloc_addresses(struct device *dev,
1347 struct dma_ops_domain *dom,
1348 unsigned int pages,
1349 unsigned long align_mask,
1350 u64 dma_mask)
1351{
1352 unsigned long address;
1353
1354#ifdef CONFIG_IOMMU_STRESS
1355 dom->next_address = 0;
1356 dom->need_flush = true;
1357#endif
1358
1359 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1360 dma_mask, dom->next_address);
1361
1362 if (address == -1) {
1363 dom->next_address = 0;
1364 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1365 dma_mask, 0);
1366 dom->need_flush = true;
1367 }
1368
1369 if (unlikely(address == -1))
1370 address = DMA_ERROR_CODE;
1371
1372 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1373
1374 return address;
1375}
1376
1377/*
1378 * The address free function.
1379 *
1380 * called with domain->lock held
1381 */
1382static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1383 unsigned long address,
1384 unsigned int pages)
1385{
1386 unsigned i = address >> APERTURE_RANGE_SHIFT;
1387 struct aperture_range *range = dom->aperture[i];
1388
1389 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1390
1391#ifdef CONFIG_IOMMU_STRESS
1392 if (i < 4)
1393 return;
1394#endif
1395
1396 if (address >= dom->next_address)
1397 dom->need_flush = true;
1398
1399 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
1400
1401 bitmap_clear(range->bitmap, address, pages);
1402
1403}
1404
1405/****************************************************************************
1406 *
1407 * The next functions belong to the domain allocation. A domain is
1408 * allocated for every IOMMU as the default domain. If device isolation
1409 * is enabled, every device gets its own domain. The most important thing
1410 * about domains is the page table mapping the DMA address space they
1411 * contain.
1412 *
1413 ****************************************************************************/
1414
1415/*
1416 * This function adds a protection domain to the global protection domain list
1417 */
1418static void add_domain_to_list(struct protection_domain *domain)
1419{
1420 unsigned long flags;
1421
1422 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1423 list_add(&domain->list, &amd_iommu_pd_list);
1424 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1425}
1426
1427/*
1428 * This function removes a protection domain from the global
1429 * protection domain list
1430 */
1431static void del_domain_from_list(struct protection_domain *domain)
1432{
1433 unsigned long flags;
1434
1435 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1436 list_del(&domain->list);
1437 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1438}
1439
1440static u16 domain_id_alloc(void)
1441{
1442 unsigned long flags;
1443 int id;
1444
1445 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1446 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1447 BUG_ON(id == 0);
1448 if (id > 0 && id < MAX_DOMAIN_ID)
1449 __set_bit(id, amd_iommu_pd_alloc_bitmap);
1450 else
1451 id = 0;
1452 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1453
1454 return id;
1455}
1456
1457static void domain_id_free(int id)
1458{
1459 unsigned long flags;
1460
1461 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1462 if (id > 0 && id < MAX_DOMAIN_ID)
1463 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1464 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1465}
1466
1467static void free_pagetable(struct protection_domain *domain)
1468{
1469 int i, j;
1470 u64 *p1, *p2, *p3;
1471
1472 p1 = domain->pt_root;
1473
1474 if (!p1)
1475 return;
1476
1477 for (i = 0; i < 512; ++i) {
1478 if (!IOMMU_PTE_PRESENT(p1[i]))
1479 continue;
1480
1481 p2 = IOMMU_PTE_PAGE(p1[i]);
1482 for (j = 0; j < 512; ++j) {
1483 if (!IOMMU_PTE_PRESENT(p2[j]))
1484 continue;
1485 p3 = IOMMU_PTE_PAGE(p2[j]);
1486 free_page((unsigned long)p3);
1487 }
1488
1489 free_page((unsigned long)p2);
1490 }
1491
1492 free_page((unsigned long)p1);
1493
1494 domain->pt_root = NULL;
1495}
1496
1497/*
1498 * Free a domain, only used if something went wrong in the
1499 * allocation path and we need to free an already allocated page table
1500 */
1501static void dma_ops_domain_free(struct dma_ops_domain *dom)
1502{
1503 int i;
1504
1505 if (!dom)
1506 return;
1507
1508 del_domain_from_list(&dom->domain);
1509
1510 free_pagetable(&dom->domain);
1511
1512 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1513 if (!dom->aperture[i])
1514 continue;
1515 free_page((unsigned long)dom->aperture[i]->bitmap);
1516 kfree(dom->aperture[i]);
1517 }
1518
1519 kfree(dom);
1520}
1521
1522/*
1523 * Allocates a new protection domain usable for the dma_ops functions.
1524 * It also initializes the page table and the address allocator data
1525 * structures required for the dma_ops interface
1526 */
1527static struct dma_ops_domain *dma_ops_domain_alloc(void)
1528{
1529 struct dma_ops_domain *dma_dom;
1530
1531 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1532 if (!dma_dom)
1533 return NULL;
1534
1535 spin_lock_init(&dma_dom->domain.lock);
1536
1537 dma_dom->domain.id = domain_id_alloc();
1538 if (dma_dom->domain.id == 0)
1539 goto free_dma_dom;
1540 INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1541 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1542 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1543 dma_dom->domain.flags = PD_DMA_OPS_MASK;
1544 dma_dom->domain.priv = dma_dom;
1545 if (!dma_dom->domain.pt_root)
1546 goto free_dma_dom;
1547
1548 dma_dom->need_flush = false;
1549 dma_dom->target_dev = 0xffff;
1550
1551 add_domain_to_list(&dma_dom->domain);
1552
1553 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1554 goto free_dma_dom;
1555
1556 /*
1557 * mark the first page as allocated so we never return 0 as
1558 * a valid dma-address, so we can use 0 as the error value
1559 */
1560 dma_dom->aperture[0]->bitmap[0] = 1;
1561 dma_dom->next_address = 0;
1562
1563
1564 return dma_dom;
1565
1566free_dma_dom:
1567 dma_ops_domain_free(dma_dom);
1568
1569 return NULL;
1570}
1571
1572/*
1573 * little helper function to check whether a given protection domain is a
1574 * dma_ops domain
1575 */
1576static bool dma_ops_domain(struct protection_domain *domain)
1577{
1578 return domain->flags & PD_DMA_OPS_MASK;
1579}
1580
1581static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1582{
1583 u64 pte_root = virt_to_phys(domain->pt_root);
1584 u32 flags = 0;
1585
1586 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1587 << DEV_ENTRY_MODE_SHIFT;
1588 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1589
1590 if (ats)
1591 flags |= DTE_FLAG_IOTLB;
1592
1593 amd_iommu_dev_table[devid].data[3] |= flags;
1594 amd_iommu_dev_table[devid].data[2] = domain->id;
1595 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1596 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1597}
1598
1599static void clear_dte_entry(u16 devid)
1600{
1601 /* remove entry from the device table seen by the hardware */
1602 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1603 amd_iommu_dev_table[devid].data[1] = 0;
1604 amd_iommu_dev_table[devid].data[2] = 0;
1605
1606 amd_iommu_apply_erratum_63(devid);
1607}
1608
1609static void do_attach(struct iommu_dev_data *dev_data,
1610 struct protection_domain *domain)
1611{
1612 struct amd_iommu *iommu;
1613 bool ats;
1614
1615 iommu = amd_iommu_rlookup_table[dev_data->devid];
1616 ats = dev_data->ats.enabled;
1617
1618 /* Update data structures */
1619 dev_data->domain = domain;
1620 list_add(&dev_data->list, &domain->dev_list);
1621 set_dte_entry(dev_data->devid, domain, ats);
1622
1623 /* Do reference counting */
1624 domain->dev_iommu[iommu->index] += 1;
1625 domain->dev_cnt += 1;
1626
1627 /* Flush the DTE entry */
1628 device_flush_dte(dev_data);
1629}
1630
1631static void do_detach(struct iommu_dev_data *dev_data)
1632{
1633 struct amd_iommu *iommu;
1634
1635 iommu = amd_iommu_rlookup_table[dev_data->devid];
1636
1637 /* decrease reference counters */
1638 dev_data->domain->dev_iommu[iommu->index] -= 1;
1639 dev_data->domain->dev_cnt -= 1;
1640
1641 /* Update data structures */
1642 dev_data->domain = NULL;
1643 list_del(&dev_data->list);
1644 clear_dte_entry(dev_data->devid);
1645
1646 /* Flush the DTE entry */
1647 device_flush_dte(dev_data);
1648}
1649
1650/*
1651 * If a device is not yet associated with a domain, this function
1652 * assigns it to the domain and makes it visible to the hardware
1653 */
1654static int __attach_device(struct iommu_dev_data *dev_data,
1655 struct protection_domain *domain)
1656{
1657 int ret;
1658
1659 /* lock domain */
1660 spin_lock(&domain->lock);
1661
1662 if (dev_data->alias_data != NULL) {
1663 struct iommu_dev_data *alias_data = dev_data->alias_data;
1664
1665 /* Some sanity checks */
1666 ret = -EBUSY;
1667 if (alias_data->domain != NULL &&
1668 alias_data->domain != domain)
1669 goto out_unlock;
1670
1671 if (dev_data->domain != NULL &&
1672 dev_data->domain != domain)
1673 goto out_unlock;
1674
1675 /* Do real assignment */
1676 if (alias_data->domain == NULL)
1677 do_attach(alias_data, domain);
1678
1679 atomic_inc(&alias_data->bind);
1680 }
1681
1682 if (dev_data->domain == NULL)
1683 do_attach(dev_data, domain);
1684
1685 atomic_inc(&dev_data->bind);
1686
1687 ret = 0;
1688
1689out_unlock:
1690
1691 /* ready */
1692 spin_unlock(&domain->lock);
1693
1694 return ret;
1695}
1696
1697/*
1698 * If a device is not yet associated with a domain, this function
1699 * assigns it to the domain and makes it visible to the hardware
1700 */
1701static int attach_device(struct device *dev,
1702 struct protection_domain *domain)
1703{
1704 struct pci_dev *pdev = to_pci_dev(dev);
1705 struct iommu_dev_data *dev_data;
1706 unsigned long flags;
1707 int ret;
1708
1709 dev_data = get_dev_data(dev);
1710
1711 if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
1712 dev_data->ats.enabled = true;
1713 dev_data->ats.qdep = pci_ats_queue_depth(pdev);
1714 }
1715
1716 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1717 ret = __attach_device(dev_data, domain);
1718 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1719
1720 /*
1721 * We might boot into a crash-kernel here. The crashed kernel
1722 * left the caches in the IOMMU dirty. So we have to flush
1723 * here to evict all dirty stuff.
1724 */
1725 domain_flush_tlb_pde(domain);
1726
1727 return ret;
1728}
1729
1730/*
1731 * Removes a device from a protection domain (unlocked)
1732 */
1733static void __detach_device(struct iommu_dev_data *dev_data)
1734{
1735 struct protection_domain *domain;
1736 unsigned long flags;
1737
1738 BUG_ON(!dev_data->domain);
1739
1740 domain = dev_data->domain;
1741
1742 spin_lock_irqsave(&domain->lock, flags);
1743
1744 if (dev_data->alias_data != NULL) {
1745 struct iommu_dev_data *alias_data = dev_data->alias_data;
1746
1747 if (atomic_dec_and_test(&alias_data->bind))
1748 do_detach(alias_data);
1749 }
1750
1751 if (atomic_dec_and_test(&dev_data->bind))
1752 do_detach(dev_data);
1753
1754 spin_unlock_irqrestore(&domain->lock, flags);
1755
1756 /*
1757 * If we run in passthrough mode the device must be assigned to the
1758 * passthrough domain if it is detached from any other domain.
1759 * Make sure we can deassign from the pt_domain itself.
1760 */
1761 if (iommu_pass_through &&
1762 (dev_data->domain == NULL && domain != pt_domain))
1763 __attach_device(dev_data, pt_domain);
1764}
1765
1766/*
1767 * Removes a device from a protection domain (with devtable_lock held)
1768 */
1769static void detach_device(struct device *dev)
1770{
1771 struct iommu_dev_data *dev_data;
1772 unsigned long flags;
1773
1774 dev_data = get_dev_data(dev);
1775
1776 /* lock device table */
1777 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1778 __detach_device(dev_data);
1779 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1780
1781 if (dev_data->ats.enabled) {
1782 pci_disable_ats(to_pci_dev(dev));
1783 dev_data->ats.enabled = false;
1784 }
1785}
1786
1787/*
1788 * Find out the protection domain structure for a given PCI device. This
1789 * will give us the pointer to the page table root for example.
1790 */
1791static struct protection_domain *domain_for_device(struct device *dev)
1792{
1793 struct iommu_dev_data *dev_data;
1794 struct protection_domain *dom = NULL;
1795 unsigned long flags;
1796
1797 dev_data = get_dev_data(dev);
1798
1799 if (dev_data->domain)
1800 return dev_data->domain;
1801
1802 if (dev_data->alias_data != NULL) {
1803 struct iommu_dev_data *alias_data = dev_data->alias_data;
1804
1805 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1806 if (alias_data->domain != NULL) {
1807 __attach_device(dev_data, alias_data->domain);
1808 dom = alias_data->domain;
1809 }
1810 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1811 }
1812
1813 return dom;
1814}
1815
1816static int device_change_notifier(struct notifier_block *nb,
1817 unsigned long action, void *data)
1818{
1819 struct device *dev = data;
1820 u16 devid;
1821 struct protection_domain *domain;
1822 struct dma_ops_domain *dma_domain;
1823 struct amd_iommu *iommu;
1824 unsigned long flags;
1825
1826 if (!check_device(dev))
1827 return 0;
1828
1829 devid = get_device_id(dev);
1830 iommu = amd_iommu_rlookup_table[devid];
1831
1832 switch (action) {
1833 case BUS_NOTIFY_UNBOUND_DRIVER:
1834
1835 domain = domain_for_device(dev);
1836
1837 if (!domain)
1838 goto out;
1839 if (iommu_pass_through)
1840 break;
1841 detach_device(dev);
1842 break;
1843 case BUS_NOTIFY_ADD_DEVICE:
1844
1845 iommu_init_device(dev);
1846
1847 domain = domain_for_device(dev);
1848
1849 /* allocate a protection domain if a device is added */
1850 dma_domain = find_protection_domain(devid);
1851 if (dma_domain)
1852 goto out;
1853 dma_domain = dma_ops_domain_alloc();
1854 if (!dma_domain)
1855 goto out;
1856 dma_domain->target_dev = devid;
1857
1858 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1859 list_add_tail(&dma_domain->list, &iommu_pd_list);
1860 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1861
1862 break;
1863 case BUS_NOTIFY_DEL_DEVICE:
1864
1865 iommu_uninit_device(dev);
1866
1867 default:
1868 goto out;
1869 }
1870
1871 iommu_completion_wait(iommu);
1872
1873out:
1874 return 0;
1875}
1876
1877static struct notifier_block device_nb = {
1878 .notifier_call = device_change_notifier,
1879};
1880
1881void amd_iommu_init_notifier(void)
1882{
1883 bus_register_notifier(&pci_bus_type, &device_nb);
1884}
1885
1886/*****************************************************************************
1887 *
1888 * The next functions belong to the dma_ops mapping/unmapping code.
1889 *
1890 *****************************************************************************/
1891
1892/*
1893 * In the dma_ops path we only have the struct device. This function
1894 * finds the corresponding IOMMU, the protection domain and the
1895 * requestor id for a given device.
1896 * If the device is not yet associated with a domain this is also done
1897 * in this function.
1898 */
1899static struct protection_domain *get_domain(struct device *dev)
1900{
1901 struct protection_domain *domain;
1902 struct dma_ops_domain *dma_dom;
1903 u16 devid = get_device_id(dev);
1904
1905 if (!check_device(dev))
1906 return ERR_PTR(-EINVAL);
1907
1908 domain = domain_for_device(dev);
1909 if (domain != NULL && !dma_ops_domain(domain))
1910 return ERR_PTR(-EBUSY);
1911
1912 if (domain != NULL)
1913 return domain;
1914
1915	/* Device not bound yet - bind it */
1916 dma_dom = find_protection_domain(devid);
1917 if (!dma_dom)
1918 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1919 attach_device(dev, &dma_dom->domain);
1920 DUMP_printk("Using protection domain %d for device %s\n",
1921 dma_dom->domain.id, dev_name(dev));
1922
1923 return &dma_dom->domain;
1924}
1925
1926static void update_device_table(struct protection_domain *domain)
1927{
1928 struct iommu_dev_data *dev_data;
1929
1930 list_for_each_entry(dev_data, &domain->dev_list, list)
1931 set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
1932}
1933
1934static void update_domain(struct protection_domain *domain)
1935{
1936 if (!domain->updated)
1937 return;
1938
1939 update_device_table(domain);
1940
1941 domain_flush_devices(domain);
1942 domain_flush_tlb_pde(domain);
1943
1944 domain->updated = false;
1945}
1946
1947/*
1948 * This function fetches the PTE for a given address in the aperture
1949 */
1950static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1951 unsigned long address)
1952{
1953 struct aperture_range *aperture;
1954 u64 *pte, *pte_page;
1955
1956 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1957 if (!aperture)
1958 return NULL;
1959
1960 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1961 if (!pte) {
1962 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1963 GFP_ATOMIC);
1964 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1965 } else
1966 pte += PM_LEVEL_INDEX(0, address);
1967
1968 update_domain(&dom->domain);
1969
1970 return pte;
1971}
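
/*
 * Editor's note (illustrative, not part of this commit): the lookup above is
 * two-level. dom->aperture[APERTURE_RANGE_INDEX(address)] selects the
 * aperture range the DMA address falls into (ranges are added in 128 MB
 * steps, see the alloc_new_range() retry path in __map_single() below),
 * pte_pages[APERTURE_PAGE_INDEX(address)] selects the lazily allocated page
 * of PTEs inside that range, and PM_LEVEL_INDEX(0, address) picks the
 * level-0 PTE slot within it. alloc_pte() only runs the first time a given
 * PTE page is touched.
 */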
1972
1973/*
1974 * This is the generic map function. It maps one 4kb page at paddr to
1975 * the given address in the DMA address space for the domain.
1976 */
1977static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1978 unsigned long address,
1979 phys_addr_t paddr,
1980 int direction)
1981{
1982 u64 *pte, __pte;
1983
1984 WARN_ON(address > dom->aperture_size);
1985
1986 paddr &= PAGE_MASK;
1987
1988 pte = dma_ops_get_pte(dom, address);
1989 if (!pte)
1990 return DMA_ERROR_CODE;
1991
1992 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1993
1994 if (direction == DMA_TO_DEVICE)
1995 __pte |= IOMMU_PTE_IR;
1996 else if (direction == DMA_FROM_DEVICE)
1997 __pte |= IOMMU_PTE_IW;
1998 else if (direction == DMA_BIDIRECTIONAL)
1999 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
2000
2001 WARN_ON(*pte);
2002
2003 *pte = __pte;
2004
2005 return (dma_addr_t)address;
2006}
2007
2008/*
2009 * The generic unmapping function for one page in the DMA address space.
2010 */
2011static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
2012 unsigned long address)
2013{
2014 struct aperture_range *aperture;
2015 u64 *pte;
2016
2017 if (address >= dom->aperture_size)
2018 return;
2019
2020 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
2021 if (!aperture)
2022 return;
2023
2024 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
2025 if (!pte)
2026 return;
2027
2028 pte += PM_LEVEL_INDEX(0, address);
2029
2030 WARN_ON(!*pte);
2031
2032 *pte = 0ULL;
2033}
2034
2035/*
2036 * This function contains common code for mapping of a physically
2037 * contiguous memory region into DMA address space. It is used by all
2038 * mapping functions provided with this IOMMU driver.
2039 * Must be called with the domain lock held.
2040 */
2041static dma_addr_t __map_single(struct device *dev,
2042 struct dma_ops_domain *dma_dom,
2043 phys_addr_t paddr,
2044 size_t size,
2045 int dir,
2046 bool align,
2047 u64 dma_mask)
2048{
2049 dma_addr_t offset = paddr & ~PAGE_MASK;
2050 dma_addr_t address, start, ret;
2051 unsigned int pages;
2052 unsigned long align_mask = 0;
2053 int i;
2054
2055 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
2056 paddr &= PAGE_MASK;
2057
2058 INC_STATS_COUNTER(total_map_requests);
2059
2060 if (pages > 1)
2061 INC_STATS_COUNTER(cross_page);
2062
2063 if (align)
2064 align_mask = (1UL << get_order(size)) - 1;
2065
2066retry:
2067 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
2068 dma_mask);
2069 if (unlikely(address == DMA_ERROR_CODE)) {
2070 /*
2071 * setting next_address here will let the address
2072		 * allocator only scan the newly allocated range in the
2073 * first run. This is a small optimization.
2074 */
2075 dma_dom->next_address = dma_dom->aperture_size;
2076
2077 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
2078 goto out;
2079
2080 /*
2081 * aperture was successfully enlarged by 128 MB, try
2082 * allocation again
2083 */
2084 goto retry;
2085 }
2086
2087 start = address;
2088 for (i = 0; i < pages; ++i) {
2089 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
2090 if (ret == DMA_ERROR_CODE)
2091 goto out_unmap;
2092
2093 paddr += PAGE_SIZE;
2094 start += PAGE_SIZE;
2095 }
2096 address += offset;
2097
2098 ADD_STATS_COUNTER(alloced_io_mem, size);
2099
2100 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
2101 domain_flush_tlb(&dma_dom->domain);
2102 dma_dom->need_flush = false;
2103 } else if (unlikely(amd_iommu_np_cache))
2104 domain_flush_pages(&dma_dom->domain, address, size);
2105
2106out:
2107 return address;
2108
2109out_unmap:
2110
2111 for (--i; i >= 0; --i) {
2112 start -= PAGE_SIZE;
2113 dma_ops_domain_unmap(dma_dom, start);
2114 }
2115
2116 dma_ops_free_addresses(dma_dom, address, pages);
2117
2118 return DMA_ERROR_CODE;
2119}
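
/*
 * Editor's note, a worked example with hypothetical values: for
 * paddr = 0x12345234 and size = 0x2000, iommu_num_pages() returns 3 because
 * the region starts 0x234 bytes into its first page; that offset is added
 * back onto the returned DMA address at the end. With align == true,
 * get_order(0x2000) == 1, so align_mask == 0x1 and the allocated DMA range
 * starts on a two-page boundary.
 */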
2120
2121/*
2122 * Does the reverse of the __map_single function. Must be called with
2123 * the domain lock held too
2124 */
2125static void __unmap_single(struct dma_ops_domain *dma_dom,
2126 dma_addr_t dma_addr,
2127 size_t size,
2128 int dir)
2129{
2130 dma_addr_t flush_addr;
2131 dma_addr_t i, start;
2132 unsigned int pages;
2133
2134 if ((dma_addr == DMA_ERROR_CODE) ||
2135 (dma_addr + size > dma_dom->aperture_size))
2136 return;
2137
2138 flush_addr = dma_addr;
2139 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
2140 dma_addr &= PAGE_MASK;
2141 start = dma_addr;
2142
2143 for (i = 0; i < pages; ++i) {
2144 dma_ops_domain_unmap(dma_dom, start);
2145 start += PAGE_SIZE;
2146 }
2147
2148 SUB_STATS_COUNTER(alloced_io_mem, size);
2149
2150 dma_ops_free_addresses(dma_dom, dma_addr, pages);
2151
2152 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
2153 domain_flush_pages(&dma_dom->domain, flush_addr, size);
2154 dma_dom->need_flush = false;
2155 }
2156}
2157
2158/*
2159 * The exported map_single function for dma_ops.
2160 */
2161static dma_addr_t map_page(struct device *dev, struct page *page,
2162 unsigned long offset, size_t size,
2163 enum dma_data_direction dir,
2164 struct dma_attrs *attrs)
2165{
2166 unsigned long flags;
2167 struct protection_domain *domain;
2168 dma_addr_t addr;
2169 u64 dma_mask;
2170 phys_addr_t paddr = page_to_phys(page) + offset;
2171
2172 INC_STATS_COUNTER(cnt_map_single);
2173
2174 domain = get_domain(dev);
2175 if (PTR_ERR(domain) == -EINVAL)
2176 return (dma_addr_t)paddr;
2177 else if (IS_ERR(domain))
2178 return DMA_ERROR_CODE;
2179
2180 dma_mask = *dev->dma_mask;
2181
2182 spin_lock_irqsave(&domain->lock, flags);
2183
2184 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
2185 dma_mask);
2186 if (addr == DMA_ERROR_CODE)
2187 goto out;
2188
2189 domain_flush_complete(domain);
2190
2191out:
2192 spin_unlock_irqrestore(&domain->lock, flags);
2193
2194 return addr;
2195}
2196
2197/*
2198 * The exported unmap_single function for dma_ops.
2199 */
2200static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2201 enum dma_data_direction dir, struct dma_attrs *attrs)
2202{
2203 unsigned long flags;
2204 struct protection_domain *domain;
2205
2206 INC_STATS_COUNTER(cnt_unmap_single);
2207
2208 domain = get_domain(dev);
2209 if (IS_ERR(domain))
2210 return;
2211
2212 spin_lock_irqsave(&domain->lock, flags);
2213
2214 __unmap_single(domain->priv, dma_addr, size, dir);
2215
2216 domain_flush_complete(domain);
2217
2218 spin_unlock_irqrestore(&domain->lock, flags);
2219}
2220
2221/*
2222 * This is a special map_sg function which is used when we have to map a
2223 * device which is not handled by an AMD IOMMU in the system.
2224 */
2225static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
2226 int nelems, int dir)
2227{
2228 struct scatterlist *s;
2229 int i;
2230
2231 for_each_sg(sglist, s, nelems, i) {
2232 s->dma_address = (dma_addr_t)sg_phys(s);
2233 s->dma_length = s->length;
2234 }
2235
2236 return nelems;
2237}
2238
2239/*
2240 * The exported map_sg function for dma_ops (handles scatter-gather
2241 * lists).
2242 */
2243static int map_sg(struct device *dev, struct scatterlist *sglist,
2244 int nelems, enum dma_data_direction dir,
2245 struct dma_attrs *attrs)
2246{
2247 unsigned long flags;
2248 struct protection_domain *domain;
2249 int i;
2250 struct scatterlist *s;
2251 phys_addr_t paddr;
2252 int mapped_elems = 0;
2253 u64 dma_mask;
2254
2255 INC_STATS_COUNTER(cnt_map_sg);
2256
2257 domain = get_domain(dev);
2258 if (PTR_ERR(domain) == -EINVAL)
2259 return map_sg_no_iommu(dev, sglist, nelems, dir);
2260 else if (IS_ERR(domain))
2261 return 0;
2262
2263 dma_mask = *dev->dma_mask;
2264
2265 spin_lock_irqsave(&domain->lock, flags);
2266
2267 for_each_sg(sglist, s, nelems, i) {
2268 paddr = sg_phys(s);
2269
2270 s->dma_address = __map_single(dev, domain->priv,
2271 paddr, s->length, dir, false,
2272 dma_mask);
2273
2274 if (s->dma_address) {
2275 s->dma_length = s->length;
2276 mapped_elems++;
2277 } else
2278 goto unmap;
2279 }
2280
2281 domain_flush_complete(domain);
2282
2283out:
2284 spin_unlock_irqrestore(&domain->lock, flags);
2285
2286 return mapped_elems;
2287unmap:
2288 for_each_sg(sglist, s, mapped_elems, i) {
2289 if (s->dma_address)
2290 __unmap_single(domain->priv, s->dma_address,
2291 s->dma_length, dir);
2292 s->dma_address = s->dma_length = 0;
2293 }
2294
2295 mapped_elems = 0;
2296
2297 goto out;
2298}
2299
2300/*
2301 * The exported unmap_sg function for dma_ops (handles scatter-gather
2302 * lists).
2303 */
2304static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2305 int nelems, enum dma_data_direction dir,
2306 struct dma_attrs *attrs)
2307{
2308 unsigned long flags;
2309 struct protection_domain *domain;
2310 struct scatterlist *s;
2311 int i;
2312
2313 INC_STATS_COUNTER(cnt_unmap_sg);
2314
2315 domain = get_domain(dev);
2316 if (IS_ERR(domain))
2317 return;
2318
2319 spin_lock_irqsave(&domain->lock, flags);
2320
2321 for_each_sg(sglist, s, nelems, i) {
2322 __unmap_single(domain->priv, s->dma_address,
2323 s->dma_length, dir);
2324 s->dma_address = s->dma_length = 0;
2325 }
2326
2327 domain_flush_complete(domain);
2328
2329 spin_unlock_irqrestore(&domain->lock, flags);
2330}
2331
2332/*
2333 * The exported alloc_coherent function for dma_ops.
2334 */
2335static void *alloc_coherent(struct device *dev, size_t size,
2336 dma_addr_t *dma_addr, gfp_t flag)
2337{
2338 unsigned long flags;
2339 void *virt_addr;
2340 struct protection_domain *domain;
2341 phys_addr_t paddr;
2342 u64 dma_mask = dev->coherent_dma_mask;
2343
2344 INC_STATS_COUNTER(cnt_alloc_coherent);
2345
2346 domain = get_domain(dev);
2347 if (PTR_ERR(domain) == -EINVAL) {
2348 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2349 *dma_addr = __pa(virt_addr);
2350 return virt_addr;
2351 } else if (IS_ERR(domain))
2352 return NULL;
2353
2354 dma_mask = dev->coherent_dma_mask;
2355 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2356 flag |= __GFP_ZERO;
2357
2358 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2359 if (!virt_addr)
2360 return NULL;
2361
2362 paddr = virt_to_phys(virt_addr);
2363
2364 if (!dma_mask)
2365 dma_mask = *dev->dma_mask;
2366
2367 spin_lock_irqsave(&domain->lock, flags);
2368
2369 *dma_addr = __map_single(dev, domain->priv, paddr,
2370 size, DMA_BIDIRECTIONAL, true, dma_mask);
2371
2372 if (*dma_addr == DMA_ERROR_CODE) {
2373 spin_unlock_irqrestore(&domain->lock, flags);
2374 goto out_free;
2375 }
2376
2377 domain_flush_complete(domain);
2378
2379 spin_unlock_irqrestore(&domain->lock, flags);
2380
2381 return virt_addr;
2382
2383out_free:
2384
2385 free_pages((unsigned long)virt_addr, get_order(size));
2386
2387 return NULL;
2388}
2389
2390/*
2391 * The exported free_coherent function for dma_ops.
2392 */
2393static void free_coherent(struct device *dev, size_t size,
2394 void *virt_addr, dma_addr_t dma_addr)
2395{
2396 unsigned long flags;
2397 struct protection_domain *domain;
2398
2399 INC_STATS_COUNTER(cnt_free_coherent);
2400
2401 domain = get_domain(dev);
2402 if (IS_ERR(domain))
2403 goto free_mem;
2404
2405 spin_lock_irqsave(&domain->lock, flags);
2406
2407 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2408
2409 domain_flush_complete(domain);
2410
2411 spin_unlock_irqrestore(&domain->lock, flags);
2412
2413free_mem:
2414 free_pages((unsigned long)virt_addr, get_order(size));
2415}
2416
2417/*
2418 * This function is called by the DMA layer to find out if we can handle a
2419 * particular device. It is part of the dma_ops.
2420 */
2421static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2422{
2423 return check_device(dev);
2424}
2425
2426/*
2427 * The function for pre-allocating protection domains.
2428 *
2429 * Once the driver core informs the DMA layer when a driver grabs a
2430 * device we won't need to preallocate the protection domains anymore.
2431 * For now we have to.
2432 */
2433static void prealloc_protection_domains(void)
2434{
2435 struct pci_dev *dev = NULL;
2436 struct dma_ops_domain *dma_dom;
2437 u16 devid;
2438
2439 for_each_pci_dev(dev) {
2440
2441 /* Do we handle this device? */
2442 if (!check_device(&dev->dev))
2443 continue;
2444
2445 /* Is there already any domain for it? */
2446 if (domain_for_device(&dev->dev))
2447 continue;
2448
2449 devid = get_device_id(&dev->dev);
2450
2451 dma_dom = dma_ops_domain_alloc();
2452 if (!dma_dom)
2453 continue;
2454 init_unity_mappings_for_device(dma_dom, devid);
2455 dma_dom->target_dev = devid;
2456
2457 attach_device(&dev->dev, &dma_dom->domain);
2458
2459 list_add_tail(&dma_dom->list, &iommu_pd_list);
2460 }
2461}
2462
2463static struct dma_map_ops amd_iommu_dma_ops = {
2464 .alloc_coherent = alloc_coherent,
2465 .free_coherent = free_coherent,
2466 .map_page = map_page,
2467 .unmap_page = unmap_page,
2468 .map_sg = map_sg,
2469 .unmap_sg = unmap_sg,
2470 .dma_supported = amd_iommu_dma_supported,
2471};
2472
2473static unsigned device_dma_ops_init(void)
2474{
2475 struct pci_dev *pdev = NULL;
2476 unsigned unhandled = 0;
2477
2478 for_each_pci_dev(pdev) {
2479 if (!check_device(&pdev->dev)) {
2480 unhandled += 1;
2481 continue;
2482 }
2483
2484 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
2485 }
2486
2487 return unhandled;
2488}
2489
2490/*
2491 * The function which glues the AMD IOMMU driver into dma_ops.
2492 */
2493
2494void __init amd_iommu_init_api(void)
2495{
2496 register_iommu(&amd_iommu_ops);
2497}
2498
2499int __init amd_iommu_init_dma_ops(void)
2500{
2501 struct amd_iommu *iommu;
2502 int ret, unhandled;
2503
2504 /*
2505 * first allocate a default protection domain for every IOMMU we
2506 * found in the system. Devices not assigned to any other
2507 * protection domain will be assigned to the default one.
2508 */
2509 for_each_iommu(iommu) {
2510 iommu->default_dom = dma_ops_domain_alloc();
2511 if (iommu->default_dom == NULL)
2512 return -ENOMEM;
2513 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2514 ret = iommu_init_unity_mappings(iommu);
2515 if (ret)
2516 goto free_domains;
2517 }
2518
2519 /*
2520 * Pre-allocate the protection domains for each device.
2521 */
2522 prealloc_protection_domains();
2523
2524 iommu_detected = 1;
2525 swiotlb = 0;
2526
2527	/* Make our dma_ops finally visible to the drivers */
2528 unhandled = device_dma_ops_init();
2529 if (unhandled && max_pfn > MAX_DMA32_PFN) {
2530 /* There are unhandled devices - initialize swiotlb for them */
2531 swiotlb = 1;
2532 }
2533
2534 amd_iommu_stats_init();
2535
2536 return 0;
2537
2538free_domains:
2539
2540 for_each_iommu(iommu) {
2541 if (iommu->default_dom)
2542 dma_ops_domain_free(iommu->default_dom);
2543 }
2544
2545 return ret;
2546}
2547
2548/*****************************************************************************
2549 *
2550 * The following functions belong to the exported interface of AMD IOMMU
2551 *
2552 * This interface allows access to lower level functions of the IOMMU
2553 * like protection domain handling and assignment of devices to domains
2554 * which is not possible with the dma_ops interface.
2555 *
2556 *****************************************************************************/
2557
2558static void cleanup_domain(struct protection_domain *domain)
2559{
2560 struct iommu_dev_data *dev_data, *next;
2561 unsigned long flags;
2562
2563 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2564
2565 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2566 __detach_device(dev_data);
2567 atomic_set(&dev_data->bind, 0);
2568 }
2569
2570 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2571}
2572
2573static void protection_domain_free(struct protection_domain *domain)
2574{
2575 if (!domain)
2576 return;
2577
2578 del_domain_from_list(domain);
2579
2580 if (domain->id)
2581 domain_id_free(domain->id);
2582
2583 kfree(domain);
2584}
2585
2586static struct protection_domain *protection_domain_alloc(void)
2587{
2588 struct protection_domain *domain;
2589
2590 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2591 if (!domain)
2592 return NULL;
2593
2594 spin_lock_init(&domain->lock);
2595 mutex_init(&domain->api_lock);
2596 domain->id = domain_id_alloc();
2597 if (!domain->id)
2598 goto out_err;
2599 INIT_LIST_HEAD(&domain->dev_list);
2600
2601 add_domain_to_list(domain);
2602
2603 return domain;
2604
2605out_err:
2606 kfree(domain);
2607
2608 return NULL;
2609}
2610
2611static int amd_iommu_domain_init(struct iommu_domain *dom)
2612{
2613 struct protection_domain *domain;
2614
2615 domain = protection_domain_alloc();
2616 if (!domain)
2617 goto out_free;
2618
2619 domain->mode = PAGE_MODE_3_LEVEL;
2620 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2621 if (!domain->pt_root)
2622 goto out_free;
2623
2624 dom->priv = domain;
2625
2626 return 0;
2627
2628out_free:
2629 protection_domain_free(domain);
2630
2631 return -ENOMEM;
2632}
2633
2634static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2635{
2636 struct protection_domain *domain = dom->priv;
2637
2638 if (!domain)
2639 return;
2640
2641 if (domain->dev_cnt > 0)
2642 cleanup_domain(domain);
2643
2644 BUG_ON(domain->dev_cnt != 0);
2645
2646 free_pagetable(domain);
2647
2648 protection_domain_free(domain);
2649
2650 dom->priv = NULL;
2651}
2652
2653static void amd_iommu_detach_device(struct iommu_domain *dom,
2654 struct device *dev)
2655{
2656 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2657 struct amd_iommu *iommu;
2658 u16 devid;
2659
2660 if (!check_device(dev))
2661 return;
2662
2663 devid = get_device_id(dev);
2664
2665 if (dev_data->domain != NULL)
2666 detach_device(dev);
2667
2668 iommu = amd_iommu_rlookup_table[devid];
2669 if (!iommu)
2670 return;
2671
2672 iommu_completion_wait(iommu);
2673}
2674
2675static int amd_iommu_attach_device(struct iommu_domain *dom,
2676 struct device *dev)
2677{
2678 struct protection_domain *domain = dom->priv;
2679 struct iommu_dev_data *dev_data;
2680 struct amd_iommu *iommu;
2681 int ret;
2682
2683 if (!check_device(dev))
2684 return -EINVAL;
2685
2686 dev_data = dev->archdata.iommu;
2687
2688 iommu = amd_iommu_rlookup_table[dev_data->devid];
2689 if (!iommu)
2690 return -EINVAL;
2691
2692 if (dev_data->domain)
2693 detach_device(dev);
2694
2695 ret = attach_device(dev, domain);
2696
2697 iommu_completion_wait(iommu);
2698
2699 return ret;
2700}
2701
2702static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2703 phys_addr_t paddr, int gfp_order, int iommu_prot)
2704{
2705 unsigned long page_size = 0x1000UL << gfp_order;
2706 struct protection_domain *domain = dom->priv;
2707 int prot = 0;
2708 int ret;
2709
2710 if (iommu_prot & IOMMU_READ)
2711 prot |= IOMMU_PROT_IR;
2712 if (iommu_prot & IOMMU_WRITE)
2713 prot |= IOMMU_PROT_IW;
2714
2715 mutex_lock(&domain->api_lock);
2716 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2717 mutex_unlock(&domain->api_lock);
2718
2719 return ret;
2720}
2721
2722static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2723 int gfp_order)
2724{
2725 struct protection_domain *domain = dom->priv;
2726 unsigned long page_size, unmap_size;
2727
2728 page_size = 0x1000UL << gfp_order;
2729
2730 mutex_lock(&domain->api_lock);
2731 unmap_size = iommu_unmap_page(domain, iova, page_size);
2732 mutex_unlock(&domain->api_lock);
2733
2734 domain_flush_tlb_pde(domain);
2735
2736 return get_order(unmap_size);
2737}
2738
2739static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2740 unsigned long iova)
2741{
2742 struct protection_domain *domain = dom->priv;
2743 unsigned long offset_mask;
2744 phys_addr_t paddr;
2745 u64 *pte, __pte;
2746
2747 pte = fetch_pte(domain, iova);
2748
2749 if (!pte || !IOMMU_PTE_PRESENT(*pte))
2750 return 0;
2751
2752 if (PM_PTE_LEVEL(*pte) == 0)
2753 offset_mask = PAGE_SIZE - 1;
2754 else
2755 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2756
2757 __pte = *pte & PM_ADDR_MASK;
2758 paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2759
2760 return paddr;
2761}
2762
2763static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2764 unsigned long cap)
2765{
2766 switch (cap) {
2767 case IOMMU_CAP_CACHE_COHERENCY:
2768 return 1;
2769 }
2770
2771 return 0;
2772}
2773
2774static struct iommu_ops amd_iommu_ops = {
2775 .domain_init = amd_iommu_domain_init,
2776 .domain_destroy = amd_iommu_domain_destroy,
2777 .attach_dev = amd_iommu_attach_device,
2778 .detach_dev = amd_iommu_detach_device,
2779 .map = amd_iommu_map,
2780 .unmap = amd_iommu_unmap,
2781 .iova_to_phys = amd_iommu_iova_to_phys,
2782 .domain_has_cap = amd_iommu_domain_has_cap,
2783};
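
/*
 * Editor's note: a minimal usage sketch, not part of this commit, showing how
 * a client of the generic IOMMU API ends up in the callbacks above once
 * amd_iommu_init_api() has called register_iommu(&amd_iommu_ops). It assumes
 * the 3.1-era interface in which iommu_map()/iommu_unmap() take a page order
 * instead of a byte size; the function and the caller-supplied iova/paddr
 * values are hypothetical.
 */
#include <linux/iommu.h>

static int __maybe_unused example_map_one_page(struct device *dev,
					       unsigned long iova,
					       phys_addr_t paddr)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();		/* -> amd_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);	/* -> amd_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* order 0 == one 4kb page; prot is turned into IOMMU_PROT_IR/IW */
	ret = iommu_map(dom, iova, paddr, 0, IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	iommu_unmap(dom, iova, 0);		/* -> amd_iommu_unmap() */

out_detach:
	iommu_detach_device(dom, dev);		/* -> amd_iommu_detach_device() */
out_free:
	iommu_domain_free(dom);
	return ret;
}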
2784
2785/*****************************************************************************
2786 *
2787 * The next functions do a basic initialization of IOMMU for pass through
2788 * mode
2789 *
2790 * In passthrough mode the IOMMU is initialized and enabled but not used for
2791 * DMA-API translation.
2792 *
2793 *****************************************************************************/
2794
2795int __init amd_iommu_init_passthrough(void)
2796{
2797 struct amd_iommu *iommu;
2798 struct pci_dev *dev = NULL;
2799 u16 devid;
2800
2801 /* allocate passthrough domain */
2802 pt_domain = protection_domain_alloc();
2803 if (!pt_domain)
2804 return -ENOMEM;
2805
2806 pt_domain->mode |= PAGE_MODE_NONE;
2807
2808 for_each_pci_dev(dev) {
2809 if (!check_device(&dev->dev))
2810 continue;
2811
2812 devid = get_device_id(&dev->dev);
2813
2814 iommu = amd_iommu_rlookup_table[devid];
2815 if (!iommu)
2816 continue;
2817
2818 attach_device(&dev->dev, pt_domain);
2819 }
2820
2821 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2822
2823 return 0;
2824}
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
new file mode 100644
index 000000000000..82d2410f4205
--- /dev/null
+++ b/drivers/iommu/amd_iommu_init.c
@@ -0,0 +1,1574 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/acpi.h>
22#include <linux/list.h>
23#include <linux/slab.h>
24#include <linux/syscore_ops.h>
25#include <linux/interrupt.h>
26#include <linux/msi.h>
27#include <linux/amd-iommu.h>
28#include <asm/pci-direct.h>
29#include <asm/iommu.h>
30#include <asm/gart.h>
31#include <asm/x86_init.h>
32#include <asm/iommu_table.h>
33
34#include "amd_iommu_proto.h"
35#include "amd_iommu_types.h"
36
37/*
38 * definitions for the ACPI scanning code
39 */
40#define IVRS_HEADER_LENGTH 48
41
42#define ACPI_IVHD_TYPE 0x10
43#define ACPI_IVMD_TYPE_ALL 0x20
44#define ACPI_IVMD_TYPE 0x21
45#define ACPI_IVMD_TYPE_RANGE 0x22
46
47#define IVHD_DEV_ALL 0x01
48#define IVHD_DEV_SELECT 0x02
49#define IVHD_DEV_SELECT_RANGE_START 0x03
50#define IVHD_DEV_RANGE_END 0x04
51#define IVHD_DEV_ALIAS 0x42
52#define IVHD_DEV_ALIAS_RANGE 0x43
53#define IVHD_DEV_EXT_SELECT 0x46
54#define IVHD_DEV_EXT_SELECT_RANGE 0x47
55
56#define IVHD_FLAG_HT_TUN_EN_MASK 0x01
57#define IVHD_FLAG_PASSPW_EN_MASK 0x02
58#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
59#define IVHD_FLAG_ISOC_EN_MASK 0x08
60
61#define IVMD_FLAG_EXCL_RANGE 0x08
62#define IVMD_FLAG_UNITY_MAP 0x01
63
64#define ACPI_DEVFLAG_INITPASS 0x01
65#define ACPI_DEVFLAG_EXTINT 0x02
66#define ACPI_DEVFLAG_NMI 0x04
67#define ACPI_DEVFLAG_SYSMGT1 0x10
68#define ACPI_DEVFLAG_SYSMGT2 0x20
69#define ACPI_DEVFLAG_LINT0 0x40
70#define ACPI_DEVFLAG_LINT1 0x80
71#define ACPI_DEVFLAG_ATSDIS 0x10000000
72
73/*
74 * ACPI table definitions
75 *
76 * These data structures are laid over the table to parse the important values
77 * out of it.
78 */
79
80/*
81 * structure describing one IOMMU in the ACPI table. Typically followed by one
82 * or more ivhd_entrys.
83 */
84struct ivhd_header {
85 u8 type;
86 u8 flags;
87 u16 length;
88 u16 devid;
89 u16 cap_ptr;
90 u64 mmio_phys;
91 u16 pci_seg;
92 u16 info;
93 u32 reserved;
94} __attribute__((packed));
95
96/*
97 * A device entry describing which devices a specific IOMMU translates and
98 * which requestor ids they use.
99 */
100struct ivhd_entry {
101 u8 type;
102 u16 devid;
103 u8 flags;
104 u32 ext;
105} __attribute__((packed));
106
107/*
108 * An AMD IOMMU memory definition structure. It defines things like exclusion
109 * ranges for devices and regions that should be unity mapped.
110 */
111struct ivmd_header {
112 u8 type;
113 u8 flags;
114 u16 length;
115 u16 devid;
116 u16 aux;
117 u64 resv;
118 u64 range_start;
119 u64 range_length;
120} __attribute__((packed));
121
122bool amd_iommu_dump;
123
124static int __initdata amd_iommu_detected;
125static bool __initdata amd_iommu_disabled;
126
127u16 amd_iommu_last_bdf; /* largest PCI device id we have
128 to handle */
129LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
130 we find in ACPI */
131bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
132
133LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
134 system */
135
136/* Array to assign indices to IOMMUs */
137struct amd_iommu *amd_iommus[MAX_IOMMUS];
138int amd_iommus_present;
139
140/* IOMMUs have a non-present cache? */
141bool amd_iommu_np_cache __read_mostly;
142bool amd_iommu_iotlb_sup __read_mostly = true;
143
144/*
145 * The ACPI table parsing functions set this variable on an error
146 */
147static int __initdata amd_iommu_init_err;
148
149/*
150 * List of protection domains - used during resume
151 */
152LIST_HEAD(amd_iommu_pd_list);
153spinlock_t amd_iommu_pd_lock;
154
155/*
156 * Pointer to the device table which is shared by all AMD IOMMUs.
157 * It is indexed by the PCI device id or the HT unit id and contains
158 * information about the domain the device belongs to as well as the
159 * page table root pointer.
160 */
161struct dev_table_entry *amd_iommu_dev_table;
162
163/*
164 * The alias table is a driver specific data structure which contains the
165 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
166 * More than one device can share the same requestor id.
167 */
168u16 *amd_iommu_alias_table;
169
170/*
171 * The rlookup table is used to find the IOMMU which is responsible
172 * for a specific device. It is also indexed by the PCI device id.
173 */
174struct amd_iommu **amd_iommu_rlookup_table;
175
176/*
177 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
178 * to know which ones are already in use.
179 */
180unsigned long *amd_iommu_pd_alloc_bitmap;
181
182static u32 dev_table_size; /* size of the device table */
183static u32 alias_table_size; /* size of the alias table */
184static u32 rlookup_table_size;	/* size of the rlookup table */
185
186/*
187 * This function flushes all internal caches of
188 * the IOMMU used by this driver.
189 */
190extern void iommu_flush_all_caches(struct amd_iommu *iommu);
191
192static inline void update_last_devid(u16 devid)
193{
194 if (devid > amd_iommu_last_bdf)
195 amd_iommu_last_bdf = devid;
196}
197
198static inline unsigned long tbl_size(int entry_size)
199{
200 unsigned shift = PAGE_SHIFT +
201 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
202
203 return 1UL << shift;
204}
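
/*
 * Editor's note, a worked example: with amd_iommu_last_bdf = 0xffff and the
 * 32-byte device table entries used by this driver (eight u32s per entry,
 * see set_dev_entry_bit() below), (0xffff + 1) * 32 = 2 MB, get_order() adds
 * 9 to PAGE_SHIFT and tbl_size() returns 2 MB. The alias and rlookup tables
 * are sized the same way from their respective entry sizes.
 */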
205
206/* Access to l1 and l2 indexed register spaces */
207
208static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
209{
210 u32 val;
211
212 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
213 pci_read_config_dword(iommu->dev, 0xfc, &val);
214 return val;
215}
216
217static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
218{
219 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
220 pci_write_config_dword(iommu->dev, 0xfc, val);
221 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
222}
223
224static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
225{
226 u32 val;
227
228 pci_write_config_dword(iommu->dev, 0xf0, address);
229 pci_read_config_dword(iommu->dev, 0xf4, &val);
230 return val;
231}
232
233static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
234{
235 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
236 pci_write_config_dword(iommu->dev, 0xf4, val);
237}
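
/*
 * Editor's note: the four helpers above implement an indirect register
 * interface in PCI config space. For l1 space the index (with the bank
 * number shifted into the upper bits and bit 31 enabling writes) goes to
 * offset 0xf8 and the data is accessed at 0xfc; for l2 space the index goes
 * to 0xf0 (bit 8 enables writes) and the data lives at 0xf4. They are used
 * by iommu_apply_resume_quirks() to restore RD890 state after suspend.
 */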
238
239/****************************************************************************
240 *
241 * AMD IOMMU MMIO register space handling functions
242 *
243 * These functions are used to program the IOMMU device registers in
244 * MMIO space required by this driver.
245 *
246 ****************************************************************************/
247
248/*
249 * This function sets the exclusion range in the IOMMU. DMA accesses to the
250 * exclusion range are passed through untranslated
251 */
252static void iommu_set_exclusion_range(struct amd_iommu *iommu)
253{
254 u64 start = iommu->exclusion_start & PAGE_MASK;
255 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
256 u64 entry;
257
258 if (!iommu->exclusion_start)
259 return;
260
261 entry = start | MMIO_EXCL_ENABLE_MASK;
262 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
263 &entry, sizeof(entry));
264
265 entry = limit;
266 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
267 &entry, sizeof(entry));
268}
269
270/* Programs the physical address of the device table into the IOMMU hardware */
271static void __init iommu_set_device_table(struct amd_iommu *iommu)
272{
273 u64 entry;
274
275 BUG_ON(iommu->mmio_base == NULL);
276
277 entry = virt_to_phys(amd_iommu_dev_table);
278 entry |= (dev_table_size >> 12) - 1;
279 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
280 &entry, sizeof(entry));
281}
282
283/* Generic functions to enable/disable certain features of the IOMMU. */
284static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
285{
286 u32 ctrl;
287
288 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
289 ctrl |= (1 << bit);
290 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
291}
292
293static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
294{
295 u32 ctrl;
296
297 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
298 ctrl &= ~(1 << bit);
299 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
300}
301
302/* Function to enable the hardware */
303static void iommu_enable(struct amd_iommu *iommu)
304{
305 static const char * const feat_str[] = {
306 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
307 "IA", "GA", "HE", "PC", NULL
308 };
309 int i;
310
311 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
312 dev_name(&iommu->dev->dev), iommu->cap_ptr);
313
314 if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
315 printk(KERN_CONT " extended features: ");
316 for (i = 0; feat_str[i]; ++i)
317 if (iommu_feature(iommu, (1ULL << i)))
318 printk(KERN_CONT " %s", feat_str[i]);
319 }
320 printk(KERN_CONT "\n");
321
322 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
323}
324
325static void iommu_disable(struct amd_iommu *iommu)
326{
327 /* Disable command buffer */
328 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
329
330 /* Disable event logging and event interrupts */
331 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
332 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
333
334 /* Disable IOMMU hardware itself */
335 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
336}
337
338/*
339 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
340 * the system has one.
341 */
342static u8 * __init iommu_map_mmio_space(u64 address)
343{
344 u8 *ret;
345
346 if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
347 pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
348 address);
349 pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
350 return NULL;
351 }
352
353 ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
354 if (ret != NULL)
355 return ret;
356
357 release_mem_region(address, MMIO_REGION_LENGTH);
358
359 return NULL;
360}
361
362static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
363{
364 if (iommu->mmio_base)
365 iounmap(iommu->mmio_base);
366 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
367}
368
369/****************************************************************************
370 *
371 * The functions below belong to the first pass of AMD IOMMU ACPI table
372 * parsing. In this pass we try to find out the highest device id this
373 * code has to handle. Based on this information the size of the shared data
374 * structures is determined later.
375 *
376 ****************************************************************************/
377
378/*
379 * This function calculates the length of a given IVHD entry
380 */
381static inline int ivhd_entry_length(u8 *ivhd)
382{
383 return 0x04 << (*ivhd >> 6);
384}
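
/*
 * Editor's note, a worked example: the top two bits of the entry type encode
 * its size class, so types below 0x40 (e.g. IVHD_DEV_SELECT, 0x02) yield
 * 0x04 << 0 = 4-byte entries and types 0x40-0x7f (e.g. IVHD_DEV_ALIAS, 0x42)
 * yield 0x04 << 1 = 8-byte entries.
 */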
385
386/*
387 * This function reads the last device id the IOMMU has to handle from the PCI
388 * capability header for this IOMMU
389 */
390static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
391{
392 u32 cap;
393
394 cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
395 update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
396
397 return 0;
398}
399
400/*
401 * After reading the highest device id from the IOMMU PCI capability header
402 * this function checks whether a higher device id is defined in the ACPI table
403 */
404static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
405{
406 u8 *p = (void *)h, *end = (void *)h;
407 struct ivhd_entry *dev;
408
409 p += sizeof(*h);
410 end += h->length;
411
412 find_last_devid_on_pci(PCI_BUS(h->devid),
413 PCI_SLOT(h->devid),
414 PCI_FUNC(h->devid),
415 h->cap_ptr);
416
417 while (p < end) {
418 dev = (struct ivhd_entry *)p;
419 switch (dev->type) {
420 case IVHD_DEV_SELECT:
421 case IVHD_DEV_RANGE_END:
422 case IVHD_DEV_ALIAS:
423 case IVHD_DEV_EXT_SELECT:
424 /* all the above subfield types refer to device ids */
425 update_last_devid(dev->devid);
426 break;
427 default:
428 break;
429 }
430 p += ivhd_entry_length(p);
431 }
432
433 WARN_ON(p != end);
434
435 return 0;
436}
437
438/*
439 * Iterate over all IVHD entries in the ACPI table and find the highest device
440 * id which we need to handle. This is the first of three functions which parse
441 * the ACPI table. So we check the checksum here.
442 */
443static int __init find_last_devid_acpi(struct acpi_table_header *table)
444{
445 int i;
446 u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
447 struct ivhd_header *h;
448
449 /*
450 * Validate checksum here so we don't need to do it when
451 * we actually parse the table
452 */
453 for (i = 0; i < table->length; ++i)
454 checksum += p[i];
455 if (checksum != 0) {
456 /* ACPI table corrupt */
457 amd_iommu_init_err = -ENODEV;
458 return 0;
459 }
460
461 p += IVRS_HEADER_LENGTH;
462
463 end += table->length;
464 while (p < end) {
465 h = (struct ivhd_header *)p;
466 switch (h->type) {
467 case ACPI_IVHD_TYPE:
468 find_last_devid_from_ivhd(h);
469 break;
470 default:
471 break;
472 }
473 p += h->length;
474 }
475 WARN_ON(p != end);
476
477 return 0;
478}
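
/*
 * Editor's note: the checksum test above follows the usual ACPI rule - the
 * 8-bit sum of every byte in the table, including the checksum byte itself,
 * must wrap around to zero, so a single pass with a u8 accumulator validates
 * the whole IVRS table.
 */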
479
480/****************************************************************************
481 *
482 * The following functions belong to the code path which parses the ACPI table
483 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
484 * data structures, initialize the device/alias/rlookup table and also
485 * basically initialize the hardware.
486 *
487 ****************************************************************************/
488
489/*
490 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
491 * write commands to that buffer later and the IOMMU will execute them
492 * asynchronously
493 */
494static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
495{
496 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
497 get_order(CMD_BUFFER_SIZE));
498
499 if (cmd_buf == NULL)
500 return NULL;
501
502 iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
503
504 return cmd_buf;
505}
506
507/*
508 * This function resets the command buffer if the IOMMU stopped fetching
509 * commands from it.
510 */
511void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
512{
513 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
514
515 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
516 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
517
518 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
519}
520
521/*
522 * This function writes the command buffer address to the hardware and
523 * enables it.
524 */
525static void iommu_enable_command_buffer(struct amd_iommu *iommu)
526{
527 u64 entry;
528
529 BUG_ON(iommu->cmd_buf == NULL);
530
531 entry = (u64)virt_to_phys(iommu->cmd_buf);
532 entry |= MMIO_CMD_SIZE_512;
533
534 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
535 &entry, sizeof(entry));
536
537 amd_iommu_reset_cmd_buffer(iommu);
538 iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
539}
540
541static void __init free_command_buffer(struct amd_iommu *iommu)
542{
543 free_pages((unsigned long)iommu->cmd_buf,
544 get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
545}
546
547/* allocates the memory where the IOMMU will log its events to */
548static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
549{
550 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
551 get_order(EVT_BUFFER_SIZE));
552
553 if (iommu->evt_buf == NULL)
554 return NULL;
555
556 iommu->evt_buf_size = EVT_BUFFER_SIZE;
557
558 return iommu->evt_buf;
559}
560
561static void iommu_enable_event_buffer(struct amd_iommu *iommu)
562{
563 u64 entry;
564
565 BUG_ON(iommu->evt_buf == NULL);
566
567 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
568
569 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
570 &entry, sizeof(entry));
571
572 /* set head and tail to zero manually */
573 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
574 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
575
576 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
577}
578
579static void __init free_event_buffer(struct amd_iommu *iommu)
580{
581 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
582}
583
584/* sets a specific bit in the device table entry. */
585static void set_dev_entry_bit(u16 devid, u8 bit)
586{
587 int i = (bit >> 5) & 0x07;
588 int _bit = bit & 0x1f;
589
590 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
591}
592
593static int get_dev_entry_bit(u16 devid, u8 bit)
594{
595 int i = (bit >> 5) & 0x07;
596 int _bit = bit & 0x1f;
597
598 return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
599}
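
/*
 * Editor's note, a worked example with a hypothetical bit number: for
 * bit 98, (98 >> 5) & 0x07 selects data[3] and 98 & 0x1f selects bit 2
 * within it, i.e. the 256-bit device table entry is addressed as eight
 * 32-bit words.
 */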
600
601
602void amd_iommu_apply_erratum_63(u16 devid)
603{
604 int sysmgt;
605
606 sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
607 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
608
609 if (sysmgt == 0x01)
610 set_dev_entry_bit(devid, DEV_ENTRY_IW);
611}
612
613/* Writes the specific IOMMU for a device into the rlookup table */
614static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
615{
616 amd_iommu_rlookup_table[devid] = iommu;
617}
618
619/*
620 * This function takes the device specific flags read from the ACPI
621 * table and sets up the device table entry with that information
622 */
623static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
624 u16 devid, u32 flags, u32 ext_flags)
625{
626 if (flags & ACPI_DEVFLAG_INITPASS)
627 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
628 if (flags & ACPI_DEVFLAG_EXTINT)
629 set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
630 if (flags & ACPI_DEVFLAG_NMI)
631 set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
632 if (flags & ACPI_DEVFLAG_SYSMGT1)
633 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
634 if (flags & ACPI_DEVFLAG_SYSMGT2)
635 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
636 if (flags & ACPI_DEVFLAG_LINT0)
637 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
638 if (flags & ACPI_DEVFLAG_LINT1)
639 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
640
641 amd_iommu_apply_erratum_63(devid);
642
643 set_iommu_for_device(iommu, devid);
644}
645
646/*
647 * Reads the device exclusion range from ACPI and initializes the IOMMU
648 * with it
649 */
650static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
651{
652 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
653
654 if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
655 return;
656
657 if (iommu) {
658 /*
659		 * We can only configure exclusion ranges per IOMMU, not
660 * per device. But we can enable the exclusion range per
661 * device. This is done here
662 */
663 set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
664 iommu->exclusion_start = m->range_start;
665 iommu->exclusion_length = m->range_length;
666 }
667}
668
669/*
670 * This function reads some important data from the IOMMU PCI space and
671 * initializes the driver data structure with it. It reads the hardware
672 * capabilities and the first/last device entries
673 */
674static void __init init_iommu_from_pci(struct amd_iommu *iommu)
675{
676 int cap_ptr = iommu->cap_ptr;
677 u32 range, misc, low, high;
678 int i, j;
679
680 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
681 &iommu->cap);
682 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
683 &range);
684 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
685 &misc);
686
687 iommu->first_device = calc_devid(MMIO_GET_BUS(range),
688 MMIO_GET_FD(range));
689 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
690 MMIO_GET_LD(range));
691 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
692
693 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
694 amd_iommu_iotlb_sup = false;
695
696 /* read extended feature bits */
697 low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
698 high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
699
700 iommu->features = ((u64)high << 32) | low;
701
702 if (!is_rd890_iommu(iommu->dev))
703 return;
704
705 /*
706 * Some rd890 systems may not be fully reconfigured by the BIOS, so
707 * it's necessary for us to store this information so it can be
708 * reprogrammed on resume
709 */
710
711 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
712 &iommu->stored_addr_lo);
713 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
714 &iommu->stored_addr_hi);
715
716 /* Low bit locks writes to configuration space */
717 iommu->stored_addr_lo &= ~1;
718
719 for (i = 0; i < 6; i++)
720 for (j = 0; j < 0x12; j++)
721 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
722
723 for (i = 0; i < 0x83; i++)
724 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
725}
726
727/*
728 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
729 * initializes the hardware and our data structures with it.
730 */
731static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
732 struct ivhd_header *h)
733{
734 u8 *p = (u8 *)h;
735 u8 *end = p, flags = 0;
736 u16 devid = 0, devid_start = 0, devid_to = 0;
737 u32 dev_i, ext_flags = 0;
738 bool alias = false;
739 struct ivhd_entry *e;
740
741 /*
742 * First save the recommended feature enable bits from ACPI
743 */
744 iommu->acpi_flags = h->flags;
745
746 /*
747 * Done. Now parse the device entries
748 */
749 p += sizeof(struct ivhd_header);
750 end += h->length;
751
752
753 while (p < end) {
754 e = (struct ivhd_entry *)p;
755 switch (e->type) {
756 case IVHD_DEV_ALL:
757
758 DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
759 " last device %02x:%02x.%x flags: %02x\n",
760 PCI_BUS(iommu->first_device),
761 PCI_SLOT(iommu->first_device),
762 PCI_FUNC(iommu->first_device),
763 PCI_BUS(iommu->last_device),
764 PCI_SLOT(iommu->last_device),
765 PCI_FUNC(iommu->last_device),
766 e->flags);
767
768 for (dev_i = iommu->first_device;
769 dev_i <= iommu->last_device; ++dev_i)
770 set_dev_entry_from_acpi(iommu, dev_i,
771 e->flags, 0);
772 break;
773 case IVHD_DEV_SELECT:
774
775 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
776 "flags: %02x\n",
777 PCI_BUS(e->devid),
778 PCI_SLOT(e->devid),
779 PCI_FUNC(e->devid),
780 e->flags);
781
782 devid = e->devid;
783 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
784 break;
785 case IVHD_DEV_SELECT_RANGE_START:
786
787 DUMP_printk(" DEV_SELECT_RANGE_START\t "
788 "devid: %02x:%02x.%x flags: %02x\n",
789 PCI_BUS(e->devid),
790 PCI_SLOT(e->devid),
791 PCI_FUNC(e->devid),
792 e->flags);
793
794 devid_start = e->devid;
795 flags = e->flags;
796 ext_flags = 0;
797 alias = false;
798 break;
799 case IVHD_DEV_ALIAS:
800
801 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
802 "flags: %02x devid_to: %02x:%02x.%x\n",
803 PCI_BUS(e->devid),
804 PCI_SLOT(e->devid),
805 PCI_FUNC(e->devid),
806 e->flags,
807 PCI_BUS(e->ext >> 8),
808 PCI_SLOT(e->ext >> 8),
809 PCI_FUNC(e->ext >> 8));
810
811 devid = e->devid;
812 devid_to = e->ext >> 8;
813 set_dev_entry_from_acpi(iommu, devid , e->flags, 0);
814 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
815 amd_iommu_alias_table[devid] = devid_to;
816 break;
817 case IVHD_DEV_ALIAS_RANGE:
818
819 DUMP_printk(" DEV_ALIAS_RANGE\t\t "
820 "devid: %02x:%02x.%x flags: %02x "
821 "devid_to: %02x:%02x.%x\n",
822 PCI_BUS(e->devid),
823 PCI_SLOT(e->devid),
824 PCI_FUNC(e->devid),
825 e->flags,
826 PCI_BUS(e->ext >> 8),
827 PCI_SLOT(e->ext >> 8),
828 PCI_FUNC(e->ext >> 8));
829
830 devid_start = e->devid;
831 flags = e->flags;
832 devid_to = e->ext >> 8;
833 ext_flags = 0;
834 alias = true;
835 break;
836 case IVHD_DEV_EXT_SELECT:
837
838 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
839 "flags: %02x ext: %08x\n",
840 PCI_BUS(e->devid),
841 PCI_SLOT(e->devid),
842 PCI_FUNC(e->devid),
843 e->flags, e->ext);
844
845 devid = e->devid;
846 set_dev_entry_from_acpi(iommu, devid, e->flags,
847 e->ext);
848 break;
849 case IVHD_DEV_EXT_SELECT_RANGE:
850
851 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
852 "%02x:%02x.%x flags: %02x ext: %08x\n",
853 PCI_BUS(e->devid),
854 PCI_SLOT(e->devid),
855 PCI_FUNC(e->devid),
856 e->flags, e->ext);
857
858 devid_start = e->devid;
859 flags = e->flags;
860 ext_flags = e->ext;
861 alias = false;
862 break;
863 case IVHD_DEV_RANGE_END:
864
865 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
866 PCI_BUS(e->devid),
867 PCI_SLOT(e->devid),
868 PCI_FUNC(e->devid));
869
870 devid = e->devid;
871 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
872 if (alias) {
873 amd_iommu_alias_table[dev_i] = devid_to;
874 set_dev_entry_from_acpi(iommu,
875 devid_to, flags, ext_flags);
876 }
877 set_dev_entry_from_acpi(iommu, dev_i,
878 flags, ext_flags);
879 }
880 break;
881 default:
882 break;
883 }
884
885 p += ivhd_entry_length(p);
886 }
887}
888
889/* Initializes the device->iommu mapping for the driver */
890static int __init init_iommu_devices(struct amd_iommu *iommu)
891{
892 u32 i;
893
894 for (i = iommu->first_device; i <= iommu->last_device; ++i)
895 set_iommu_for_device(iommu, i);
896
897 return 0;
898}
899
900static void __init free_iommu_one(struct amd_iommu *iommu)
901{
902 free_command_buffer(iommu);
903 free_event_buffer(iommu);
904 iommu_unmap_mmio_space(iommu);
905}
906
907static void __init free_iommu_all(void)
908{
909 struct amd_iommu *iommu, *next;
910
911 for_each_iommu_safe(iommu, next) {
912 list_del(&iommu->list);
913 free_iommu_one(iommu);
914 kfree(iommu);
915 }
916}
917
918/*
919 * This function glues the initialization of one IOMMU
920 * together and also allocates the command buffer and programs the
921 * hardware. It does NOT enable the IOMMU. This is done afterwards.
922 */
923static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
924{
925 spin_lock_init(&iommu->lock);
926
927 /* Add IOMMU to internal data structures */
928 list_add_tail(&iommu->list, &amd_iommu_list);
929 iommu->index = amd_iommus_present++;
930
931 if (unlikely(iommu->index >= MAX_IOMMUS)) {
932 WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
933 return -ENOSYS;
934 }
935
936 /* Index is fine - add IOMMU to the array */
937 amd_iommus[iommu->index] = iommu;
938
939 /*
940 * Copy data from ACPI table entry to the iommu struct
941 */
942 iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
943 if (!iommu->dev)
944 return 1;
945
946 iommu->cap_ptr = h->cap_ptr;
947 iommu->pci_seg = h->pci_seg;
948 iommu->mmio_phys = h->mmio_phys;
949 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
950 if (!iommu->mmio_base)
951 return -ENOMEM;
952
953 iommu->cmd_buf = alloc_command_buffer(iommu);
954 if (!iommu->cmd_buf)
955 return -ENOMEM;
956
957 iommu->evt_buf = alloc_event_buffer(iommu);
958 if (!iommu->evt_buf)
959 return -ENOMEM;
960
961 iommu->int_enabled = false;
962
963 init_iommu_from_pci(iommu);
964 init_iommu_from_acpi(iommu, h);
965 init_iommu_devices(iommu);
966
967 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
968 amd_iommu_np_cache = true;
969
970 return pci_enable_device(iommu->dev);
971}
972
973/*
974 * Iterates over all IOMMU entries in the ACPI table, allocates the
975 * IOMMU structure and initializes it with init_iommu_one()
976 */
977static int __init init_iommu_all(struct acpi_table_header *table)
978{
979 u8 *p = (u8 *)table, *end = (u8 *)table;
980 struct ivhd_header *h;
981 struct amd_iommu *iommu;
982 int ret;
983
984 end += table->length;
985 p += IVRS_HEADER_LENGTH;
986
987 while (p < end) {
988 h = (struct ivhd_header *)p;
989 switch (*p) {
990 case ACPI_IVHD_TYPE:
991
992 DUMP_printk("device: %02x:%02x.%01x cap: %04x "
993 "seg: %d flags: %01x info %04x\n",
994 PCI_BUS(h->devid), PCI_SLOT(h->devid),
995 PCI_FUNC(h->devid), h->cap_ptr,
996 h->pci_seg, h->flags, h->info);
997 DUMP_printk(" mmio-addr: %016llx\n",
998 h->mmio_phys);
999
1000 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1001 if (iommu == NULL) {
1002 amd_iommu_init_err = -ENOMEM;
1003 return 0;
1004 }
1005
1006 ret = init_iommu_one(iommu, h);
1007 if (ret) {
1008 amd_iommu_init_err = ret;
1009 return 0;
1010 }
1011 break;
1012 default:
1013 break;
1014 }
1015 p += h->length;
1016
1017 }
1018 WARN_ON(p != end);
1019
1020 return 0;
1021}
1022
1023/****************************************************************************
1024 *
1025 * The following functions initialize the MSI interrupts for all IOMMUs
1026 * in the system. It's a bit challenging because there could be multiple
1027 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1028 * pci_dev.
1029 *
1030 ****************************************************************************/
1031
1032static int iommu_setup_msi(struct amd_iommu *iommu)
1033{
1034 int r;
1035
1036 if (pci_enable_msi(iommu->dev))
1037 return 1;
1038
1039 r = request_threaded_irq(iommu->dev->irq,
1040 amd_iommu_int_handler,
1041 amd_iommu_int_thread,
1042 0, "AMD-Vi",
1043 iommu->dev);
1044
1045 if (r) {
1046 pci_disable_msi(iommu->dev);
1047 return 1;
1048 }
1049
1050 iommu->int_enabled = true;
1051 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
1052
1053 return 0;
1054}
1055
1056static int iommu_init_msi(struct amd_iommu *iommu)
1057{
1058 if (iommu->int_enabled)
1059 return 0;
1060
1061 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
1062 return iommu_setup_msi(iommu);
1063
1064 return 1;
1065}
1066
1067/****************************************************************************
1068 *
1069 * The next functions belong to the third pass of parsing the ACPI
1070 * table. In this last pass the memory mapping requirements are
1071 * gathered (like exclusion and unity mapping ranges).
1072 *
1073 ****************************************************************************/
1074
1075static void __init free_unity_maps(void)
1076{
1077 struct unity_map_entry *entry, *next;
1078
1079 list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
1080 list_del(&entry->list);
1081 kfree(entry);
1082 }
1083}
1084
1085/* called when we find an exclusion range definition in ACPI */
1086static int __init init_exclusion_range(struct ivmd_header *m)
1087{
1088 int i;
1089
1090 switch (m->type) {
1091 case ACPI_IVMD_TYPE:
1092 set_device_exclusion_range(m->devid, m);
1093 break;
1094 case ACPI_IVMD_TYPE_ALL:
1095 for (i = 0; i <= amd_iommu_last_bdf; ++i)
1096 set_device_exclusion_range(i, m);
1097 break;
1098 case ACPI_IVMD_TYPE_RANGE:
1099 for (i = m->devid; i <= m->aux; ++i)
1100 set_device_exclusion_range(i, m);
1101 break;
1102 default:
1103 break;
1104 }
1105
1106 return 0;
1107}
1108
1109/* called for unity map ACPI definition */
1110static int __init init_unity_map_range(struct ivmd_header *m)
1111{
1112	struct unity_map_entry *e = NULL;
1113 char *s;
1114
1115 e = kzalloc(sizeof(*e), GFP_KERNEL);
1116 if (e == NULL)
1117 return -ENOMEM;
1118
1119 switch (m->type) {
1120 default:
1121 kfree(e);
1122 return 0;
1123 case ACPI_IVMD_TYPE:
1124		s = "IVMD_TYPE\t\t\t";
1125 e->devid_start = e->devid_end = m->devid;
1126 break;
1127 case ACPI_IVMD_TYPE_ALL:
1128 s = "IVMD_TYPE_ALL\t\t";
1129 e->devid_start = 0;
1130 e->devid_end = amd_iommu_last_bdf;
1131 break;
1132 case ACPI_IVMD_TYPE_RANGE:
1133 s = "IVMD_TYPE_RANGE\t\t";
1134 e->devid_start = m->devid;
1135 e->devid_end = m->aux;
1136 break;
1137 }
1138 e->address_start = PAGE_ALIGN(m->range_start);
1139 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
1140 e->prot = m->flags >> 1;
1141
1142 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
1143 " range_start: %016llx range_end: %016llx flags: %x\n", s,
1144 PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
1145 PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
1146 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
1147 e->address_start, e->address_end, m->flags);
1148
1149 list_add_tail(&e->list, &amd_iommu_unity_map);
1150
1151 return 0;
1152}
1153
1154/* iterates over all memory definitions we find in the ACPI table */
1155static int __init init_memory_definitions(struct acpi_table_header *table)
1156{
1157 u8 *p = (u8 *)table, *end = (u8 *)table;
1158 struct ivmd_header *m;
1159
1160 end += table->length;
1161 p += IVRS_HEADER_LENGTH;
1162
1163 while (p < end) {
1164 m = (struct ivmd_header *)p;
1165 if (m->flags & IVMD_FLAG_EXCL_RANGE)
1166 init_exclusion_range(m);
1167 else if (m->flags & IVMD_FLAG_UNITY_MAP)
1168 init_unity_map_range(m);
1169
1170 p += m->length;
1171 }
1172
1173 return 0;
1174}
1175
1176/*
1177 * Init the device table to not allow DMA access for devices and
1178 * suppress all page faults
1179 */
1180static void init_device_table(void)
1181{
1182 u32 devid;
1183
1184 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
1185 set_dev_entry_bit(devid, DEV_ENTRY_VALID);
1186 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
1187 }
1188}
1189
1190static void iommu_init_flags(struct amd_iommu *iommu)
1191{
1192 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
1193 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
1194 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
1195
1196 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
1197 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
1198 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
1199
1200 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
1201 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
1202 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
1203
1204 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
1205 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
1206 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
1207
1208 /*
1209 * make IOMMU memory accesses cache coherent
1210 */
1211 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1212}
1213
1214static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1215{
1216 int i, j;
1217 u32 ioc_feature_control;
1218 struct pci_dev *pdev = NULL;
1219
1220 /* RD890 BIOSes may not have completely reconfigured the iommu */
1221 if (!is_rd890_iommu(iommu->dev))
1222 return;
1223
1224 /*
1225 * First, we need to ensure that the iommu is enabled. This is
1226	 * controlled by a register in the northbridge.
1227 */
1228 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1229
1230 if (!pdev)
1231 return;
1232
1233 /* Select Northbridge indirect register 0x75 and enable writing */
1234 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1235 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1236
1237 /* Enable the iommu */
1238 if (!(ioc_feature_control & 0x1))
1239 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1240
1241 pci_dev_put(pdev);
1242
1243 /* Restore the iommu BAR */
1244 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1245 iommu->stored_addr_lo);
1246 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1247 iommu->stored_addr_hi);
1248
1249 /* Restore the l1 indirect regs for each of the 6 l1s */
1250 for (i = 0; i < 6; i++)
1251 for (j = 0; j < 0x12; j++)
1252 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1253
1254 /* Restore the l2 indirect regs */
1255 for (i = 0; i < 0x83; i++)
1256 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1257
1258 /* Lock PCI setup registers */
1259 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1260 iommu->stored_addr_lo | 1);
1261}
1262
1263/*
1264 * This function finally enables all IOMMUs found in the system after
1265 * they have been initialized
1266 */
1267static void enable_iommus(void)
1268{
1269 struct amd_iommu *iommu;
1270
1271 for_each_iommu(iommu) {
1272 iommu_disable(iommu);
1273 iommu_init_flags(iommu);
1274 iommu_set_device_table(iommu);
1275 iommu_enable_command_buffer(iommu);
1276 iommu_enable_event_buffer(iommu);
1277 iommu_set_exclusion_range(iommu);
1278 iommu_init_msi(iommu);
1279 iommu_enable(iommu);
1280 iommu_flush_all_caches(iommu);
1281 }
1282}
1283
1284static void disable_iommus(void)
1285{
1286 struct amd_iommu *iommu;
1287
1288 for_each_iommu(iommu)
1289 iommu_disable(iommu);
1290}
1291
1292/*
1293 * Suspend/Resume support
1294 * disable suspend until real resume implemented
1295 */
1296
1297static void amd_iommu_resume(void)
1298{
1299 struct amd_iommu *iommu;
1300
1301 for_each_iommu(iommu)
1302 iommu_apply_resume_quirks(iommu);
1303
1304 /* re-load the hardware */
1305 enable_iommus();
1306
1307 /*
1308 * we have to flush after the IOMMUs are enabled because a
1309 * disabled IOMMU will never execute the commands we send
1310 */
1311 for_each_iommu(iommu)
1312 iommu_flush_all_caches(iommu);
1313}
1314
1315static int amd_iommu_suspend(void)
1316{
1317 /* disable IOMMUs to go out of the way for BIOS */
1318 disable_iommus();
1319
1320 return 0;
1321}
1322
1323static struct syscore_ops amd_iommu_syscore_ops = {
1324 .suspend = amd_iommu_suspend,
1325 .resume = amd_iommu_resume,
1326};
1327
1328/*
1329 * This is the core init function for AMD IOMMU hardware in the system.
1330 * This function is called from the generic x86 DMA layer initialization
1331 * code.
1332 *
1333 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
1334 * three times:
1335 *
1336 * 1 pass) Find the highest PCI device id the driver has to handle.
1337 *          Based on this information, the sizes of the data
1338 *          structures that need to be allocated are determined.
1339 *
1340 * 2 pass) Initialize the data structures just allocated with the
1341 * information in the ACPI table about available AMD IOMMUs
1342 * in the system. It also maps the PCI devices in the
1343 * system to specific IOMMUs
1344 *
1345 * 3 pass) After the basic data structures are allocated and
1346 * initialized we update them with information about memory
1347 * remapping requirements parsed out of the ACPI table in
1348 * this last pass.
1349 *
1350 * After that the hardware is initialized and ready to go. In the last
1351 * step we do some Linux specific things like registering the driver in
1352 * the dma_ops interface and initializing the suspend/resume support
1353 * functions. Finally it prints some information about AMD IOMMUs and
1354 * the driver state and enables the hardware.
1355 */
1356static int __init amd_iommu_init(void)
1357{
1358 int i, ret = 0;
1359
1360 /*
1361 * First parse ACPI tables to find the largest Bus/Dev/Func
1362	 * we need to handle. Based on this information, the shared data
1363	 * structures for the IOMMUs in the system will be allocated.
1364 */
1365 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
1366 return -ENODEV;
1367
1368 ret = amd_iommu_init_err;
1369 if (ret)
1370 goto out;
1371
1372 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
1373 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
1374 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
1375
1376 ret = -ENOMEM;
1377
1378 /* Device table - directly used by all IOMMUs */
1379 amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1380 get_order(dev_table_size));
1381 if (amd_iommu_dev_table == NULL)
1382 goto out;
1383
1384 /*
1385 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
1386	 * IOMMU sees for that device
1387 */
1388 amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
1389 get_order(alias_table_size));
1390 if (amd_iommu_alias_table == NULL)
1391 goto free;
1392
1393 /* IOMMU rlookup table - find the IOMMU for a specific device */
1394 amd_iommu_rlookup_table = (void *)__get_free_pages(
1395 GFP_KERNEL | __GFP_ZERO,
1396 get_order(rlookup_table_size));
1397 if (amd_iommu_rlookup_table == NULL)
1398 goto free;
1399
1400 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
1401 GFP_KERNEL | __GFP_ZERO,
1402 get_order(MAX_DOMAIN_ID/8));
1403 if (amd_iommu_pd_alloc_bitmap == NULL)
1404 goto free;
1405
1406 /* init the device table */
1407 init_device_table();
1408
1409 /*
1410	 * let all alias entries point to themselves
1411 */
1412 for (i = 0; i <= amd_iommu_last_bdf; ++i)
1413 amd_iommu_alias_table[i] = i;
1414
1415 /*
1416	 * never allocate domain 0 because it's used as the non-allocated and
1417 * error value placeholder
1418 */
1419 amd_iommu_pd_alloc_bitmap[0] = 1;
1420
1421 spin_lock_init(&amd_iommu_pd_lock);
1422
1423 /*
1424 * now the data structures are allocated and basically initialized
1425 * start the real acpi table scan
1426 */
1427 ret = -ENODEV;
1428 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
1429 goto free;
1430
1431 if (amd_iommu_init_err) {
1432 ret = amd_iommu_init_err;
1433 goto free;
1434 }
1435
1436 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
1437 goto free;
1438
1439 if (amd_iommu_init_err) {
1440 ret = amd_iommu_init_err;
1441 goto free;
1442 }
1443
1444 ret = amd_iommu_init_devices();
1445 if (ret)
1446 goto free;
1447
1448 enable_iommus();
1449
1450 if (iommu_pass_through)
1451 ret = amd_iommu_init_passthrough();
1452 else
1453 ret = amd_iommu_init_dma_ops();
1454
1455 if (ret)
1456 goto free_disable;
1457
1458 amd_iommu_init_api();
1459
1460 amd_iommu_init_notifier();
1461
1462 register_syscore_ops(&amd_iommu_syscore_ops);
1463
1464 if (iommu_pass_through)
1465 goto out;
1466
1467 if (amd_iommu_unmap_flush)
1468 printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
1469 else
1470 printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
1471
1472 x86_platform.iommu_shutdown = disable_iommus;
1473out:
1474 return ret;
1475
1476free_disable:
1477 disable_iommus();
1478
1479free:
1480 amd_iommu_uninit_devices();
1481
1482 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1483 get_order(MAX_DOMAIN_ID/8));
1484
1485 free_pages((unsigned long)amd_iommu_rlookup_table,
1486 get_order(rlookup_table_size));
1487
1488 free_pages((unsigned long)amd_iommu_alias_table,
1489 get_order(alias_table_size));
1490
1491 free_pages((unsigned long)amd_iommu_dev_table,
1492 get_order(dev_table_size));
1493
1494 free_iommu_all();
1495
1496 free_unity_maps();
1497
1498#ifdef CONFIG_GART_IOMMU
1499 /*
1500 * We failed to initialize the AMD IOMMU - try fallback to GART
1501 * if possible.
1502 */
1503 gart_iommu_init();
1504
1505#endif
1506
1507 goto out;
1508}
1509
1510/****************************************************************************
1511 *
1512 * Early detect code. This code runs at IOMMU detection time in the DMA
1513 * layer. It just checks whether an IVRS ACPI table is present to detect AMD
1514 * IOMMUs
1515 *
1516 ****************************************************************************/
1517static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1518{
1519 return 0;
1520}
1521
1522int __init amd_iommu_detect(void)
1523{
1524 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1525 return -ENODEV;
1526
1527 if (amd_iommu_disabled)
1528 return -ENODEV;
1529
1530 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1531 iommu_detected = 1;
1532 amd_iommu_detected = 1;
1533 x86_init.iommu.iommu_init = amd_iommu_init;
1534
1535 /* Make sure ACS will be enabled */
1536 pci_request_acs();
1537 return 1;
1538 }
1539 return -ENODEV;
1540}
1541
1542/****************************************************************************
1543 *
1544 * Parsing functions for the AMD IOMMU specific kernel command line
1545 * options.
1546 *
1547 ****************************************************************************/
1548
1549static int __init parse_amd_iommu_dump(char *str)
1550{
1551 amd_iommu_dump = true;
1552
1553 return 1;
1554}
1555
1556static int __init parse_amd_iommu_options(char *str)
1557{
1558 for (; *str; ++str) {
1559 if (strncmp(str, "fullflush", 9) == 0)
1560 amd_iommu_unmap_flush = true;
1561 if (strncmp(str, "off", 3) == 0)
1562 amd_iommu_disabled = true;
1563 }
1564
1565 return 1;
1566}
1567
1568__setup("amd_iommu_dump", parse_amd_iommu_dump);
1569__setup("amd_iommu=", parse_amd_iommu_options);
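For reference, the two __setup() hooks above translate into the following boot-time usage; this is a sketch based only on the parse functions in this file, and the annotations are illustrative:

	amd_iommu_dump          (log the parsed IVRS ACPI table via DUMP_printk)
	amd_iommu=fullflush     (flush the IO/TLB on unmap instead of lazily)
	amd_iommu=off           (disable the AMD IOMMU driver)

Because parse_amd_iommu_options() scans every position of the option string, a combined string such as "amd_iommu=fullflush,off" would set both flags.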
1570
1571IOMMU_INIT_FINISH(amd_iommu_detect,
1572 gart_iommu_hole_init,
1573 0,
1574 0);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
new file mode 100644
index 000000000000..7ffaa64410b0
--- /dev/null
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
20#define _ASM_X86_AMD_IOMMU_PROTO_H
21
22#include "amd_iommu_types.h"
23
24extern int amd_iommu_init_dma_ops(void);
25extern int amd_iommu_init_passthrough(void);
26extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
27extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
28extern void amd_iommu_apply_erratum_63(u16 devid);
29extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
30extern int amd_iommu_init_devices(void);
31extern void amd_iommu_uninit_devices(void);
32extern void amd_iommu_init_notifier(void);
33extern void amd_iommu_init_api(void);
34#ifndef CONFIG_AMD_IOMMU_STATS
35
36static inline void amd_iommu_stats_init(void) { }
37
38#endif /* !CONFIG_AMD_IOMMU_STATS */
39
40static inline bool is_rd890_iommu(struct pci_dev *pdev)
41{
42 return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
43 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
44}
45
46static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
47{
48 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
49 return false;
50
51 return !!(iommu->features & f);
52}
53
54#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
new file mode 100644
index 000000000000..5b9c5075e81a
--- /dev/null
+++ b/drivers/iommu/amd_iommu_types.h
@@ -0,0 +1,585 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
21#define _ASM_X86_AMD_IOMMU_TYPES_H
22
23#include <linux/types.h>
24#include <linux/mutex.h>
25#include <linux/list.h>
26#include <linux/spinlock.h>
27
28/*
29 * Maximum number of IOMMUs supported
30 */
31#define MAX_IOMMUS 32
32
33/*
34 * some size calculation constants
35 */
36#define DEV_TABLE_ENTRY_SIZE 32
37#define ALIAS_TABLE_ENTRY_SIZE 2
38#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
39
40/* Length of the MMIO region for the AMD IOMMU */
41#define MMIO_REGION_LENGTH 0x4000
42
43/* Capability offsets used by the driver */
44#define MMIO_CAP_HDR_OFFSET 0x00
45#define MMIO_RANGE_OFFSET 0x0c
46#define MMIO_MISC_OFFSET 0x10
47
48/* Masks, shifts and macros to parse the device range capability */
49#define MMIO_RANGE_LD_MASK 0xff000000
50#define MMIO_RANGE_FD_MASK 0x00ff0000
51#define MMIO_RANGE_BUS_MASK 0x0000ff00
52#define MMIO_RANGE_LD_SHIFT 24
53#define MMIO_RANGE_FD_SHIFT 16
54#define MMIO_RANGE_BUS_SHIFT 8
55#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
56#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
57#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
58#define MMIO_MSI_NUM(x) ((x) & 0x1f)
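To make the bit layout concrete, here is a small worked example; the register value below is invented purely to exercise the masks (the real value comes from the IOMMU's PCI capability block at MMIO_RANGE_OFFSET):

	u32 range = 0x3f001200;	/* hypothetical range register value */

	MMIO_GET_LD(range);	/* (0x3f001200 & 0xff000000) >> 24 == 0x3f, last device  */
	MMIO_GET_FD(range);	/* (0x3f001200 & 0x00ff0000) >> 16 == 0x00, first device */
	MMIO_GET_BUS(range);	/* (0x3f001200 & 0x0000ff00) >>  8 == 0x12, bus number   */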
59
60/* Flag masks for the AMD IOMMU exclusion range */
61#define MMIO_EXCL_ENABLE_MASK 0x01ULL
62#define MMIO_EXCL_ALLOW_MASK 0x02ULL
63
64/* Used offsets into the MMIO space */
65#define MMIO_DEV_TABLE_OFFSET 0x0000
66#define MMIO_CMD_BUF_OFFSET 0x0008
67#define MMIO_EVT_BUF_OFFSET 0x0010
68#define MMIO_CONTROL_OFFSET 0x0018
69#define MMIO_EXCL_BASE_OFFSET 0x0020
70#define MMIO_EXCL_LIMIT_OFFSET 0x0028
71#define MMIO_EXT_FEATURES 0x0030
72#define MMIO_CMD_HEAD_OFFSET 0x2000
73#define MMIO_CMD_TAIL_OFFSET 0x2008
74#define MMIO_EVT_HEAD_OFFSET 0x2010
75#define MMIO_EVT_TAIL_OFFSET 0x2018
76#define MMIO_STATUS_OFFSET 0x2020
77
78
79/* Extended Feature Bits */
80#define FEATURE_PREFETCH (1ULL<<0)
81#define FEATURE_PPR (1ULL<<1)
82#define FEATURE_X2APIC (1ULL<<2)
83#define FEATURE_NX (1ULL<<3)
84#define FEATURE_GT (1ULL<<4)
85#define FEATURE_IA (1ULL<<6)
86#define FEATURE_GA (1ULL<<7)
87#define FEATURE_HE (1ULL<<8)
88#define FEATURE_PC (1ULL<<9)
89
90/* MMIO status bits */
91#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
92
93/* event logging constants */
94#define EVENT_ENTRY_SIZE 0x10
95#define EVENT_TYPE_SHIFT 28
96#define EVENT_TYPE_MASK 0xf
97#define EVENT_TYPE_ILL_DEV 0x1
98#define EVENT_TYPE_IO_FAULT 0x2
99#define EVENT_TYPE_DEV_TAB_ERR 0x3
100#define EVENT_TYPE_PAGE_TAB_ERR 0x4
101#define EVENT_TYPE_ILL_CMD 0x5
102#define EVENT_TYPE_CMD_HARD_ERR 0x6
103#define EVENT_TYPE_IOTLB_INV_TO 0x7
104#define EVENT_TYPE_INV_DEV_REQ 0x8
105#define EVENT_DEVID_MASK 0xffff
106#define EVENT_DEVID_SHIFT 0
107#define EVENT_DOMID_MASK 0xffff
108#define EVENT_DOMID_SHIFT 0
109#define EVENT_FLAGS_MASK 0xfff
110#define EVENT_FLAGS_SHIFT 0x10
111
112/* feature control bits */
113#define CONTROL_IOMMU_EN 0x00ULL
114#define CONTROL_HT_TUN_EN 0x01ULL
115#define CONTROL_EVT_LOG_EN 0x02ULL
116#define CONTROL_EVT_INT_EN 0x03ULL
117#define CONTROL_COMWAIT_EN 0x04ULL
118#define CONTROL_PASSPW_EN 0x08ULL
119#define CONTROL_RESPASSPW_EN 0x09ULL
120#define CONTROL_COHERENT_EN 0x0aULL
121#define CONTROL_ISOC_EN 0x0bULL
122#define CONTROL_CMDBUF_EN 0x0cULL
123#define CONTROL_PPFLOG_EN 0x0dULL
124#define CONTROL_PPFINT_EN 0x0eULL
125
126/* command specific defines */
127#define CMD_COMPL_WAIT 0x01
128#define CMD_INV_DEV_ENTRY 0x02
129#define CMD_INV_IOMMU_PAGES 0x03
130#define CMD_INV_IOTLB_PAGES 0x04
131#define CMD_INV_ALL 0x08
132
133#define CMD_COMPL_WAIT_STORE_MASK 0x01
134#define CMD_COMPL_WAIT_INT_MASK 0x02
135#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
136#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
137
138#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
139
140/* macros and definitions for device table entries */
141#define DEV_ENTRY_VALID 0x00
142#define DEV_ENTRY_TRANSLATION 0x01
143#define DEV_ENTRY_IR 0x3d
144#define DEV_ENTRY_IW 0x3e
145#define DEV_ENTRY_NO_PAGE_FAULT 0x62
146#define DEV_ENTRY_EX 0x67
147#define DEV_ENTRY_SYSMGT1 0x68
148#define DEV_ENTRY_SYSMGT2 0x69
149#define DEV_ENTRY_INIT_PASS 0xb8
150#define DEV_ENTRY_EINT_PASS 0xb9
151#define DEV_ENTRY_NMI_PASS 0xba
152#define DEV_ENTRY_LINT0_PASS 0xbe
153#define DEV_ENTRY_LINT1_PASS 0xbf
154#define DEV_ENTRY_MODE_MASK 0x07
155#define DEV_ENTRY_MODE_SHIFT 0x09
156
157/* constants to configure the command buffer */
158#define CMD_BUFFER_SIZE 8192
159#define CMD_BUFFER_UNINITIALIZED 1
160#define CMD_BUFFER_ENTRIES 512
161#define MMIO_CMD_SIZE_SHIFT 56
162#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
163
164/* constants for event buffer handling */
165#define EVT_BUFFER_SIZE 8192 /* 512 entries */
166#define EVT_LEN_MASK (0x9ULL << 56)
167
168#define PAGE_MODE_NONE 0x00
169#define PAGE_MODE_1_LEVEL 0x01
170#define PAGE_MODE_2_LEVEL 0x02
171#define PAGE_MODE_3_LEVEL 0x03
172#define PAGE_MODE_4_LEVEL 0x04
173#define PAGE_MODE_5_LEVEL 0x05
174#define PAGE_MODE_6_LEVEL 0x06
175
176#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
177#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
178 ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
179 (0xffffffffffffffffULL))
180#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
181#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL)
182#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
183 IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
184#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL)
185
186#define PM_MAP_4k 0
187#define PM_ADDR_MASK 0x000ffffffffff000ULL
188#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \
189 (~((1ULL << (12 + ((lvl) * 9))) - 1)))
190#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
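A short worked example of the level arithmetic; the address is arbitrary and only exercises the macros (9 bits of index per level above the 12-bit page offset):

	unsigned long iova = 0x12345678UL;

	PM_LEVEL_SHIFT(0);		/* 12 + 0*9 == 12 */
	PM_LEVEL_SHIFT(1);		/* 12 + 1*9 == 21 */
	PM_LEVEL_SHIFT(2);		/* 12 + 2*9 == 30 */

	PM_LEVEL_INDEX(0, iova);	/* (0x12345678 >> 12) & 0x1ff == 0x145 */
	PM_LEVEL_INDEX(1, iova);	/* (0x12345678 >> 21) & 0x1ff == 0x091 */
	PM_LEVEL_INDEX(2, iova);	/* (0x12345678 >> 30) & 0x1ff == 0x000 */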
191
192/*
193 * Returns the page table level to use for a given page size
194 * Pagesize is expected to be a power-of-two
195 */
196#define PAGE_SIZE_LEVEL(pagesize) \
197 ((__ffs(pagesize) - 12) / 9)
198/*
199 * Returns the number of ptes to use for a given page size
200 * Pagesize is expected to be a power-of-two
201 */
202#define PAGE_SIZE_PTE_COUNT(pagesize) \
203 (1ULL << ((__ffs(pagesize) - 12) % 9))
204
205/*
206 * Aligns a given io-virtual address to a given page size
207 * Pagesize is expected to be a power-of-two
208 */
209#define PAGE_SIZE_ALIGN(address, pagesize) \
210 ((address) & ~((pagesize) - 1))
211/*
212 * Creates an IOMMU PTE for an address and a given pagesize
213 * The PTE has no permission bits set
214 * Pagesize is expected to be a power-of-two larger than 4096
215 */
216#define PAGE_SIZE_PTE(address, pagesize) \
217 (((address) | ((pagesize) - 1)) & \
218 (~(pagesize >> 1)) & PM_ADDR_MASK)
219
220/*
221 * Takes a PTE value with mode=0x07 and returns the page size it maps
222 */
223#define PTE_PAGE_SIZE(pte) \
224 (1ULL << (1 + ffz(((pte) | 0xfffULL))))
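The interplay of the four PAGE_SIZE_* macros is easiest to see with concrete numbers; the values below are plain arithmetic on the definitions above, and the IOVA is arbitrary:

	PAGE_SIZE_LEVEL(0x1000UL);		/* (12 - 12) / 9 == 0: 4K lives at level 0  */
	PAGE_SIZE_LEVEL(0x200000UL);		/* (21 - 12) / 9 == 1: 2M lives at level 1  */
	PAGE_SIZE_PTE_COUNT(0x10000UL);		/* 1 << ((16 - 12) % 9) == 16 PTEs for 64K  */

	/* A 64K mapping at IOVA 0x10000 encodes the page size in the low bits: */
	PAGE_SIZE_PTE(0x10000UL, 0x10000UL);	/* == 0x17000 */
	PTE_PAGE_SIZE(0x17000UL);		/* 1 << (1 + ffz(0x17fff)) == 1 << 16 == 64K */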
225
226#define IOMMU_PTE_P (1ULL << 0)
227#define IOMMU_PTE_TV (1ULL << 1)
228#define IOMMU_PTE_U (1ULL << 59)
229#define IOMMU_PTE_FC (1ULL << 60)
230#define IOMMU_PTE_IR (1ULL << 61)
231#define IOMMU_PTE_IW (1ULL << 62)
232
233#define DTE_FLAG_IOTLB 0x01
234
235#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
236#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
237#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
238#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
239
240#define IOMMU_PROT_MASK 0x03
241#define IOMMU_PROT_IR 0x01
242#define IOMMU_PROT_IW 0x02
243
244/* IOMMU capabilities */
245#define IOMMU_CAP_IOTLB 24
246#define IOMMU_CAP_NPCACHE 26
247#define IOMMU_CAP_EFR 27
248
249#define MAX_DOMAIN_ID 65536
250
251/* FIXME: move this macro to <linux/pci.h> */
252#define PCI_BUS(x) (((x) >> 8) & 0xff)
253
254/* Protection domain flags */
255#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
256#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
257 domain for an IOMMU */
258#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
259 translation */
260
261extern bool amd_iommu_dump;
262#define DUMP_printk(format, arg...) \
263 do { \
264 if (amd_iommu_dump) \
265 printk(KERN_INFO "AMD-Vi: " format, ## arg); \
266	} while (0)
267
268/* global flag if IOMMUs cache non-present entries */
269extern bool amd_iommu_np_cache;
270/* Only true if all IOMMUs support device IOTLBs */
271extern bool amd_iommu_iotlb_sup;
272
273/*
274 * Make iterating over all IOMMUs easier
275 */
276#define for_each_iommu(iommu) \
277 list_for_each_entry((iommu), &amd_iommu_list, list)
278#define for_each_iommu_safe(iommu, next) \
279 list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
280
281#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
282#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
283#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
284#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
285#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
286#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
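A quick example of how a DMA address is decomposed by these macros; the address is made up and only the shifts matter. Each 128 MB range is backed by the 64 leaf page-table pages in struct aperture_range below, each covering 2 MB:

	unsigned long dma_addr = 0x08403000UL;

	APERTURE_RANGE_INDEX(dma_addr);	/* 0x08403000 >> 27 == 1: second 128 MB range       */
	APERTURE_PAGE_INDEX(dma_addr);	/* (0x08403000 >> 21) & 0x3f == 2: third 2 MB leaf
					   PTE page inside that range                       */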
287
288/*
289 * This structure contains generic data for IOMMU protection domains
290 * independent of their use.
291 */
292struct protection_domain {
293 struct list_head list; /* for list of all protection domains */
294 struct list_head dev_list; /* List of all devices in this domain */
295 spinlock_t lock; /* mostly used to lock the page table*/
296 struct mutex api_lock; /* protect page tables in the iommu-api path */
297 u16 id; /* the domain id written to the device table */
298 int mode; /* paging mode (0-6 levels) */
299 u64 *pt_root; /* page table root pointer */
300 unsigned long flags; /* flags to find out type of domain */
301 bool updated; /* complete domain flush required */
302 unsigned dev_cnt; /* devices assigned to this domain */
303 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
304 void *priv; /* private data */
305
306};
307
308/*
309 * This struct contains device specific data for the IOMMU
310 */
311struct iommu_dev_data {
312 struct list_head list; /* For domain->dev_list */
313 struct list_head dev_data_list; /* For global dev_data_list */
314 struct iommu_dev_data *alias_data;/* The alias dev_data */
315 struct protection_domain *domain; /* Domain the device is bound to */
316	atomic_t bind;			/* Domain attach reference count */
317 u16 devid; /* PCI Device ID */
318 struct {
319 bool enabled;
320 int qdep;
321 } ats; /* ATS state */
322};
323
324/*
325 * For dynamic growth the aperture size is split into ranges of 128MB of
326 * DMA address space each. This struct represents one such range.
327 */
328struct aperture_range {
329
330 /* address allocation bitmap */
331 unsigned long *bitmap;
332
333 /*
334 * Array of PTE pages for the aperture. In this array we save all the
335 * leaf pages of the domain page table used for the aperture. This way
336 * we don't need to walk the page table to find a specific PTE. We can
337 * just calculate its address in constant time.
338 */
339 u64 *pte_pages[64];
340
341 unsigned long offset;
342};
343
344/*
345 * Data container for a dma_ops specific protection domain
346 */
347struct dma_ops_domain {
348 struct list_head list;
349
350 /* generic protection domain information */
351 struct protection_domain domain;
352
353 /* size of the aperture for the mappings */
354 unsigned long aperture_size;
355
356 /* address we start to search for free addresses */
357 unsigned long next_address;
358
359 /* address space relevant data */
360 struct aperture_range *aperture[APERTURE_MAX_RANGES];
361
362 /* This will be set to true when TLB needs to be flushed */
363 bool need_flush;
364
365 /*
366 * if this is a preallocated domain, keep the device for which it was
367 * preallocated in this variable
368 */
369 u16 target_dev;
370};
371
372/*
373 * Structure where we save information about one hardware AMD IOMMU in the
374 * system.
375 */
376struct amd_iommu {
377 struct list_head list;
378
379 /* Index within the IOMMU array */
380 int index;
381
382 /* locks the accesses to the hardware */
383 spinlock_t lock;
384
385 /* Pointer to PCI device of this IOMMU */
386 struct pci_dev *dev;
387
388 /* physical address of MMIO space */
389 u64 mmio_phys;
390 /* virtual address of MMIO space */
391 u8 *mmio_base;
392
393 /* capabilities of that IOMMU read from ACPI */
394 u32 cap;
395
396 /* flags read from acpi table */
397 u8 acpi_flags;
398
399 /* Extended features */
400 u64 features;
401
402 /*
403 * Capability pointer. There could be more than one IOMMU per PCI
404	 * device function if there is more than one AMD IOMMU capability
405	 * pointer.
406 */
407 u16 cap_ptr;
408
409 /* pci domain of this IOMMU */
410 u16 pci_seg;
411
412 /* first device this IOMMU handles. read from PCI */
413 u16 first_device;
414 /* last device this IOMMU handles. read from PCI */
415 u16 last_device;
416
417 /* start of exclusion range of that IOMMU */
418 u64 exclusion_start;
419 /* length of exclusion range of that IOMMU */
420 u64 exclusion_length;
421
422 /* command buffer virtual address */
423 u8 *cmd_buf;
424 /* size of command buffer */
425 u32 cmd_buf_size;
426
427 /* size of event buffer */
428 u32 evt_buf_size;
429 /* event buffer virtual address */
430 u8 *evt_buf;
431 /* MSI number for event interrupt */
432 u16 evt_msi_num;
433
434 /* true if interrupts for this IOMMU are already enabled */
435 bool int_enabled;
436
437 /* if one, we need to send a completion wait command */
438 bool need_sync;
439
440 /* default dma_ops domain for that IOMMU */
441 struct dma_ops_domain *default_dom;
442
443 /*
444 * We can't rely on the BIOS to restore all values on reinit, so we
445 * need to stash them
446 */
447
448 /* The iommu BAR */
449 u32 stored_addr_lo;
450 u32 stored_addr_hi;
451
452 /*
453 * Each iommu has 6 l1s, each of which is documented as having 0x12
454 * registers
455 */
456 u32 stored_l1[6][0x12];
457
458 /* The l2 indirect registers */
459 u32 stored_l2[0x83];
460};
461
462/*
463 * List with all IOMMUs in the system. This list is not locked because it is
464 * only written and read at driver initialization or suspend time
465 */
466extern struct list_head amd_iommu_list;
467
468/*
469 * Array with pointers to each IOMMU struct
470 * The indices are referenced in the protection domains
471 */
472extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
473
474/* Number of IOMMUs present in the system */
475extern int amd_iommus_present;
476
477/*
478 * Declarations for the global list of all protection domains
479 */
480extern spinlock_t amd_iommu_pd_lock;
481extern struct list_head amd_iommu_pd_list;
482
483/*
484 * Structure defining one entry in the device table
485 */
486struct dev_table_entry {
487 u32 data[8];
488};
489
490/*
491 * One entry for unity mappings parsed out of the ACPI table.
492 */
493struct unity_map_entry {
494 struct list_head list;
495
496	/* starting device id this entry is used for (inclusive) */
497 u16 devid_start;
498	/* end device id this entry is used for (inclusive) */
499 u16 devid_end;
500
501	/* start address to unity map (inclusive) */
502 u64 address_start;
503	/* end address to unity map (inclusive) */
504 u64 address_end;
505
506 /* required protection */
507 int prot;
508};
509
510/*
511 * List of all unity mappings. It is not locked because at runtime it is only
512 * read. It is created at ACPI table parsing time.
513 */
514extern struct list_head amd_iommu_unity_map;
515
516/*
517 * Data structures for device handling
518 */
519
520/*
521 * Device table used by hardware. Read and write accesses by software are
522 * locked with the amd_iommu_pd_table lock.
523 */
524extern struct dev_table_entry *amd_iommu_dev_table;
525
526/*
527 * Alias table to map requestor ids to device ids. Not locked because it is
528 * only read at runtime.
529 */
530extern u16 *amd_iommu_alias_table;
531
532/*
533 * Reverse lookup table to find the IOMMU which translates a specific device.
534 */
535extern struct amd_iommu **amd_iommu_rlookup_table;
536
537/* size of the dma_ops aperture as power of 2 */
538extern unsigned amd_iommu_aperture_order;
539
540/* largest PCI device id we expect translation requests for */
541extern u16 amd_iommu_last_bdf;
542
543/* allocation bitmap for domain ids */
544extern unsigned long *amd_iommu_pd_alloc_bitmap;
545
546/*
547 * If true, the addresses will be flushed at unmap time, not when
548 * they are reused
549 */
550extern bool amd_iommu_unmap_flush;
551
552/* takes bus and device/function and returns the device id
553 * FIXME: should that be in generic PCI code? */
554static inline u16 calc_devid(u8 bus, u8 devfn)
555{
556 return (((u16)bus) << 8) | devfn;
557}
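A quick round-trip sanity check of the devid encoding (PCI_DEVFN, PCI_SLOT and PCI_FUNC come from <linux/pci.h>; the bus/slot/function values are arbitrary):

	u16 devid = calc_devid(0x01, PCI_DEVFN(0x02, 0x3));	/* == 0x0113 */

	PCI_BUS(devid);		/* == 0x01 */
	PCI_SLOT(devid);	/* == 0x02 */
	PCI_FUNC(devid);	/* == 0x03 */

This is the same decomposition DUMP_printk() uses in init_unity_map_range() when printing devid_start and devid_end.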
558
559#ifdef CONFIG_AMD_IOMMU_STATS
560
561struct __iommu_counter {
562 char *name;
563 struct dentry *dent;
564 u64 value;
565};
566
567#define DECLARE_STATS_COUNTER(nm) \
568 static struct __iommu_counter nm = { \
569 .name = #nm, \
570 }
571
572#define INC_STATS_COUNTER(name) name.value += 1
573#define ADD_STATS_COUNTER(name, x) name.value += (x)
574#define SUB_STATS_COUNTER(name, x) name.value -= (x)
575
576#else /* CONFIG_AMD_IOMMU_STATS */
577
578#define DECLARE_STATS_COUNTER(name)
579#define INC_STATS_COUNTER(name)
580#define ADD_STATS_COUNTER(name, x)
581#define SUB_STATS_COUNTER(name, x)
582
583#endif /* CONFIG_AMD_IOMMU_STATS */
584
585#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/drivers/pci/dmar.c b/drivers/iommu/dmar.c
index 3dc9befa5aec..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/iommu/dmar.c
diff --git a/drivers/pci/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f02c34d26d1b..c621c98c99da 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -42,7 +42,6 @@
42#include <linux/pci-ats.h> 42#include <linux/pci-ats.h>
43#include <asm/cacheflush.h> 43#include <asm/cacheflush.h>
44#include <asm/iommu.h> 44#include <asm/iommu.h>
45#include "pci.h"
46 45
47#define ROOT_SIZE VTD_PAGE_SIZE 46#define ROOT_SIZE VTD_PAGE_SIZE
48#define CONTEXT_SIZE VTD_PAGE_SIZE 47#define CONTEXT_SIZE VTD_PAGE_SIZE
diff --git a/drivers/pci/intr_remapping.c b/drivers/iommu/intr_remapping.c
index 3607faf28a4d..1a89d4a2cadf 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/iommu/intr_remapping.c
@@ -13,7 +13,6 @@
13#include "intr_remapping.h" 13#include "intr_remapping.h"
14#include <acpi/acpi.h> 14#include <acpi/acpi.h>
15#include <asm/pci-direct.h> 15#include <asm/pci-direct.h>
16#include "pci.h"
17 16
18static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; 17static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
19static struct hpet_scope ir_hpet[MAX_HPET_TBS]; 18static struct hpet_scope ir_hpet[MAX_HPET_TBS];
diff --git a/drivers/pci/intr_remapping.h b/drivers/iommu/intr_remapping.h
index 5662fecfee60..5662fecfee60 100644
--- a/drivers/pci/intr_remapping.h
+++ b/drivers/iommu/intr_remapping.h
diff --git a/drivers/base/iommu.c b/drivers/iommu/iommu.c
index 6e6b6a11b3ce..6e6b6a11b3ce 100644
--- a/drivers/base/iommu.c
+++ b/drivers/iommu/iommu.c
diff --git a/drivers/pci/iova.c b/drivers/iommu/iova.c
index c5c274ab5c5a..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/iommu/iova.c
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
new file mode 100644
index 000000000000..1a584e077c61
--- /dev/null
+++ b/drivers/iommu/msm_iommu.c
@@ -0,0 +1,731 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/platform_device.h>
22#include <linux/errno.h>
23#include <linux/io.h>
24#include <linux/interrupt.h>
25#include <linux/list.h>
26#include <linux/spinlock.h>
27#include <linux/slab.h>
28#include <linux/iommu.h>
29#include <linux/clk.h>
30
31#include <asm/cacheflush.h>
32#include <asm/sizes.h>
33
34#include <mach/iommu_hw-8xxx.h>
35#include <mach/iommu.h>
36
37#define MRC(reg, processor, op1, crn, crm, op2) \
38__asm__ __volatile__ ( \
39" mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \
40: "=r" (reg))
41
42#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
43#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
44
45static int msm_iommu_tex_class[4];
46
47DEFINE_SPINLOCK(msm_iommu_lock);
48
49struct msm_priv {
50 unsigned long *pgtable;
51 struct list_head list_attached;
52};
53
54static int __enable_clocks(struct msm_iommu_drvdata *drvdata)
55{
56 int ret;
57
58 ret = clk_enable(drvdata->pclk);
59 if (ret)
60 goto fail;
61
62 if (drvdata->clk) {
63 ret = clk_enable(drvdata->clk);
64 if (ret)
65 clk_disable(drvdata->pclk);
66 }
67fail:
68 return ret;
69}
70
71static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
72{
73 if (drvdata->clk)
74 clk_disable(drvdata->clk);
75 clk_disable(drvdata->pclk);
76}
77
78static int __flush_iotlb(struct iommu_domain *domain)
79{
80 struct msm_priv *priv = domain->priv;
81 struct msm_iommu_drvdata *iommu_drvdata;
82 struct msm_iommu_ctx_drvdata *ctx_drvdata;
83 int ret = 0;
84#ifndef CONFIG_IOMMU_PGTABLES_L2
85 unsigned long *fl_table = priv->pgtable;
86 int i;
87
88 if (!list_empty(&priv->list_attached)) {
89 dmac_flush_range(fl_table, fl_table + SZ_16K);
90
91 for (i = 0; i < NUM_FL_PTE; i++)
92 if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
93 void *sl_table = __va(fl_table[i] &
94 FL_BASE_MASK);
95 dmac_flush_range(sl_table, sl_table + SZ_4K);
96 }
97 }
98#endif
99
100 list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
101 if (!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent)
102 BUG();
103
104 iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
105 BUG_ON(!iommu_drvdata);
106
107 ret = __enable_clocks(iommu_drvdata);
108 if (ret)
109 goto fail;
110
111 SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
112 __disable_clocks(iommu_drvdata);
113 }
114fail:
115 return ret;
116}
117
118static void __reset_context(void __iomem *base, int ctx)
119{
120 SET_BPRCOSH(base, ctx, 0);
121 SET_BPRCISH(base, ctx, 0);
122 SET_BPRCNSH(base, ctx, 0);
123 SET_BPSHCFG(base, ctx, 0);
124 SET_BPMTCFG(base, ctx, 0);
125 SET_ACTLR(base, ctx, 0);
126 SET_SCTLR(base, ctx, 0);
127 SET_FSRRESTORE(base, ctx, 0);
128 SET_TTBR0(base, ctx, 0);
129 SET_TTBR1(base, ctx, 0);
130 SET_TTBCR(base, ctx, 0);
131 SET_BFBCR(base, ctx, 0);
132 SET_PAR(base, ctx, 0);
133 SET_FAR(base, ctx, 0);
134 SET_CTX_TLBIALL(base, ctx, 0);
135 SET_TLBFLPTER(base, ctx, 0);
136 SET_TLBSLPTER(base, ctx, 0);
137 SET_TLBLKCR(base, ctx, 0);
138 SET_PRRR(base, ctx, 0);
139 SET_NMRR(base, ctx, 0);
140}
141
142static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
143{
144 unsigned int prrr, nmrr;
145 __reset_context(base, ctx);
146
147 /* Set up HTW mode */
148 /* TLB miss configuration: perform HTW on miss */
149 SET_TLBMCFG(base, ctx, 0x3);
150
151 /* V2P configuration: HTW for access */
152 SET_V2PCFG(base, ctx, 0x3);
153
154 SET_TTBCR(base, ctx, 0);
155 SET_TTBR0_PA(base, ctx, (pgtable >> 14));
156
157 /* Invalidate the TLB for this context */
158 SET_CTX_TLBIALL(base, ctx, 0);
159
160 /* Set interrupt number to "secure" interrupt */
161 SET_IRPTNDX(base, ctx, 0);
162
163 /* Enable context fault interrupt */
164 SET_CFEIE(base, ctx, 1);
165
166 /* Stall access on a context fault and let the handler deal with it */
167 SET_CFCFG(base, ctx, 1);
168
169 /* Redirect all cacheable requests to L2 slave port. */
170 SET_RCISH(base, ctx, 1);
171 SET_RCOSH(base, ctx, 1);
172 SET_RCNSH(base, ctx, 1);
173
174 /* Turn on TEX Remap */
175 SET_TRE(base, ctx, 1);
176
177 /* Set TEX remap attributes */
178 RCP15_PRRR(prrr);
179 RCP15_NMRR(nmrr);
180 SET_PRRR(base, ctx, prrr);
181 SET_NMRR(base, ctx, nmrr);
182
183 /* Turn on BFB prefetch */
184 SET_BFBDFE(base, ctx, 1);
185
186#ifdef CONFIG_IOMMU_PGTABLES_L2
187 /* Configure page tables as inner-cacheable and shareable to reduce
188 * the TLB miss penalty.
189 */
190 SET_TTBR0_SH(base, ctx, 1);
191 SET_TTBR1_SH(base, ctx, 1);
192
193 SET_TTBR0_NOS(base, ctx, 1);
194 SET_TTBR1_NOS(base, ctx, 1);
195
196 SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */
197 SET_TTBR0_IRGNL(base, ctx, 1);
198
199 SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */
200 SET_TTBR1_IRGNL(base, ctx, 1);
201
202 SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */
203 SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */
204#endif
205
206 /* Enable the MMU */
207 SET_M(base, ctx, 1);
208}
209
210static int msm_iommu_domain_init(struct iommu_domain *domain)
211{
212 struct msm_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
213
214 if (!priv)
215 goto fail_nomem;
216
217 INIT_LIST_HEAD(&priv->list_attached);
218 priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL,
219 get_order(SZ_16K));
220
221 if (!priv->pgtable)
222 goto fail_nomem;
223
224 memset(priv->pgtable, 0, SZ_16K);
225 domain->priv = priv;
226 return 0;
227
228fail_nomem:
229 kfree(priv);
230 return -ENOMEM;
231}
232
233static void msm_iommu_domain_destroy(struct iommu_domain *domain)
234{
235 struct msm_priv *priv;
236 unsigned long flags;
237 unsigned long *fl_table;
238 int i;
239
240 spin_lock_irqsave(&msm_iommu_lock, flags);
241 priv = domain->priv;
242 domain->priv = NULL;
243
244 if (priv) {
245 fl_table = priv->pgtable;
246
247 for (i = 0; i < NUM_FL_PTE; i++)
248 if ((fl_table[i] & 0x03) == FL_TYPE_TABLE)
249 free_page((unsigned long) __va(((fl_table[i]) &
250 FL_BASE_MASK)));
251
252 free_pages((unsigned long)priv->pgtable, get_order(SZ_16K));
253 priv->pgtable = NULL;
254 }
255
256 kfree(priv);
257 spin_unlock_irqrestore(&msm_iommu_lock, flags);
258}
259
260static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
261{
262 struct msm_priv *priv;
263 struct msm_iommu_ctx_dev *ctx_dev;
264 struct msm_iommu_drvdata *iommu_drvdata;
265 struct msm_iommu_ctx_drvdata *ctx_drvdata;
266 struct msm_iommu_ctx_drvdata *tmp_drvdata;
267 int ret = 0;
268 unsigned long flags;
269
270 spin_lock_irqsave(&msm_iommu_lock, flags);
271
272 priv = domain->priv;
273
274 if (!priv || !dev) {
275 ret = -EINVAL;
276 goto fail;
277 }
278
279 iommu_drvdata = dev_get_drvdata(dev->parent);
280 ctx_drvdata = dev_get_drvdata(dev);
281 ctx_dev = dev->platform_data;
282
283 if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) {
284 ret = -EINVAL;
285 goto fail;
286 }
287
288 if (!list_empty(&ctx_drvdata->attached_elm)) {
289 ret = -EBUSY;
290 goto fail;
291 }
292
293 list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm)
294 if (tmp_drvdata == ctx_drvdata) {
295 ret = -EBUSY;
296 goto fail;
297 }
298
299 ret = __enable_clocks(iommu_drvdata);
300 if (ret)
301 goto fail;
302
303 __program_context(iommu_drvdata->base, ctx_dev->num,
304 __pa(priv->pgtable));
305
306 __disable_clocks(iommu_drvdata);
307 list_add(&(ctx_drvdata->attached_elm), &priv->list_attached);
308 ret = __flush_iotlb(domain);
309
310fail:
311 spin_unlock_irqrestore(&msm_iommu_lock, flags);
312 return ret;
313}
314
315static void msm_iommu_detach_dev(struct iommu_domain *domain,
316 struct device *dev)
317{
318 struct msm_priv *priv;
319 struct msm_iommu_ctx_dev *ctx_dev;
320 struct msm_iommu_drvdata *iommu_drvdata;
321 struct msm_iommu_ctx_drvdata *ctx_drvdata;
322 unsigned long flags;
323 int ret;
324
325 spin_lock_irqsave(&msm_iommu_lock, flags);
326 priv = domain->priv;
327
328 if (!priv || !dev)
329 goto fail;
330
331 iommu_drvdata = dev_get_drvdata(dev->parent);
332 ctx_drvdata = dev_get_drvdata(dev);
333 ctx_dev = dev->platform_data;
334
335 if (!iommu_drvdata || !ctx_drvdata || !ctx_dev)
336 goto fail;
337
338 ret = __flush_iotlb(domain);
339 if (ret)
340 goto fail;
341
342 ret = __enable_clocks(iommu_drvdata);
343 if (ret)
344 goto fail;
345
346 __reset_context(iommu_drvdata->base, ctx_dev->num);
347 __disable_clocks(iommu_drvdata);
348 list_del_init(&ctx_drvdata->attached_elm);
349
350fail:
351 spin_unlock_irqrestore(&msm_iommu_lock, flags);
352}
353
354static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
355 phys_addr_t pa, int order, int prot)
356{
357 struct msm_priv *priv;
358 unsigned long flags;
359 unsigned long *fl_table;
360 unsigned long *fl_pte;
361 unsigned long fl_offset;
362 unsigned long *sl_table;
363 unsigned long *sl_pte;
364 unsigned long sl_offset;
365 unsigned int pgprot;
366 size_t len = 0x1000UL << order;
367 int ret = 0, tex, sh;
368
369 spin_lock_irqsave(&msm_iommu_lock, flags);
370
371 sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
372 tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK];
373
374 if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
375 ret = -EINVAL;
376 goto fail;
377 }
378
379 priv = domain->priv;
380 if (!priv) {
381 ret = -EINVAL;
382 goto fail;
383 }
384
385 fl_table = priv->pgtable;
386
387 if (len != SZ_16M && len != SZ_1M &&
388 len != SZ_64K && len != SZ_4K) {
389		pr_debug("Bad size: %zu\n", len);
390 ret = -EINVAL;
391 goto fail;
392 }
393
394 if (!fl_table) {
395 pr_debug("Null page table\n");
396 ret = -EINVAL;
397 goto fail;
398 }
399
400 if (len == SZ_16M || len == SZ_1M) {
401 pgprot = sh ? FL_SHARED : 0;
402 pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
403 pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
404 pgprot |= tex & 0x04 ? FL_TEX0 : 0;
405 } else {
406 pgprot = sh ? SL_SHARED : 0;
407 pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
408 pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
409 pgprot |= tex & 0x04 ? SL_TEX0 : 0;
410 }
411
412 fl_offset = FL_OFFSET(va); /* Upper 12 bits */
413 fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
414
415 if (len == SZ_16M) {
416 int i = 0;
417 for (i = 0; i < 16; i++)
418 *(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
419 FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
420 FL_SHARED | FL_NG | pgprot;
421 }
422
423 if (len == SZ_1M)
424 *fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG |
425 FL_TYPE_SECT | FL_SHARED | pgprot;
426
427 /* Need a 2nd level table */
428 if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
429 unsigned long *sl;
430 sl = (unsigned long *) __get_free_pages(GFP_ATOMIC,
431 get_order(SZ_4K));
432
433 if (!sl) {
434 pr_debug("Could not allocate second level table\n");
435 ret = -ENOMEM;
436 goto fail;
437 }
438
439 memset(sl, 0, SZ_4K);
440 *fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE);
441 }
442
443 sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
444 sl_offset = SL_OFFSET(va);
445 sl_pte = sl_table + sl_offset;
446
447
448 if (len == SZ_4K)
449 *sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG |
450 SL_SHARED | SL_TYPE_SMALL | pgprot;
451
452 if (len == SZ_64K) {
453 int i;
454
455 for (i = 0; i < 16; i++)
456 *(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
457 SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
458 }
459
460 ret = __flush_iotlb(domain);
461fail:
462 spin_unlock_irqrestore(&msm_iommu_lock, flags);
463 return ret;
464}
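For orientation, here is how a virtual address splits across the two table levels used above. This assumes FL_OFFSET() and SL_OFFSET() (defined in mach/iommu_hw-8xxx.h, outside this patch) implement the usual ARM short-descriptor layout, which is consistent with the 16 KB first-level table allocated in msm_iommu_domain_init():

	unsigned long va = 0x12345000UL;

	/* first level: 4096 entries of 1 MB each (16 KB table) */
	unsigned long fl_idx = va >> 20;		/* == 0x123 */
	/* second level: 256 small-page entries within that 1 MB */
	unsigned long sl_idx = (va >> 12) & 0xff;	/* == 0x45  */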
465
466static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
467 int order)
468{
469 struct msm_priv *priv;
470 unsigned long flags;
471 unsigned long *fl_table;
472 unsigned long *fl_pte;
473 unsigned long fl_offset;
474 unsigned long *sl_table;
475 unsigned long *sl_pte;
476 unsigned long sl_offset;
477 size_t len = 0x1000UL << order;
478 int i, ret = 0;
479
480 spin_lock_irqsave(&msm_iommu_lock, flags);
481
482 priv = domain->priv;
483
484 if (!priv) {
485 ret = -ENODEV;
486 goto fail;
487 }
488
489 fl_table = priv->pgtable;
490
491 if (len != SZ_16M && len != SZ_1M &&
492 len != SZ_64K && len != SZ_4K) {
493		pr_debug("Bad length: %zu\n", len);
494 ret = -EINVAL;
495 goto fail;
496 }
497
498 if (!fl_table) {
499 pr_debug("Null page table\n");
500 ret = -EINVAL;
501 goto fail;
502 }
503
504 fl_offset = FL_OFFSET(va); /* Upper 12 bits */
505 fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
506
507 if (*fl_pte == 0) {
508 pr_debug("First level PTE is 0\n");
509 ret = -ENODEV;
510 goto fail;
511 }
512
513 /* Unmap supersection */
514 if (len == SZ_16M)
515 for (i = 0; i < 16; i++)
516 *(fl_pte+i) = 0;
517
518 if (len == SZ_1M)
519 *fl_pte = 0;
520
521 sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
522 sl_offset = SL_OFFSET(va);
523 sl_pte = sl_table + sl_offset;
524
525 if (len == SZ_64K) {
526 for (i = 0; i < 16; i++)
527 *(sl_pte+i) = 0;
528 }
529
530 if (len == SZ_4K)
531 *sl_pte = 0;
532
533 if (len == SZ_4K || len == SZ_64K) {
534 int used = 0;
535
536 for (i = 0; i < NUM_SL_PTE; i++)
537 if (sl_table[i])
538 used = 1;
539 if (!used) {
540 free_page((unsigned long)sl_table);
541 *fl_pte = 0;
542 }
543 }
544
545 ret = __flush_iotlb(domain);
546fail:
547 spin_unlock_irqrestore(&msm_iommu_lock, flags);
548 return ret;
549}
550
551static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
552 unsigned long va)
553{
554 struct msm_priv *priv;
555 struct msm_iommu_drvdata *iommu_drvdata;
556 struct msm_iommu_ctx_drvdata *ctx_drvdata;
557 unsigned int par;
558 unsigned long flags;
559 void __iomem *base;
560 phys_addr_t ret = 0;
561 int ctx;
562
563 spin_lock_irqsave(&msm_iommu_lock, flags);
564
565 priv = domain->priv;
566 if (list_empty(&priv->list_attached))
567 goto fail;
568
569 ctx_drvdata = list_entry(priv->list_attached.next,
570 struct msm_iommu_ctx_drvdata, attached_elm);
571 iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
572
573 base = iommu_drvdata->base;
574 ctx = ctx_drvdata->num;
575
576 ret = __enable_clocks(iommu_drvdata);
577 if (ret)
578 goto fail;
579
580 /* Invalidate context TLB */
581 SET_CTX_TLBIALL(base, ctx, 0);
582 SET_V2PPR(base, ctx, va & V2Pxx_VA);
583
584 par = GET_PAR(base, ctx);
585
586 /* We are dealing with a supersection */
587 if (GET_NOFAULT_SS(base, ctx))
588 ret = (par & 0xFF000000) | (va & 0x00FFFFFF);
589 else /* Upper 20 bits from PAR, lower 12 from VA */
590 ret = (par & 0xFFFFF000) | (va & 0x00000FFF);
591
592 if (GET_FAULT(base, ctx))
593 ret = 0;
594
595 __disable_clocks(iommu_drvdata);
596fail:
597 spin_unlock_irqrestore(&msm_iommu_lock, flags);
598 return ret;
599}
600
601static int msm_iommu_domain_has_cap(struct iommu_domain *domain,
602 unsigned long cap)
603{
604 return 0;
605}
606
607static void print_ctx_regs(void __iomem *base, int ctx)
608{
609 unsigned int fsr = GET_FSR(base, ctx);
610 pr_err("FAR = %08x PAR = %08x\n",
611 GET_FAR(base, ctx), GET_PAR(base, ctx));
612 pr_err("FSR = %08x [%s%s%s%s%s%s%s%s%s%s]\n", fsr,
613 (fsr & 0x02) ? "TF " : "",
614 (fsr & 0x04) ? "AFF " : "",
615 (fsr & 0x08) ? "APF " : "",
616 (fsr & 0x10) ? "TLBMF " : "",
617 (fsr & 0x20) ? "HTWDEEF " : "",
618 (fsr & 0x40) ? "HTWSEEF " : "",
619 (fsr & 0x80) ? "MHF " : "",
620 (fsr & 0x10000) ? "SL " : "",
621 (fsr & 0x40000000) ? "SS " : "",
622 (fsr & 0x80000000) ? "MULTI " : "");
623
624 pr_err("FSYNR0 = %08x FSYNR1 = %08x\n",
625 GET_FSYNR0(base, ctx), GET_FSYNR1(base, ctx));
626 pr_err("TTBR0 = %08x TTBR1 = %08x\n",
627 GET_TTBR0(base, ctx), GET_TTBR1(base, ctx));
628 pr_err("SCTLR = %08x ACTLR = %08x\n",
629 GET_SCTLR(base, ctx), GET_ACTLR(base, ctx));
630 pr_err("PRRR = %08x NMRR = %08x\n",
631 GET_PRRR(base, ctx), GET_NMRR(base, ctx));
632}
633
634irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
635{
636 struct msm_iommu_drvdata *drvdata = dev_id;
637 void __iomem *base;
638 unsigned int fsr;
639 int i, ret;
640
641 spin_lock(&msm_iommu_lock);
642
643 if (!drvdata) {
644 pr_err("Invalid device ID in context interrupt handler\n");
645 goto fail;
646 }
647
648 base = drvdata->base;
649
650 pr_err("Unexpected IOMMU page fault!\n");
651 pr_err("base = %08x\n", (unsigned int) base);
652
653 ret = __enable_clocks(drvdata);
654 if (ret)
655 goto fail;
656
657 for (i = 0; i < drvdata->ncb; i++) {
658 fsr = GET_FSR(base, i);
659 if (fsr) {
660 pr_err("Fault occurred in context %d.\n", i);
661 pr_err("Interesting registers:\n");
662 print_ctx_regs(base, i);
663 SET_FSR(base, i, 0x4000000F);
664 }
665 }
666 __disable_clocks(drvdata);
667fail:
668 spin_unlock(&msm_iommu_lock);
669 return 0;
670}
671
672static struct iommu_ops msm_iommu_ops = {
673 .domain_init = msm_iommu_domain_init,
674 .domain_destroy = msm_iommu_domain_destroy,
675 .attach_dev = msm_iommu_attach_dev,
676 .detach_dev = msm_iommu_detach_dev,
677 .map = msm_iommu_map,
678 .unmap = msm_iommu_unmap,
679 .iova_to_phys = msm_iommu_iova_to_phys,
680 .domain_has_cap = msm_iommu_domain_has_cap
681};
682
683static int __init get_tex_class(int icp, int ocp, int mt, int nos)
684{
685 int i = 0;
686 unsigned int prrr = 0;
687 unsigned int nmrr = 0;
688 int c_icp, c_ocp, c_mt, c_nos;
689
690 RCP15_PRRR(prrr);
691 RCP15_NMRR(nmrr);
692
693 for (i = 0; i < NUM_TEX_CLASS; i++) {
694 c_nos = PRRR_NOS(prrr, i);
695 c_mt = PRRR_MT(prrr, i);
696 c_icp = NMRR_ICP(nmrr, i);
697 c_ocp = NMRR_OCP(nmrr, i);
698
699 if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos)
700 return i;
701 }
702
703 return -ENODEV;
704}
705
706static void __init setup_iommu_tex_classes(void)
707{
708 msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] =
709 get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1);
710
711 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] =
712 get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1);
713
714 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] =
715 get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1);
716
717 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] =
718 get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1);
719}
720
721static int __init msm_iommu_init(void)
722{
723 setup_iommu_tex_classes();
724 register_iommu(&msm_iommu_ops);
725 return 0;
726}
727
728subsys_initcall(msm_iommu_init);
729
730MODULE_LICENSE("GPL v2");
731MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
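To show how the ops registered above are meant to be consumed, here is a rough usage sketch against the generic IOMMU API of this kernel generation. It is illustrative only: the exact prototypes of iommu_domain_alloc() and iommu_map() were still changing around this time, and the context name "vpe_src" is made up.

	#include <linux/iommu.h>
	#include <mach/iommu.h>

	static int example_map_one_page(void)
	{
		struct iommu_domain *domain;
		struct device *ctx;
		int ret;

		domain = iommu_domain_alloc();		/* ends up in msm_iommu_domain_init() */
		if (!domain)
			return -ENOMEM;

		ctx = msm_iommu_get_ctx("vpe_src");	/* hypothetical context name */
		if (!ctx) {
			ret = -ENODEV;
			goto out_free;
		}

		ret = iommu_attach_device(domain, ctx);	/* msm_iommu_attach_dev() */
		if (ret)
			goto out_free;

		/* order 0 == one 4 KB page; msm_iommu_map() computes 0x1000UL << order */
		ret = iommu_map(domain, 0x10000000, 0x80000000, 0, 0);

		iommu_detach_device(domain, ctx);
	out_free:
		iommu_domain_free(domain);
		return ret;
	}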
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
new file mode 100644
index 000000000000..8e8fb079852d
--- /dev/null
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -0,0 +1,422 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/platform_device.h>
23#include <linux/io.h>
24#include <linux/clk.h>
25#include <linux/iommu.h>
26#include <linux/interrupt.h>
27#include <linux/err.h>
28#include <linux/slab.h>
29
30#include <mach/iommu_hw-8xxx.h>
31#include <mach/iommu.h>
32#include <mach/clk.h>
33
34struct iommu_ctx_iter_data {
35 /* input */
36 const char *name;
37
38 /* output */
39 struct device *dev;
40};
41
42static struct platform_device *msm_iommu_root_dev;
43
44static int each_iommu_ctx(struct device *dev, void *data)
45{
46 struct iommu_ctx_iter_data *res = data;
47 struct msm_iommu_ctx_dev *c = dev->platform_data;
48
49 if (!res || !c || !c->name || !res->name)
50 return -EINVAL;
51
52 if (!strcmp(res->name, c->name)) {
53 res->dev = dev;
54 return 1;
55 }
56 return 0;
57}
58
59static int each_iommu(struct device *dev, void *data)
60{
61 return device_for_each_child(dev, data, each_iommu_ctx);
62}
63
64struct device *msm_iommu_get_ctx(const char *ctx_name)
65{
66 struct iommu_ctx_iter_data r;
67 int found;
68
69 if (!msm_iommu_root_dev) {
70 pr_err("No root IOMMU device.\n");
71 goto fail;
72 }
73
74 r.name = ctx_name;
75 found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu);
76
77 if (!found) {
78 pr_err("Could not find context <%s>\n", ctx_name);
79 goto fail;
80 }
81
82 return r.dev;
83fail:
84 return NULL;
85}
86EXPORT_SYMBOL(msm_iommu_get_ctx);
87
88static void msm_iommu_reset(void __iomem *base, int ncb)
89{
90 int ctx;
91
92 SET_RPUE(base, 0);
93 SET_RPUEIE(base, 0);
94 SET_ESRRESTORE(base, 0);
95 SET_TBE(base, 0);
96 SET_CR(base, 0);
97 SET_SPDMBE(base, 0);
98 SET_TESTBUSCR(base, 0);
99 SET_TLBRSW(base, 0);
100 SET_GLOBAL_TLBIALL(base, 0);
101 SET_RPU_ACR(base, 0);
102 SET_TLBLKCRWE(base, 1);
103
104 for (ctx = 0; ctx < ncb; ctx++) {
105 SET_BPRCOSH(base, ctx, 0);
106 SET_BPRCISH(base, ctx, 0);
107 SET_BPRCNSH(base, ctx, 0);
108 SET_BPSHCFG(base, ctx, 0);
109 SET_BPMTCFG(base, ctx, 0);
110 SET_ACTLR(base, ctx, 0);
111 SET_SCTLR(base, ctx, 0);
112 SET_FSRRESTORE(base, ctx, 0);
113 SET_TTBR0(base, ctx, 0);
114 SET_TTBR1(base, ctx, 0);
115 SET_TTBCR(base, ctx, 0);
116 SET_BFBCR(base, ctx, 0);
117 SET_PAR(base, ctx, 0);
118 SET_FAR(base, ctx, 0);
119 SET_CTX_TLBIALL(base, ctx, 0);
120 SET_TLBFLPTER(base, ctx, 0);
121 SET_TLBSLPTER(base, ctx, 0);
122 SET_TLBLKCR(base, ctx, 0);
123 SET_PRRR(base, ctx, 0);
124 SET_NMRR(base, ctx, 0);
125 SET_CONTEXTIDR(base, ctx, 0);
126 }
127}
128
129static int msm_iommu_probe(struct platform_device *pdev)
130{
131 struct resource *r, *r2;
132 struct clk *iommu_clk;
133 struct clk *iommu_pclk;
134 struct msm_iommu_drvdata *drvdata;
135 struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;
136 void __iomem *regs_base;
137 resource_size_t len;
138 int ret, irq, par;
139
140 if (pdev->id == -1) {
141 msm_iommu_root_dev = pdev;
142 return 0;
143 }
144
145 drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL);
146
147 if (!drvdata) {
148 ret = -ENOMEM;
149 goto fail;
150 }
151
152 if (!iommu_dev) {
153 ret = -ENODEV;
154 goto fail;
155 }
156
157 iommu_pclk = clk_get(NULL, "smmu_pclk");
158 if (IS_ERR(iommu_pclk)) {
159 ret = -ENODEV;
160 goto fail;
161 }
162
163 ret = clk_enable(iommu_pclk);
164 if (ret)
165 goto fail_enable;
166
167 iommu_clk = clk_get(&pdev->dev, "iommu_clk");
168
169 if (!IS_ERR(iommu_clk)) {
170 if (clk_get_rate(iommu_clk) == 0)
171 clk_set_min_rate(iommu_clk, 1);
172
173 ret = clk_enable(iommu_clk);
174 if (ret) {
175 clk_put(iommu_clk);
176 goto fail_pclk;
177 }
178 } else
179 iommu_clk = NULL;
180
181 r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase");
182
183 if (!r) {
184 ret = -ENODEV;
185 goto fail_clk;
186 }
187
188 len = resource_size(r);
189
190 r2 = request_mem_region(r->start, len, r->name);
191 if (!r2) {
192 pr_err("Could not request memory region: start=%p, len=%d\n",
193 (void *) r->start, len);
194 ret = -EBUSY;
195 goto fail_clk;
196 }
197
198 regs_base = ioremap(r2->start, len);
199
200 if (!regs_base) {
201 pr_err("Could not ioremap: start=%p, len=%d\n",
202 (void *) r2->start, len);
203 ret = -EBUSY;
204 goto fail_mem;
205 }
206
207 irq = platform_get_irq_byname(pdev, "secure_irq");
208 if (irq < 0) {
209 ret = -ENODEV;
210 goto fail_io;
211 }
212
213 msm_iommu_reset(regs_base, iommu_dev->ncb);
214
215 SET_M(regs_base, 0, 1);
216 SET_PAR(regs_base, 0, 0);
217 SET_V2PCFG(regs_base, 0, 1);
218 SET_V2PPR(regs_base, 0, 0);
219 par = GET_PAR(regs_base, 0);
220 SET_V2PCFG(regs_base, 0, 0);
221 SET_M(regs_base, 0, 0);
222
223 if (!par) {
224 pr_err("%s: Invalid PAR value detected\n", iommu_dev->name);
225 ret = -ENODEV;
226 goto fail_io;
227 }
228
229 ret = request_irq(irq, msm_iommu_fault_handler, 0,
230 "msm_iommu_secure_irpt_handler", drvdata);
231 if (ret) {
232 pr_err("Request IRQ %d failed with ret=%d\n", irq, ret);
233 goto fail_io;
234 }
235
236
237 drvdata->pclk = iommu_pclk;
238 drvdata->clk = iommu_clk;
239 drvdata->base = regs_base;
240 drvdata->irq = irq;
241 drvdata->ncb = iommu_dev->ncb;
242
243 pr_info("device %s mapped at %p, irq %d with %d ctx banks\n",
244 iommu_dev->name, regs_base, irq, iommu_dev->ncb);
245
246 platform_set_drvdata(pdev, drvdata);
247
248 if (iommu_clk)
249 clk_disable(iommu_clk);
250
251 clk_disable(iommu_pclk);
252
253 return 0;
254fail_io:
255 iounmap(regs_base);
256fail_mem:
257 release_mem_region(r->start, len);
258fail_clk:
259 if (iommu_clk) {
260 clk_disable(iommu_clk);
261 clk_put(iommu_clk);
262 }
263fail_pclk:
264 clk_disable(iommu_pclk);
265fail_enable:
266 clk_put(iommu_pclk);
267fail:
268 kfree(drvdata);
269 return ret;
270}
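Editor's note: everything msm_iommu_probe() consumes comes from the board: a struct msm_iommu_dev (with .name and .ncb) as platform data, a memory resource named "physbase" and an IRQ resource named "secure_irq". A hypothetical board-file fragment describing one instance is sketched below; the register base, IRQ number, names and field layout are placeholders/assumptions inferred from the probe code.

#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <mach/iommu.h>		/* struct msm_iommu_dev (.name, .ncb assumed) */

static struct msm_iommu_dev example_iommu_pdata = {
	.name = "example_iommu",	/* placeholder */
	.ncb  = 2,			/* number of context banks behind this IOMMU */
};

static struct resource example_iommu_resources[] = {
	{
		.name  = "physbase",		/* looked up by name in the probe */
		.start = 0x07500000,		/* placeholder register base */
		.end   = 0x07500fff,
		.flags = IORESOURCE_MEM,
	},
	{
		.name  = "secure_irq",		/* looked up by name in the probe */
		.start = 70,			/* placeholder IRQ number */
		.end   = 70,
		.flags = IORESOURCE_IRQ,
	},
};

static struct platform_device example_iommu_device = {
	.name          = "msm_iommu",	/* binds against msm_iommu_driver below */
	.id            = 0,		/* id == -1 is reserved for the root device */
	.dev           = { .platform_data = &example_iommu_pdata },
	.num_resources = ARRAY_SIZE(example_iommu_resources),
	.resource      = example_iommu_resources,
};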
271
272static int msm_iommu_remove(struct platform_device *pdev)
273{
274 struct msm_iommu_drvdata *drv = NULL;
275
276 drv = platform_get_drvdata(pdev);
277 if (drv) {
278 if (drv->clk)
279 clk_put(drv->clk);
280 clk_put(drv->pclk);
281 memset(drv, 0, sizeof(*drv));
282 kfree(drv);
283 platform_set_drvdata(pdev, NULL);
284 }
285 return 0;
286}
287
288static int msm_iommu_ctx_probe(struct platform_device *pdev)
289{
290 struct msm_iommu_ctx_dev *c = pdev->dev.platform_data;
291 struct msm_iommu_drvdata *drvdata;
292 struct msm_iommu_ctx_drvdata *ctx_drvdata = NULL;
293 int i, ret;
294 if (!c || !pdev->dev.parent) {
295 ret = -EINVAL;
296 goto fail;
297 }
298
299 drvdata = dev_get_drvdata(pdev->dev.parent);
300
301 if (!drvdata) {
302 ret = -ENODEV;
303 goto fail;
304 }
305
306 ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL);
307 if (!ctx_drvdata) {
308 ret = -ENOMEM;
309 goto fail;
310 }
311 ctx_drvdata->num = c->num;
312 ctx_drvdata->pdev = pdev;
313
314 INIT_LIST_HEAD(&ctx_drvdata->attached_elm);
315 platform_set_drvdata(pdev, ctx_drvdata);
316
317 ret = clk_enable(drvdata->pclk);
318 if (ret)
319 goto fail;
320
321 if (drvdata->clk) {
322 ret = clk_enable(drvdata->clk);
323 if (ret) {
324 clk_disable(drvdata->pclk);
325 goto fail;
326 }
327 }
328
329 /* Program the M2V tables for this context */
330 for (i = 0; i < MAX_NUM_MIDS; i++) {
331 int mid = c->mids[i];
332 if (mid == -1)
333 break;
334
335 SET_M2VCBR_N(drvdata->base, mid, 0);
336 SET_CBACR_N(drvdata->base, c->num, 0);
337
338 /* Set VMID = 0 */
339 SET_VMID(drvdata->base, mid, 0);
340
341 /* Set the context number for that MID to this context */
342 SET_CBNDX(drvdata->base, mid, c->num);
343
344 /* Set the VMID associated with this context bank to 0 */
345 SET_CBVMID(drvdata->base, c->num, 0);
346
347 /* Set the ASID for TLB tagging for this context */
348 SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num);
349
350 /* Set security bit override to be Non-secure */
351 SET_NSCFG(drvdata->base, mid, 3);
352 }
353
354 if (drvdata->clk)
355 clk_disable(drvdata->clk);
356 clk_disable(drvdata->pclk);
357
358 dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num);
359 return 0;
360fail:
361 kfree(ctx_drvdata);
362 return ret;
363}
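Editor's note: the M2V programming loop above reads three things from the context's platform data: a name, a context-bank number, and a list of stream MIDs terminated with -1. A hypothetical descriptor for one context is sketched below; the name, bank number and MIDs are placeholders, and the field names are inferred from how the probe uses struct msm_iommu_ctx_dev.

#include <linux/platform_device.h>
#include <mach/iommu.h>		/* struct msm_iommu_ctx_dev, MAX_NUM_MIDS */

static struct msm_iommu_ctx_dev example_ctx_pdata = {
	.name = "example_ctx",		/* resolvable via msm_iommu_get_ctx() */
	.num  = 0,			/* context bank index within the parent IOMMU */
	.mids = { 0, 1, -1 },		/* MIDs routed to this bank, -1 terminated */
};

static struct platform_device example_ctx_device = {
	.name = "msm_iommu_ctx",	/* binds against msm_iommu_ctx_driver below */
	.id   = 0,			/* placeholder */
	.dev  = { .platform_data = &example_ctx_pdata },
	/*
	 * .dev.parent must be set to the owning msm_iommu device before
	 * registration, since the probe reads dev_get_drvdata(pdev->dev.parent).
	 */
};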
364
365static int msm_iommu_ctx_remove(struct platform_device *pdev)
366{
367 struct msm_iommu_ctx_drvdata *drv = NULL;
368 drv = platform_get_drvdata(pdev);
369 if (drv) {
370 memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata));
371 kfree(drv);
372 platform_set_drvdata(pdev, NULL);
373 }
374 return 0;
375}
376
377static struct platform_driver msm_iommu_driver = {
378 .driver = {
379 .name = "msm_iommu",
380 },
381 .probe = msm_iommu_probe,
382 .remove = msm_iommu_remove,
383};
384
385static struct platform_driver msm_iommu_ctx_driver = {
386 .driver = {
387 .name = "msm_iommu_ctx",
388 },
389 .probe = msm_iommu_ctx_probe,
390 .remove = msm_iommu_ctx_remove,
391};
392
393static int __init msm_iommu_driver_init(void)
394{
395 int ret;
396 ret = platform_driver_register(&msm_iommu_driver);
397 if (ret != 0) {
398 pr_err("Failed to register IOMMU driver\n");
399 goto error;
400 }
401
402 ret = platform_driver_register(&msm_iommu_ctx_driver);
403 if (ret != 0) {
404 platform_driver_unregister(&msm_iommu_driver);
405 pr_err("Failed to register IOMMU context driver\n");
406 goto error;
407 }
408error:
409 return ret;
410}
411
412static void __exit msm_iommu_driver_exit(void)
413{
414 platform_driver_unregister(&msm_iommu_ctx_driver);
415 platform_driver_unregister(&msm_iommu_driver);
416}
417
418subsys_initcall(msm_iommu_driver_init);
419module_exit(msm_iommu_driver_exit);
420
421MODULE_LICENSE("GPL v2");
422MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 094308e41be5..825c02b40daa 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -29,11 +29,6 @@ obj-$(CONFIG_PCI_MSI) += msi.o
29# Build the Hypertransport interrupt support 29# Build the Hypertransport interrupt support
30obj-$(CONFIG_HT_IRQ) += htirq.o 30obj-$(CONFIG_HT_IRQ) += htirq.o
31 31
32# Build Intel IOMMU support
33obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
34
35obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
36
37obj-$(CONFIG_PCI_IOV) += iov.o 32obj-$(CONFIG_PCI_IOV) += iov.o
38 33
39# 34#
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 731e20265ace..b7bf11dd546a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -184,8 +184,6 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
184 return NULL; 184 return NULL;
185} 185}
186 186
187struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
188
189/* PCI slot sysfs helper code */ 187/* PCI slot sysfs helper code */
190#define to_pci_slot(s) container_of(s, struct pci_slot, kobj) 188#define to_pci_slot(s) container_of(s, struct pci_slot, kobj)
191 189