Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Kconfig                                                      2
-rw-r--r--  drivers/Makefile                                                     1
-rw-r--r--  drivers/base/Makefile                                                1
-rw-r--r--  drivers/iommu/Kconfig                                              110
-rw-r--r--  drivers/iommu/Makefile                                               5
-rw-r--r--  drivers/iommu/amd_iommu.c                                         2810
-rw-r--r--  drivers/iommu/amd_iommu_init.c                                    1574
-rw-r--r--  drivers/iommu/amd_iommu_proto.h                                     54
-rw-r--r--  drivers/iommu/amd_iommu_types.h                                    585
-rw-r--r--  drivers/iommu/dmar.c (renamed from drivers/pci/dmar.c)               0
-rw-r--r--  drivers/iommu/intel-iommu.c (renamed from drivers/pci/intel-iommu.c) 1
-rw-r--r--  drivers/iommu/intr_remapping.c (renamed from drivers/pci/intr_remapping.c) 1
-rw-r--r--  drivers/iommu/intr_remapping.h (renamed from drivers/pci/intr_remapping.h) 0
-rw-r--r--  drivers/iommu/iommu.c (renamed from drivers/base/iommu.c)            0
-rw-r--r--  drivers/iommu/iova.c (renamed from drivers/pci/iova.c)               0
-rw-r--r--  drivers/iommu/msm_iommu.c                                          731
-rw-r--r--  drivers/iommu/msm_iommu_dev.c                                      422
-rw-r--r--  drivers/pci/Makefile                                                 5
-rw-r--r--  drivers/pci/pci.h                                                    2
19 files changed, 6294 insertions, 10 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3bb154d8c8cc..9d513188b47a 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -126,4 +126,6 @@ source "drivers/hwspinlock/Kconfig"
 
 source "drivers/clocksource/Kconfig"
 
+source "drivers/iommu/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 09f3232bcdcd..2f7a71a933de 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -122,3 +122,4 @@ obj-y += ieee802154/
 obj-y += clk/
 
 obj-$(CONFIG_HWSPINLOCK) += hwspinlock/
+obj-$(CONFIG_IOMMU_API) += iommu/
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 4c5701c15f53..5ab0d07c4578 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o
 obj-$(CONFIG_NUMA) += node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP) += topology.o
-obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES) += module.o
 endif
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
new file mode 100644
index 000000000000..b57b3fa492f3
--- /dev/null
+++ b/drivers/iommu/Kconfig
@@ -0,0 +1,110 @@
1# IOMMU_API always gets selected by whoever wants it.
2config IOMMU_API
3 bool
4
5menuconfig IOMMU_SUPPORT
6 bool "IOMMU Hardware Support"
7 default y
8 ---help---
9 Say Y here if you want to compile device drivers for IO Memory
10 Management Units into the kernel. These devices usually allow
11 remapping of DMA requests and/or interrupts from other devices on
12 the system.
13
14if IOMMU_SUPPORT
15
16# MSM IOMMU support
17config MSM_IOMMU
18 bool "MSM IOMMU Support"
19 depends on ARCH_MSM8X60 || ARCH_MSM8960
20 select IOMMU_API
21 help
22 Support for the IOMMUs found on certain Qualcomm SoCs.
23 These IOMMUs allow virtualization of the address space used by most
24 cores within the multimedia subsystem.
25
26 If unsure, say N here.
27
28config IOMMU_PGTABLES_L2
29 def_bool y
30 depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n
31
32# AMD IOMMU support
33config AMD_IOMMU
34 bool "AMD IOMMU support"
35 select SWIOTLB
36 select PCI_MSI
37 select PCI_IOV
38 select IOMMU_API
39 depends on X86_64 && PCI && ACPI
40 ---help---
41 With this option you can enable support for AMD IOMMU hardware in
42 your system. An IOMMU is a hardware component which provides
43 remapping of DMA memory accesses from devices. With an AMD IOMMU you
44 can isolate the DMA memory of different devices and protect the
45 system from misbehaving device drivers or hardware.
46
47 You can find out if your system has an AMD IOMMU if you look into
48 your BIOS for an option to enable it or if you have an IVRS ACPI
49 table.
50
51config AMD_IOMMU_STATS
52 bool "Export AMD IOMMU statistics to debugfs"
53 depends on AMD_IOMMU
54 select DEBUG_FS
55 ---help---
56 This option enables code in the AMD IOMMU driver to collect various
57 statistics about what's happening in the driver and exports that
58 information to userspace via debugfs.
59 If unsure, say N.
60
61# Intel IOMMU support
62config DMAR
63 bool "Support for DMA Remapping Devices"
64 depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
65 select IOMMU_API
66 help
67 DMA remapping (DMAR) devices support enables independent address
68 translations for Direct Memory Access (DMA) from devices.
69 These DMA remapping devices are reported via ACPI tables
70 and include PCI device scope covered by these DMA
71 remapping devices.
72
73config DMAR_DEFAULT_ON
74 def_bool y
75 prompt "Enable DMA Remapping Devices by default"
76 depends on DMAR
77 help
78 Selecting this option will enable a DMAR device at boot time if
79 one is found. If this option is not selected, DMAR support can
80 be enabled by passing intel_iommu=on to the kernel.
81
82config DMAR_BROKEN_GFX_WA
83 bool "Workaround broken graphics drivers (going away soon)"
84 depends on DMAR && BROKEN && X86
85 ---help---
86 Current graphics drivers tend to use physical addresses
87 for DMA and avoid using the DMA API. Setting this config
88 option permits the IOMMU driver to set a unity map for
89 all the OS-visible memory. Hence the driver can continue
90 to use physical addresses for DMA, at least until this
91 option is removed in the 2.6.32 kernel.
92
93config DMAR_FLOPPY_WA
94 def_bool y
95 depends on DMAR && X86
96 ---help---
97 Floppy disk drivers are known to bypass DMA API calls,
98 thereby failing to work when the IOMMU is enabled. This
99 workaround will setup a 1:1 mapping for the first
100 16MiB to make floppy (an ISA device) work.
101
102config INTR_REMAP
103 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
104 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
105 ---help---
106 Supports Interrupt remapping for IO-APIC and MSI devices.
107 To use x2apic mode in CPUs which support x2APIC enhancements or
108 to support platforms with CPUs having > 8-bit APIC IDs, say Y.
109
110endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
new file mode 100644
index 000000000000..4d4d77df7cac
--- /dev/null
+++ b/drivers/iommu/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_IOMMU_API) += iommu.o
2obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
3obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
4obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
5obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
new file mode 100644
index 000000000000..748eab063857
--- /dev/null
+++ b/drivers/iommu/amd_iommu.c
@@ -0,0 +1,2810 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/pci-ats.h>
22#include <linux/bitmap.h>
23#include <linux/slab.h>
24#include <linux/debugfs.h>
25#include <linux/scatterlist.h>
26#include <linux/dma-mapping.h>
27#include <linux/iommu-helper.h>
28#include <linux/iommu.h>
29#include <linux/delay.h>
30#include <linux/amd-iommu.h>
31#include <asm/proto.h>
32#include <asm/iommu.h>
33#include <asm/gart.h>
34#include <asm/dma.h>
35
36#include "amd_iommu_proto.h"
37#include "amd_iommu_types.h"
38
39#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
40
41#define LOOP_TIMEOUT 100000
42
43static DEFINE_RWLOCK(amd_iommu_devtable_lock);
44
45/* A list of preallocated protection domains */
46static LIST_HEAD(iommu_pd_list);
47static DEFINE_SPINLOCK(iommu_pd_list_lock);
48
49/* List of all available dev_data structures */
50static LIST_HEAD(dev_data_list);
51static DEFINE_SPINLOCK(dev_data_list_lock);
52
53/*
54 * Domain for untranslated devices - only allocated
55 * if iommu=pt passed on kernel cmd line.
56 */
57static struct protection_domain *pt_domain;
58
59static struct iommu_ops amd_iommu_ops;
60
61/*
62 * general struct to manage commands send to an IOMMU
63 */
64struct iommu_cmd {
65 u32 data[4];
66};
67
68static void update_domain(struct protection_domain *domain);
69
70/****************************************************************************
71 *
72 * Helper functions
73 *
74 ****************************************************************************/
75
76static struct iommu_dev_data *alloc_dev_data(u16 devid)
77{
78 struct iommu_dev_data *dev_data;
79 unsigned long flags;
80
81 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
82 if (!dev_data)
83 return NULL;
84
85 dev_data->devid = devid;
86 atomic_set(&dev_data->bind, 0);
87
88 spin_lock_irqsave(&dev_data_list_lock, flags);
89 list_add_tail(&dev_data->dev_data_list, &dev_data_list);
90 spin_unlock_irqrestore(&dev_data_list_lock, flags);
91
92 return dev_data;
93}
94
95static void free_dev_data(struct iommu_dev_data *dev_data)
96{
97 unsigned long flags;
98
99 spin_lock_irqsave(&dev_data_list_lock, flags);
100 list_del(&dev_data->dev_data_list);
101 spin_unlock_irqrestore(&dev_data_list_lock, flags);
102
103 kfree(dev_data);
104}
105
106static struct iommu_dev_data *search_dev_data(u16 devid)
107{
108 struct iommu_dev_data *dev_data;
109 unsigned long flags;
110
111 spin_lock_irqsave(&dev_data_list_lock, flags);
112 list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
113 if (dev_data->devid == devid)
114 goto out_unlock;
115 }
116
117 dev_data = NULL;
118
119out_unlock:
120 spin_unlock_irqrestore(&dev_data_list_lock, flags);
121
122 return dev_data;
123}
124
125static struct iommu_dev_data *find_dev_data(u16 devid)
126{
127 struct iommu_dev_data *dev_data;
128
129 dev_data = search_dev_data(devid);
130
131 if (dev_data == NULL)
132 dev_data = alloc_dev_data(devid);
133
134 return dev_data;
135}
136
137static inline u16 get_device_id(struct device *dev)
138{
139 struct pci_dev *pdev = to_pci_dev(dev);
140
141 return calc_devid(pdev->bus->number, pdev->devfn);
142}
143
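/*
 * Illustrative note (editorial, not part of the original patch): calc_devid()
 * packs the PCI bus and devfn into the 16-bit requester ID used as the index
 * into the device table, i.e. roughly
 *
 *	devid = (bus << 8) | devfn;
 *
 * so a device at 00:14.0 (devfn 0xa0) gets devid 0x00a0.
 */
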
144static struct iommu_dev_data *get_dev_data(struct device *dev)
145{
146 return dev->archdata.iommu;
147}
148
149/*
150 * In this function the list of preallocated protection domains is traversed to
151 * find the domain for a specific device
152 */
153static struct dma_ops_domain *find_protection_domain(u16 devid)
154{
155 struct dma_ops_domain *entry, *ret = NULL;
156 unsigned long flags;
157 u16 alias = amd_iommu_alias_table[devid];
158
159 if (list_empty(&iommu_pd_list))
160 return NULL;
161
162 spin_lock_irqsave(&iommu_pd_list_lock, flags);
163
164 list_for_each_entry(entry, &iommu_pd_list, list) {
165 if (entry->target_dev == devid ||
166 entry->target_dev == alias) {
167 ret = entry;
168 break;
169 }
170 }
171
172 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
173
174 return ret;
175}
176
177/*
178 * This function checks if the driver got a valid device from the caller to
179 * avoid dereferencing invalid pointers.
180 */
181static bool check_device(struct device *dev)
182{
183 u16 devid;
184
185 if (!dev || !dev->dma_mask)
186 return false;
187
188 /* No device or no PCI device */
189 if (dev->bus != &pci_bus_type)
190 return false;
191
192 devid = get_device_id(dev);
193
194 /* Out of our scope? */
195 if (devid > amd_iommu_last_bdf)
196 return false;
197
198 if (amd_iommu_rlookup_table[devid] == NULL)
199 return false;
200
201 return true;
202}
203
204static int iommu_init_device(struct device *dev)
205{
206 struct iommu_dev_data *dev_data;
207 u16 alias;
208
209 if (dev->archdata.iommu)
210 return 0;
211
212 dev_data = find_dev_data(get_device_id(dev));
213 if (!dev_data)
214 return -ENOMEM;
215
216 alias = amd_iommu_alias_table[dev_data->devid];
217 if (alias != dev_data->devid) {
218 struct iommu_dev_data *alias_data;
219
220 alias_data = find_dev_data(alias);
221 if (alias_data == NULL) {
222 pr_err("AMD-Vi: Warning: Unhandled device %s\n",
223 dev_name(dev));
224 free_dev_data(dev_data);
225 return -ENOTSUPP;
226 }
227 dev_data->alias_data = alias_data;
228 }
229
230 dev->archdata.iommu = dev_data;
231
232 return 0;
233}
234
235static void iommu_ignore_device(struct device *dev)
236{
237 u16 devid, alias;
238
239 devid = get_device_id(dev);
240 alias = amd_iommu_alias_table[devid];
241
242 memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
243 memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
244
245 amd_iommu_rlookup_table[devid] = NULL;
246 amd_iommu_rlookup_table[alias] = NULL;
247}
248
249static void iommu_uninit_device(struct device *dev)
250{
251 /*
252 * Nothing to do here - we keep dev_data around for unplugged devices
253 * and reuse it when the device is re-plugged - not doing so would
254 * introduce a ton of races.
255 */
256}
257
258void __init amd_iommu_uninit_devices(void)
259{
260 struct iommu_dev_data *dev_data, *n;
261 struct pci_dev *pdev = NULL;
262
263 for_each_pci_dev(pdev) {
264
265 if (!check_device(&pdev->dev))
266 continue;
267
268 iommu_uninit_device(&pdev->dev);
269 }
270
271 /* Free all of our dev_data structures */
272 list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list)
273 free_dev_data(dev_data);
274}
275
276int __init amd_iommu_init_devices(void)
277{
278 struct pci_dev *pdev = NULL;
279 int ret = 0;
280
281 for_each_pci_dev(pdev) {
282
283 if (!check_device(&pdev->dev))
284 continue;
285
286 ret = iommu_init_device(&pdev->dev);
287 if (ret == -ENOTSUPP)
288 iommu_ignore_device(&pdev->dev);
289 else if (ret)
290 goto out_free;
291 }
292
293 return 0;
294
295out_free:
296
297 amd_iommu_uninit_devices();
298
299 return ret;
300}
301#ifdef CONFIG_AMD_IOMMU_STATS
302
303/*
304 * Initialization code for statistics collection
305 */
306
307DECLARE_STATS_COUNTER(compl_wait);
308DECLARE_STATS_COUNTER(cnt_map_single);
309DECLARE_STATS_COUNTER(cnt_unmap_single);
310DECLARE_STATS_COUNTER(cnt_map_sg);
311DECLARE_STATS_COUNTER(cnt_unmap_sg);
312DECLARE_STATS_COUNTER(cnt_alloc_coherent);
313DECLARE_STATS_COUNTER(cnt_free_coherent);
314DECLARE_STATS_COUNTER(cross_page);
315DECLARE_STATS_COUNTER(domain_flush_single);
316DECLARE_STATS_COUNTER(domain_flush_all);
317DECLARE_STATS_COUNTER(alloced_io_mem);
318DECLARE_STATS_COUNTER(total_map_requests);
319
320static struct dentry *stats_dir;
321static struct dentry *de_fflush;
322
323static void amd_iommu_stats_add(struct __iommu_counter *cnt)
324{
325 if (stats_dir == NULL)
326 return;
327
328 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
329 &cnt->value);
330}
331
332static void amd_iommu_stats_init(void)
333{
334 stats_dir = debugfs_create_dir("amd-iommu", NULL);
335 if (stats_dir == NULL)
336 return;
337
338 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
339 (u32 *)&amd_iommu_unmap_flush);
340
341 amd_iommu_stats_add(&compl_wait);
342 amd_iommu_stats_add(&cnt_map_single);
343 amd_iommu_stats_add(&cnt_unmap_single);
344 amd_iommu_stats_add(&cnt_map_sg);
345 amd_iommu_stats_add(&cnt_unmap_sg);
346 amd_iommu_stats_add(&cnt_alloc_coherent);
347 amd_iommu_stats_add(&cnt_free_coherent);
348 amd_iommu_stats_add(&cross_page);
349 amd_iommu_stats_add(&domain_flush_single);
350 amd_iommu_stats_add(&domain_flush_all);
351 amd_iommu_stats_add(&alloced_io_mem);
352 amd_iommu_stats_add(&total_map_requests);
353}
354
355#endif
356
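/*
 * Editorial sketch (assumption, not visible in this hunk): the counters
 * declared above are presumably bumped from the dma_ops fast paths further
 * down in this file via a small helper along the lines of
 *
 *	INC_STATS_COUNTER(cnt_map_single);
 *
 * which compiles away when CONFIG_AMD_IOMMU_STATS is not set.
 */
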
357/****************************************************************************
358 *
359 * Interrupt handling functions
360 *
361 ****************************************************************************/
362
363static void dump_dte_entry(u16 devid)
364{
365 int i;
366
367 for (i = 0; i < 8; ++i)
368 pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
369 amd_iommu_dev_table[devid].data[i]);
370}
371
372static void dump_command(unsigned long phys_addr)
373{
374 struct iommu_cmd *cmd = phys_to_virt(phys_addr);
375 int i;
376
377 for (i = 0; i < 4; ++i)
378 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
379}
380
381static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
382{
383 u32 *event = __evt;
384 int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
385 int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
386 int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
387 int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
388 u64 address = (u64)(((u64)event[3]) << 32) | event[2];
389
390 printk(KERN_ERR "AMD-Vi: Event logged [");
391
392 switch (type) {
393 case EVENT_TYPE_ILL_DEV:
394 printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
395 "address=0x%016llx flags=0x%04x]\n",
396 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
397 address, flags);
398 dump_dte_entry(devid);
399 break;
400 case EVENT_TYPE_IO_FAULT:
401 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
402 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
403 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
404 domid, address, flags);
405 break;
406 case EVENT_TYPE_DEV_TAB_ERR:
407 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
408 "address=0x%016llx flags=0x%04x]\n",
409 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
410 address, flags);
411 break;
412 case EVENT_TYPE_PAGE_TAB_ERR:
413 printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
414 "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
415 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
416 domid, address, flags);
417 break;
418 case EVENT_TYPE_ILL_CMD:
419 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
420 dump_command(address);
421 break;
422 case EVENT_TYPE_CMD_HARD_ERR:
423 printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
424 "flags=0x%04x]\n", address, flags);
425 break;
426 case EVENT_TYPE_IOTLB_INV_TO:
427 printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
428 "address=0x%016llx]\n",
429 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
430 address);
431 break;
432 case EVENT_TYPE_INV_DEV_REQ:
433 printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
434 "address=0x%016llx flags=0x%04x]\n",
435 PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
436 address, flags);
437 break;
438 default:
439 printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
440 }
441}
442
443static void iommu_poll_events(struct amd_iommu *iommu)
444{
445 u32 head, tail;
446 unsigned long flags;
447
448 spin_lock_irqsave(&iommu->lock, flags);
449
450 head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
451 tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
452
453 while (head != tail) {
454 iommu_print_event(iommu, iommu->evt_buf + head);
455 head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
456 }
457
458 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
459
460 spin_unlock_irqrestore(&iommu->lock, flags);
461}
462
463irqreturn_t amd_iommu_int_thread(int irq, void *data)
464{
465 struct amd_iommu *iommu;
466
467 for_each_iommu(iommu)
468 iommu_poll_events(iommu);
469
470 return IRQ_HANDLED;
471}
472
473irqreturn_t amd_iommu_int_handler(int irq, void *data)
474{
475 return IRQ_WAKE_THREAD;
476}
477
478/****************************************************************************
479 *
480 * IOMMU command queuing functions
481 *
482 ****************************************************************************/
483
484static int wait_on_sem(volatile u64 *sem)
485{
486 int i = 0;
487
488 while (*sem == 0 && i < LOOP_TIMEOUT) {
489 udelay(1);
490 i += 1;
491 }
492
493 if (i == LOOP_TIMEOUT) {
494 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
495 return -EIO;
496 }
497
498 return 0;
499}
500
501static void copy_cmd_to_buffer(struct amd_iommu *iommu,
502 struct iommu_cmd *cmd,
503 u32 tail)
504{
505 u8 *target;
506
507 target = iommu->cmd_buf + tail;
508 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
509
510 /* Copy command to buffer */
511 memcpy(target, cmd, sizeof(*cmd));
512
513 /* Tell the IOMMU about it */
514 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
515}
516
517static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
518{
519 WARN_ON(address & 0x7ULL);
520
521 memset(cmd, 0, sizeof(*cmd));
522 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
523 cmd->data[1] = upper_32_bits(__pa(address));
524 cmd->data[2] = 1;
525 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
526}
527
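/*
 * Illustrative usage sketch (editorial; mirrors iommu_completion_wait()
 * further down): the semaphore lives on the caller's stack, the IOMMU writes
 * a non-zero value to it once the COMPL_WAIT command has executed, and
 * wait_on_sem() polls for that store:
 *
 *	volatile u64 sem = 0;
 *	struct iommu_cmd cmd;
 *
 *	build_completion_wait(&cmd, (u64)&sem);
 *	iommu_queue_command(iommu, &cmd);
 *	wait_on_sem(&sem);
 */
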
528static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
529{
530 memset(cmd, 0, sizeof(*cmd));
531 cmd->data[0] = devid;
532 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
533}
534
535static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
536 size_t size, u16 domid, int pde)
537{
538 u64 pages;
539 int s;
540
541 pages = iommu_num_pages(address, size, PAGE_SIZE);
542 s = 0;
543
544 if (pages > 1) {
545 /*
546 * If we have to flush more than one page, flush all
547 * TLB entries for this domain
548 */
549 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
550 s = 1;
551 }
552
553 address &= PAGE_MASK;
554
555 memset(cmd, 0, sizeof(*cmd));
556 cmd->data[1] |= domid;
557 cmd->data[2] = lower_32_bits(address);
558 cmd->data[3] = upper_32_bits(address);
559 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
560 if (s) /* size bit - we flush more than one 4kb page */
561 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
562 if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
563 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
564}
565
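/*
 * Example of the size-bit logic above (editorial, not part of the patch):
 * flushing 16kb at IOVA 0x100000 spans four 4kb pages, so the command is
 * built with address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS and the size bit set,
 * i.e. the whole TLB of the domain is flushed instead of single entries.
 */
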
566static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
567 u64 address, size_t size)
568{
569 u64 pages;
570 int s;
571
572 pages = iommu_num_pages(address, size, PAGE_SIZE);
573 s = 0;
574
575 if (pages > 1) {
576 /*
577 * If we have to flush more than one page, flush all
578 * TLB entries for this domain
579 */
580 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
581 s = 1;
582 }
583
584 address &= PAGE_MASK;
585
586 memset(cmd, 0, sizeof(*cmd));
587 cmd->data[0] = devid;
588 cmd->data[0] |= (qdep & 0xff) << 24;
589 cmd->data[1] = devid;
590 cmd->data[2] = lower_32_bits(address);
591 cmd->data[3] = upper_32_bits(address);
592 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
593 if (s)
594 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
595}
596
597static void build_inv_all(struct iommu_cmd *cmd)
598{
599 memset(cmd, 0, sizeof(*cmd));
600 CMD_SET_TYPE(cmd, CMD_INV_ALL);
601}
602
603/*
604 * Writes the command to the IOMMU's command buffer and informs the
605 * hardware about the new command.
606 */
607static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
608{
609 u32 left, tail, head, next_tail;
610 unsigned long flags;
611
612 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
613
614again:
615 spin_lock_irqsave(&iommu->lock, flags);
616
617 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
618 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
619 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
620 left = (head - next_tail) % iommu->cmd_buf_size;
621
622 if (left <= 2) {
623 struct iommu_cmd sync_cmd;
624 volatile u64 sem = 0;
625 int ret;
626
627 build_completion_wait(&sync_cmd, (u64)&sem);
628 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
629
630 spin_unlock_irqrestore(&iommu->lock, flags);
631
632 if ((ret = wait_on_sem(&sem)) != 0)
633 return ret;
634
635 goto again;
636 }
637
638 copy_cmd_to_buffer(iommu, cmd, tail);
639
640 /* We need to sync now to make sure all commands are processed */
641 iommu->need_sync = true;
642
643 spin_unlock_irqrestore(&iommu->lock, flags);
644
645 return 0;
646}
647
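/*
 * Editorial note on the ring arithmetic above (assuming an 8kb command
 * buffer, i.e. 512 entries of 16 bytes): with head == 0x100 and
 * tail == 0x1f0, next_tail becomes 0x200 and
 *
 *	left = (0x100 - 0x200) % 0x2000 = 0x1f00
 *
 * bytes of free space. Only when left drops to 2 or less does the function
 * queue a COMPL_WAIT, spin in wait_on_sem() until the hardware drains the
 * buffer, and then retry.
 */
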
648/*
649 * This function queues a completion wait command into the command
650 * buffer of an IOMMU
651 */
652static int iommu_completion_wait(struct amd_iommu *iommu)
653{
654 struct iommu_cmd cmd;
655 volatile u64 sem = 0;
656 int ret;
657
658 if (!iommu->need_sync)
659 return 0;
660
661 build_completion_wait(&cmd, (u64)&sem);
662
663 ret = iommu_queue_command(iommu, &cmd);
664 if (ret)
665 return ret;
666
667 return wait_on_sem(&sem);
668}
669
670static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
671{
672 struct iommu_cmd cmd;
673
674 build_inv_dte(&cmd, devid);
675
676 return iommu_queue_command(iommu, &cmd);
677}
678
679static void iommu_flush_dte_all(struct amd_iommu *iommu)
680{
681 u32 devid;
682
683 for (devid = 0; devid <= 0xffff; ++devid)
684 iommu_flush_dte(iommu, devid);
685
686 iommu_completion_wait(iommu);
687}
688
689/*
690 * This function uses heavy locking and may disable irqs for some time. But
691 * this is no issue because it is only called during resume.
692 */
693static void iommu_flush_tlb_all(struct amd_iommu *iommu)
694{
695 u32 dom_id;
696
697 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
698 struct iommu_cmd cmd;
699 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
700 dom_id, 1);
701 iommu_queue_command(iommu, &cmd);
702 }
703
704 iommu_completion_wait(iommu);
705}
706
707static void iommu_flush_all(struct amd_iommu *iommu)
708{
709 struct iommu_cmd cmd;
710
711 build_inv_all(&cmd);
712
713 iommu_queue_command(iommu, &cmd);
714 iommu_completion_wait(iommu);
715}
716
717void iommu_flush_all_caches(struct amd_iommu *iommu)
718{
719 if (iommu_feature(iommu, FEATURE_IA)) {
720 iommu_flush_all(iommu);
721 } else {
722 iommu_flush_dte_all(iommu);
723 iommu_flush_tlb_all(iommu);
724 }
725}
726
727/*
728 * Command send function for flushing on-device TLB
729 */
730static int device_flush_iotlb(struct iommu_dev_data *dev_data,
731 u64 address, size_t size)
732{
733 struct amd_iommu *iommu;
734 struct iommu_cmd cmd;
735 int qdep;
736
737 qdep = dev_data->ats.qdep;
738 iommu = amd_iommu_rlookup_table[dev_data->devid];
739
740 build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);
741
742 return iommu_queue_command(iommu, &cmd);
743}
744
745/*
746 * Command send function for invalidating a device table entry
747 */
748static int device_flush_dte(struct iommu_dev_data *dev_data)
749{
750 struct amd_iommu *iommu;
751 int ret;
752
753 iommu = amd_iommu_rlookup_table[dev_data->devid];
754
755 ret = iommu_flush_dte(iommu, dev_data->devid);
756 if (ret)
757 return ret;
758
759 if (dev_data->ats.enabled)
760 ret = device_flush_iotlb(dev_data, 0, ~0UL);
761
762 return ret;
763}
764
765/*
766 * TLB invalidation function which is called from the mapping functions.
767 * It invalidates a single PTE if the range to flush is within a single
768 * page. Otherwise it flushes the whole TLB of the IOMMU.
769 */
770static void __domain_flush_pages(struct protection_domain *domain,
771 u64 address, size_t size, int pde)
772{
773 struct iommu_dev_data *dev_data;
774 struct iommu_cmd cmd;
775 int ret = 0, i;
776
777 build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
778
779 for (i = 0; i < amd_iommus_present; ++i) {
780 if (!domain->dev_iommu[i])
781 continue;
782
783 /*
784 * Devices of this domain are behind this IOMMU
785 * We need a TLB flush
786 */
787 ret |= iommu_queue_command(amd_iommus[i], &cmd);
788 }
789
790 list_for_each_entry(dev_data, &domain->dev_list, list) {
791
792 if (!dev_data->ats.enabled)
793 continue;
794
795 ret |= device_flush_iotlb(dev_data, address, size);
796 }
797
798 WARN_ON(ret);
799}
800
801static void domain_flush_pages(struct protection_domain *domain,
802 u64 address, size_t size)
803{
804 __domain_flush_pages(domain, address, size, 0);
805}
806
807/* Flush the whole IO/TLB for a given protection domain */
808static void domain_flush_tlb(struct protection_domain *domain)
809{
810 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
811}
812
813/* Flush the whole IO/TLB for a given protection domain - including PDE */
814static void domain_flush_tlb_pde(struct protection_domain *domain)
815{
816 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
817}
818
819static void domain_flush_complete(struct protection_domain *domain)
820{
821 int i;
822
823 for (i = 0; i < amd_iommus_present; ++i) {
824 if (!domain->dev_iommu[i])
825 continue;
826
827 /*
828 * Devices of this domain are behind this IOMMU
829 * We need to wait for completion of all commands.
830 */
831 iommu_completion_wait(amd_iommus[i]);
832 }
833}
834
835
836/*
837 * This function flushes the DTEs for all devices in domain
838 */
839static void domain_flush_devices(struct protection_domain *domain)
840{
841 struct iommu_dev_data *dev_data;
842 unsigned long flags;
843
844 spin_lock_irqsave(&domain->lock, flags);
845
846 list_for_each_entry(dev_data, &domain->dev_list, list)
847 device_flush_dte(dev_data);
848
849 spin_unlock_irqrestore(&domain->lock, flags);
850}
851
852/****************************************************************************
853 *
854 * The functions below are used to create the page table mappings for
855 * unity mapped regions.
856 *
857 ****************************************************************************/
858
859/*
860 * This function is used to add another level to an IO page table. Adding
861 * another level increases the size of the address space by 9 bits to a size up
862 * to 64 bits.
863 */
864static bool increase_address_space(struct protection_domain *domain,
865 gfp_t gfp)
866{
867 u64 *pte;
868
869 if (domain->mode == PAGE_MODE_6_LEVEL)
870 /* address space already 64 bit large */
871 return false;
872
873 pte = (void *)get_zeroed_page(gfp);
874 if (!pte)
875 return false;
876
877 *pte = PM_LEVEL_PDE(domain->mode,
878 virt_to_phys(domain->pt_root));
879 domain->pt_root = pte;
880 domain->mode += 1;
881 domain->updated = true;
882
883 return true;
884}
885
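/*
 * Editorial example: with 9 address bits per level and a 12-bit page offset,
 * a 3-level table covers 3*9+12 = 39 bits (512GB) of IO virtual address
 * space; each call to increase_address_space() adds one level and thus 9
 * more bits, up to the 6-level maximum checked above.
 */
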
886static u64 *alloc_pte(struct protection_domain *domain,
887 unsigned long address,
888 unsigned long page_size,
889 u64 **pte_page,
890 gfp_t gfp)
891{
892 int level, end_lvl;
893 u64 *pte, *page;
894
895 BUG_ON(!is_power_of_2(page_size));
896
897 while (address > PM_LEVEL_SIZE(domain->mode))
898 increase_address_space(domain, gfp);
899
900 level = domain->mode - 1;
901 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
902 address = PAGE_SIZE_ALIGN(address, page_size);
903 end_lvl = PAGE_SIZE_LEVEL(page_size);
904
905 while (level > end_lvl) {
906 if (!IOMMU_PTE_PRESENT(*pte)) {
907 page = (u64 *)get_zeroed_page(gfp);
908 if (!page)
909 return NULL;
910 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
911 }
912
913 /* No level skipping support yet */
914 if (PM_PTE_LEVEL(*pte) != level)
915 return NULL;
916
917 level -= 1;
918
919 pte = IOMMU_PTE_PAGE(*pte);
920
921 if (pte_page && level == end_lvl)
922 *pte_page = pte;
923
924 pte = &pte[PM_LEVEL_INDEX(level, address)];
925 }
926
927 return pte;
928}
929
930/*
931 * This function checks if there is a PTE for a given dma address. If
932 * there is one, it returns the pointer to it.
933 */
934static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
935{
936 int level;
937 u64 *pte;
938
939 if (address > PM_LEVEL_SIZE(domain->mode))
940 return NULL;
941
942 level = domain->mode - 1;
943 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
944
945 while (level > 0) {
946
947 /* Not Present */
948 if (!IOMMU_PTE_PRESENT(*pte))
949 return NULL;
950
951 /* Large PTE */
952 if (PM_PTE_LEVEL(*pte) == 0x07) {
953 unsigned long pte_mask, __pte;
954
955 /*
956 * If we have a series of large PTEs, make
957 * sure to return a pointer to the first one.
958 */
959 pte_mask = PTE_PAGE_SIZE(*pte);
960 pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
961 __pte = ((unsigned long)pte) & pte_mask;
962
963 return (u64 *)__pte;
964 }
965
966 /* No level skipping support yet */
967 if (PM_PTE_LEVEL(*pte) != level)
968 return NULL;
969
970 level -= 1;
971
972 /* Walk to the next level */
973 pte = IOMMU_PTE_PAGE(*pte);
974 pte = &pte[PM_LEVEL_INDEX(level, address)];
975 }
976
977 return pte;
978}
979
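/*
 * Worked example for the large-PTE case above (editorial note): a 64kb
 * mapping is written as 16 replicated level-0 PTEs, so for any of them
 * PTE_PAGE_SIZE(*pte) is 0x10000 and PAGE_SIZE_PTE_COUNT() is 16; shifted
 * by 3 (8 bytes per entry) that spans 128 bytes, so the mask rounds the
 * pointer down to the first PTE of the group before returning it.
 */
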
980/*
981 * Generic mapping function. It maps a physical address into a DMA
982 * address space. It allocates the page table pages if necessary.
983 * In the future it can be extended to a generic mapping function
984 * supporting all features of AMD IOMMU page tables like level skipping
985 * and full 64 bit address spaces.
986 */
987static int iommu_map_page(struct protection_domain *dom,
988 unsigned long bus_addr,
989 unsigned long phys_addr,
990 int prot,
991 unsigned long page_size)
992{
993 u64 __pte, *pte;
994 int i, count;
995
996 if (!(prot & IOMMU_PROT_MASK))
997 return -EINVAL;
998
999 bus_addr = PAGE_ALIGN(bus_addr);
1000 phys_addr = PAGE_ALIGN(phys_addr);
1001 count = PAGE_SIZE_PTE_COUNT(page_size);
1002 pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
1003
1004 for (i = 0; i < count; ++i)
1005 if (IOMMU_PTE_PRESENT(pte[i]))
1006 return -EBUSY;
1007
1008 if (page_size > PAGE_SIZE) {
1009 __pte = PAGE_SIZE_PTE(phys_addr, page_size);
1010 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
1011 } else
1012 __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
1013
1014 if (prot & IOMMU_PROT_IR)
1015 __pte |= IOMMU_PTE_IR;
1016 if (prot & IOMMU_PROT_IW)
1017 __pte |= IOMMU_PTE_IW;
1018
1019 for (i = 0; i < count; ++i)
1020 pte[i] = __pte;
1021
1022 update_domain(dom);
1023
1024 return 0;
1025}
1026
1027static unsigned long iommu_unmap_page(struct protection_domain *dom,
1028 unsigned long bus_addr,
1029 unsigned long page_size)
1030{
1031 unsigned long long unmap_size, unmapped;
1032 u64 *pte;
1033
1034 BUG_ON(!is_power_of_2(page_size));
1035
1036 unmapped = 0;
1037
1038 while (unmapped < page_size) {
1039
1040 pte = fetch_pte(dom, bus_addr);
1041
1042 if (!pte) {
1043 /*
1044 * No PTE for this address
1045 * move forward in 4kb steps
1046 */
1047 unmap_size = PAGE_SIZE;
1048 } else if (PM_PTE_LEVEL(*pte) == 0) {
1049 /* 4kb PTE found for this address */
1050 unmap_size = PAGE_SIZE;
1051 *pte = 0ULL;
1052 } else {
1053 int count, i;
1054
1055 /* Large PTE found which maps this address */
1056 unmap_size = PTE_PAGE_SIZE(*pte);
1057 count = PAGE_SIZE_PTE_COUNT(unmap_size);
1058 for (i = 0; i < count; i++)
1059 pte[i] = 0ULL;
1060 }
1061
1062 bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
1063 unmapped += unmap_size;
1064 }
1065
1066 BUG_ON(!is_power_of_2(unmapped));
1067
1068 return unmapped;
1069}
1070
1071/*
1072 * This function checks if a specific unity mapping entry is needed for
1073 * this specific IOMMU.
1074 */
1075static int iommu_for_unity_map(struct amd_iommu *iommu,
1076 struct unity_map_entry *entry)
1077{
1078 u16 bdf, i;
1079
1080 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1081 bdf = amd_iommu_alias_table[i];
1082 if (amd_iommu_rlookup_table[bdf] == iommu)
1083 return 1;
1084 }
1085
1086 return 0;
1087}
1088
1089/*
1090 * This function actually applies the mapping to the page table of the
1091 * dma_ops domain.
1092 */
1093static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1094 struct unity_map_entry *e)
1095{
1096 u64 addr;
1097 int ret;
1098
1099 for (addr = e->address_start; addr < e->address_end;
1100 addr += PAGE_SIZE) {
1101 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1102 PAGE_SIZE);
1103 if (ret)
1104 return ret;
1105 /*
1106 * if unity mapping is in aperture range mark the page
1107 * as allocated in the aperture
1108 */
1109 if (addr < dma_dom->aperture_size)
1110 __set_bit(addr >> PAGE_SHIFT,
1111 dma_dom->aperture[0]->bitmap);
1112 }
1113
1114 return 0;
1115}
1116
1117/*
1118 * Init the unity mappings for a specific IOMMU in the system
1119 *
1120 * Basically iterates over all unity mapping entries and applies them to
1121 * the default DMA domain of that IOMMU if necessary.
1122 */
1123static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1124{
1125 struct unity_map_entry *entry;
1126 int ret;
1127
1128 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1129 if (!iommu_for_unity_map(iommu, entry))
1130 continue;
1131 ret = dma_ops_unity_map(iommu->default_dom, entry);
1132 if (ret)
1133 return ret;
1134 }
1135
1136 return 0;
1137}
1138
1139/*
1140 * Inits the unity mappings required for a specific device
1141 */
1142static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1143 u16 devid)
1144{
1145 struct unity_map_entry *e;
1146 int ret;
1147
1148 list_for_each_entry(e, &amd_iommu_unity_map, list) {
1149 if (!(devid >= e->devid_start && devid <= e->devid_end))
1150 continue;
1151 ret = dma_ops_unity_map(dma_dom, e);
1152 if (ret)
1153 return ret;
1154 }
1155
1156 return 0;
1157}
1158
1159/****************************************************************************
1160 *
1161 * The next functions belong to the address allocator for the dma_ops
1162 * interface functions. They work like the allocators in the other IOMMU
1163 * drivers. It's basically a bitmap which marks the allocated pages in
1164 * the aperture. Maybe it could be enhanced in the future to a more
1165 * efficient allocator.
1166 *
1167 ****************************************************************************/
1168
1169/*
1170 * The address allocator core functions.
1171 *
1172 * called with domain->lock held
1173 */
1174
1175/*
1176 * Used to reserve address ranges in the aperture (e.g. for exclusion
1177 * ranges).
1178 */
1179static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1180 unsigned long start_page,
1181 unsigned int pages)
1182{
1183 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1184
1185 if (start_page + pages > last_page)
1186 pages = last_page - start_page;
1187
1188 for (i = start_page; i < start_page + pages; ++i) {
1189 int index = i / APERTURE_RANGE_PAGES;
1190 int page = i % APERTURE_RANGE_PAGES;
1191 __set_bit(page, dom->aperture[index]->bitmap);
1192 }
1193}
1194
1195/*
1196 * This function is used to add a new aperture range to an existing
1197 * aperture in case of dma_ops domain allocation or address allocation
1198 * failure.
1199 */
1200static int alloc_new_range(struct dma_ops_domain *dma_dom,
1201 bool populate, gfp_t gfp)
1202{
1203 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
1204 struct amd_iommu *iommu;
1205 unsigned long i;
1206
1207#ifdef CONFIG_IOMMU_STRESS
1208 populate = false;
1209#endif
1210
1211 if (index >= APERTURE_MAX_RANGES)
1212 return -ENOMEM;
1213
1214 dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
1215 if (!dma_dom->aperture[index])
1216 return -ENOMEM;
1217
1218 dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
1219 if (!dma_dom->aperture[index]->bitmap)
1220 goto out_free;
1221
1222 dma_dom->aperture[index]->offset = dma_dom->aperture_size;
1223
1224 if (populate) {
1225 unsigned long address = dma_dom->aperture_size;
1226 int i, num_ptes = APERTURE_RANGE_PAGES / 512;
1227 u64 *pte, *pte_page;
1228
1229 for (i = 0; i < num_ptes; ++i) {
1230 pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1231 &pte_page, gfp);
1232 if (!pte)
1233 goto out_free;
1234
1235 dma_dom->aperture[index]->pte_pages[i] = pte_page;
1236
1237 address += APERTURE_RANGE_SIZE / 64;
1238 }
1239 }
1240
1241 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
1242
1243 /* Initialize the exclusion range if necessary */
1244 for_each_iommu(iommu) {
1245 if (iommu->exclusion_start &&
1246 iommu->exclusion_start >= dma_dom->aperture[index]->offset
1247 && iommu->exclusion_start < dma_dom->aperture_size) {
1248 unsigned long startpage;
1249 int pages = iommu_num_pages(iommu->exclusion_start,
1250 iommu->exclusion_length,
1251 PAGE_SIZE);
1252 startpage = iommu->exclusion_start >> PAGE_SHIFT;
1253 dma_ops_reserve_addresses(dma_dom, startpage, pages);
1254 }
1255 }
1256
1257 /*
1258 * Check for areas already mapped as present in the new aperture
1259 * range and mark those pages as reserved in the allocator. Such
1260 * mappings may already exist as a result of requested unity
1261 * mappings for devices.
1262 */
1263 for (i = dma_dom->aperture[index]->offset;
1264 i < dma_dom->aperture_size;
1265 i += PAGE_SIZE) {
1266 u64 *pte = fetch_pte(&dma_dom->domain, i);
1267 if (!pte || !IOMMU_PTE_PRESENT(*pte))
1268 continue;
1269
1270 dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
1271 }
1272
1273 update_domain(&dma_dom->domain);
1274
1275 return 0;
1276
1277out_free:
1278 update_domain(&dma_dom->domain);
1279
1280 free_page((unsigned long)dma_dom->aperture[index]->bitmap);
1281
1282 kfree(dma_dom->aperture[index]);
1283 dma_dom->aperture[index] = NULL;
1284
1285 return -ENOMEM;
1286}
1287
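/*
 * Editorial note: dma_ops_domain_alloc() below calls
 * alloc_new_range(dma_dom, true, GFP_KERNEL) exactly once, so a fresh
 * dma_ops domain starts out with one populated aperture range; additional
 * ranges are only added when a later address allocation fails.
 */
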
1288static unsigned long dma_ops_area_alloc(struct device *dev,
1289 struct dma_ops_domain *dom,
1290 unsigned int pages,
1291 unsigned long align_mask,
1292 u64 dma_mask,
1293 unsigned long start)
1294{
1295 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1296 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1297 int i = start >> APERTURE_RANGE_SHIFT;
1298 unsigned long boundary_size;
1299 unsigned long address = -1;
1300 unsigned long limit;
1301
1302 next_bit >>= PAGE_SHIFT;
1303
1304 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1305 PAGE_SIZE) >> PAGE_SHIFT;
1306
1307 for (;i < max_index; ++i) {
1308 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
1309
1310 if (dom->aperture[i]->offset >= dma_mask)
1311 break;
1312
1313 limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
1314 dma_mask >> PAGE_SHIFT);
1315
1316 address = iommu_area_alloc(dom->aperture[i]->bitmap,
1317 limit, next_bit, pages, 0,
1318 boundary_size, align_mask);
1319 if (address != -1) {
1320 address = dom->aperture[i]->offset +
1321 (address << PAGE_SHIFT);
1322 dom->next_address = address + (pages << PAGE_SHIFT);
1323 break;
1324 }
1325
1326 next_bit = 0;
1327 }
1328
1329 return address;
1330}
1331
1332static unsigned long dma_ops_alloc_addresses(struct device *dev,
1333 struct dma_ops_domain *dom,
1334 unsigned int pages,
1335 unsigned long align_mask,
1336 u64 dma_mask)
1337{
1338 unsigned long address;
1339
1340#ifdef CONFIG_IOMMU_STRESS
1341 dom->next_address = 0;
1342 dom->need_flush = true;
1343#endif
1344
1345 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1346 dma_mask, dom->next_address);
1347
1348 if (address == -1) {
1349 dom->next_address = 0;
1350 address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1351 dma_mask, 0);
1352 dom->need_flush = true;
1353 }
1354
1355 if (unlikely(address == -1))
1356 address = DMA_ERROR_CODE;
1357
1358 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1359
1360 return address;
1361}
1362
1363/*
1364 * The address free function.
1365 *
1366 * called with domain->lock held
1367 */
1368static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1369 unsigned long address,
1370 unsigned int pages)
1371{
1372 unsigned i = address >> APERTURE_RANGE_SHIFT;
1373 struct aperture_range *range = dom->aperture[i];
1374
1375 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1376
1377#ifdef CONFIG_IOMMU_STRESS
1378 if (i < 4)
1379 return;
1380#endif
1381
1382 if (address >= dom->next_address)
1383 dom->need_flush = true;
1384
1385 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
1386
1387 bitmap_clear(range->bitmap, address, pages);
1388
1389}
1390
1391/****************************************************************************
1392 *
1393 * The next functions belong to the domain allocation. A domain is
1394 * allocated for every IOMMU as the default domain. If device isolation
1395 * is enabled, every device gets its own domain. The most important thing
1396 * about domains is the page table mapping the DMA address space they
1397 * contain.
1398 *
1399 ****************************************************************************/
1400
1401/*
1402 * This function adds a protection domain to the global protection domain list
1403 */
1404static void add_domain_to_list(struct protection_domain *domain)
1405{
1406 unsigned long flags;
1407
1408 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1409 list_add(&domain->list, &amd_iommu_pd_list);
1410 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1411}
1412
1413/*
1414 * This function removes a protection domain from the global
1415 * protection domain list
1416 */
1417static void del_domain_from_list(struct protection_domain *domain)
1418{
1419 unsigned long flags;
1420
1421 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1422 list_del(&domain->list);
1423 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1424}
1425
1426static u16 domain_id_alloc(void)
1427{
1428 unsigned long flags;
1429 int id;
1430
1431 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1432 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1433 BUG_ON(id == 0);
1434 if (id > 0 && id < MAX_DOMAIN_ID)
1435 __set_bit(id, amd_iommu_pd_alloc_bitmap);
1436 else
1437 id = 0;
1438 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1439
1440 return id;
1441}
1442
1443static void domain_id_free(int id)
1444{
1445 unsigned long flags;
1446
1447 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1448 if (id > 0 && id < MAX_DOMAIN_ID)
1449 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1450 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1451}
1452
1453static void free_pagetable(struct protection_domain *domain)
1454{
1455 int i, j;
1456 u64 *p1, *p2, *p3;
1457
1458 p1 = domain->pt_root;
1459
1460 if (!p1)
1461 return;
1462
1463 for (i = 0; i < 512; ++i) {
1464 if (!IOMMU_PTE_PRESENT(p1[i]))
1465 continue;
1466
1467 p2 = IOMMU_PTE_PAGE(p1[i]);
1468 for (j = 0; j < 512; ++j) {
1469 if (!IOMMU_PTE_PRESENT(p2[j]))
1470 continue;
1471 p3 = IOMMU_PTE_PAGE(p2[j]);
1472 free_page((unsigned long)p3);
1473 }
1474
1475 free_page((unsigned long)p2);
1476 }
1477
1478 free_page((unsigned long)p1);
1479
1480 domain->pt_root = NULL;
1481}
1482
1483/*
1484 * Free a domain, only used if something went wrong in the
1485 * allocation path and we need to free an already allocated page table
1486 */
1487static void dma_ops_domain_free(struct dma_ops_domain *dom)
1488{
1489 int i;
1490
1491 if (!dom)
1492 return;
1493
1494 del_domain_from_list(&dom->domain);
1495
1496 free_pagetable(&dom->domain);
1497
1498 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1499 if (!dom->aperture[i])
1500 continue;
1501 free_page((unsigned long)dom->aperture[i]->bitmap);
1502 kfree(dom->aperture[i]);
1503 }
1504
1505 kfree(dom);
1506}
1507
1508/*
1509 * Allocates a new protection domain usable for the dma_ops functions.
1510 * It also initializes the page table and the address allocator data
1511 * structures required for the dma_ops interface
1512 */
1513static struct dma_ops_domain *dma_ops_domain_alloc(void)
1514{
1515 struct dma_ops_domain *dma_dom;
1516
1517 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1518 if (!dma_dom)
1519 return NULL;
1520
1521 spin_lock_init(&dma_dom->domain.lock);
1522
1523 dma_dom->domain.id = domain_id_alloc();
1524 if (dma_dom->domain.id == 0)
1525 goto free_dma_dom;
1526 INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1527 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1528 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1529 dma_dom->domain.flags = PD_DMA_OPS_MASK;
1530 dma_dom->domain.priv = dma_dom;
1531 if (!dma_dom->domain.pt_root)
1532 goto free_dma_dom;
1533
1534 dma_dom->need_flush = false;
1535 dma_dom->target_dev = 0xffff;
1536
1537 add_domain_to_list(&dma_dom->domain);
1538
1539 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1540 goto free_dma_dom;
1541
1542 /*
1543 * mark the first page as allocated so we never return 0 as
1544 * a valid dma-address and can thus use 0 as the error value
1545 */
1546 dma_dom->aperture[0]->bitmap[0] = 1;
1547 dma_dom->next_address = 0;
1548
1549
1550 return dma_dom;
1551
1552free_dma_dom:
1553 dma_ops_domain_free(dma_dom);
1554
1555 return NULL;
1556}
1557
1558/*
1559 * little helper function to check whether a given protection domain is a
1560 * dma_ops domain
1561 */
1562static bool dma_ops_domain(struct protection_domain *domain)
1563{
1564 return domain->flags & PD_DMA_OPS_MASK;
1565}
1566
1567static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1568{
1569 u64 pte_root = virt_to_phys(domain->pt_root);
1570 u32 flags = 0;
1571
1572 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1573 << DEV_ENTRY_MODE_SHIFT;
1574 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1575
1576 if (ats)
1577 flags |= DTE_FLAG_IOTLB;
1578
1579 amd_iommu_dev_table[devid].data[3] |= flags;
1580 amd_iommu_dev_table[devid].data[2] = domain->id;
1581 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1582 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1583}
1584
1585static void clear_dte_entry(u16 devid)
1586{
1587 /* remove entry from the device table seen by the hardware */
1588 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1589 amd_iommu_dev_table[devid].data[1] = 0;
1590 amd_iommu_dev_table[devid].data[2] = 0;
1591
1592 amd_iommu_apply_erratum_63(devid);
1593}
1594
1595static void do_attach(struct iommu_dev_data *dev_data,
1596 struct protection_domain *domain)
1597{
1598 struct amd_iommu *iommu;
1599 bool ats;
1600
1601 iommu = amd_iommu_rlookup_table[dev_data->devid];
1602 ats = dev_data->ats.enabled;
1603
1604 /* Update data structures */
1605 dev_data->domain = domain;
1606 list_add(&dev_data->list, &domain->dev_list);
1607 set_dte_entry(dev_data->devid, domain, ats);
1608
1609 /* Do reference counting */
1610 domain->dev_iommu[iommu->index] += 1;
1611 domain->dev_cnt += 1;
1612
1613 /* Flush the DTE entry */
1614 device_flush_dte(dev_data);
1615}
1616
1617static void do_detach(struct iommu_dev_data *dev_data)
1618{
1619 struct amd_iommu *iommu;
1620
1621 iommu = amd_iommu_rlookup_table[dev_data->devid];
1622
1623 /* decrease reference counters */
1624 dev_data->domain->dev_iommu[iommu->index] -= 1;
1625 dev_data->domain->dev_cnt -= 1;
1626
1627 /* Update data structures */
1628 dev_data->domain = NULL;
1629 list_del(&dev_data->list);
1630 clear_dte_entry(dev_data->devid);
1631
1632 /* Flush the DTE entry */
1633 device_flush_dte(dev_data);
1634}
1635
1636/*
1637 * If a device is not yet associated with a domain, this function
1638 * attaches it and makes the new setting visible to the hardware
1639 */
1640static int __attach_device(struct iommu_dev_data *dev_data,
1641 struct protection_domain *domain)
1642{
1643 int ret;
1644
1645 /* lock domain */
1646 spin_lock(&domain->lock);
1647
1648 if (dev_data->alias_data != NULL) {
1649 struct iommu_dev_data *alias_data = dev_data->alias_data;
1650
1651 /* Some sanity checks */
1652 ret = -EBUSY;
1653 if (alias_data->domain != NULL &&
1654 alias_data->domain != domain)
1655 goto out_unlock;
1656
1657 if (dev_data->domain != NULL &&
1658 dev_data->domain != domain)
1659 goto out_unlock;
1660
1661 /* Do real assignment */
1662 if (alias_data->domain == NULL)
1663 do_attach(alias_data, domain);
1664
1665 atomic_inc(&alias_data->bind);
1666 }
1667
1668 if (dev_data->domain == NULL)
1669 do_attach(dev_data, domain);
1670
1671 atomic_inc(&dev_data->bind);
1672
1673 ret = 0;
1674
1675out_unlock:
1676
1677 /* ready */
1678 spin_unlock(&domain->lock);
1679
1680 return ret;
1681}
1682
1683/*
1684 * If a device is not yet associated with a domain, this function
1685 * attaches it and makes the new setting visible to the hardware
1686 */
1687static int attach_device(struct device *dev,
1688 struct protection_domain *domain)
1689{
1690 struct pci_dev *pdev = to_pci_dev(dev);
1691 struct iommu_dev_data *dev_data;
1692 unsigned long flags;
1693 int ret;
1694
1695 dev_data = get_dev_data(dev);
1696
1697 if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
1698 dev_data->ats.enabled = true;
1699 dev_data->ats.qdep = pci_ats_queue_depth(pdev);
1700 }
1701
1702 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1703 ret = __attach_device(dev_data, domain);
1704 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1705
1706 /*
1707 * We might boot into a crash-kernel here. The crashed kernel
1708 * left the caches in the IOMMU dirty. So we have to flush
1709 * here to evict all dirty stuff.
1710 */
1711 domain_flush_tlb_pde(domain);
1712
1713 return ret;
1714}
1715
1716/*
1717 * Removes a device from a protection domain (unlocked)
1718 */
1719static void __detach_device(struct iommu_dev_data *dev_data)
1720{
1721 struct protection_domain *domain;
1722 unsigned long flags;
1723
1724 BUG_ON(!dev_data->domain);
1725
1726 domain = dev_data->domain;
1727
1728 spin_lock_irqsave(&domain->lock, flags);
1729
1730 if (dev_data->alias_data != NULL) {
1731 struct iommu_dev_data *alias_data = dev_data->alias_data;
1732
1733 if (atomic_dec_and_test(&alias_data->bind))
1734 do_detach(alias_data);
1735 }
1736
1737 if (atomic_dec_and_test(&dev_data->bind))
1738 do_detach(dev_data);
1739
1740 spin_unlock_irqrestore(&domain->lock, flags);
1741
1742 /*
1743 * If we run in passthrough mode the device must be assigned to the
1744 * passthrough domain if it is detached from any other domain.
1745 * Make sure we can deassign from the pt_domain itself.
1746 */
1747 if (iommu_pass_through &&
1748 (dev_data->domain == NULL && domain != pt_domain))
1749 __attach_device(dev_data, pt_domain);
1750}
1751
1752/*
1753 * Removes a device from a protection domain (with devtable_lock held)
1754 */
1755static void detach_device(struct device *dev)
1756{
1757 struct iommu_dev_data *dev_data;
1758 unsigned long flags;
1759
1760 dev_data = get_dev_data(dev);
1761
1762 /* lock device table */
1763 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1764 __detach_device(dev_data);
1765 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1766
1767 if (dev_data->ats.enabled) {
1768 pci_disable_ats(to_pci_dev(dev));
1769 dev_data->ats.enabled = false;
1770 }
1771}
1772
1773/*
1774 * Find out the protection domain structure for a given PCI device. This
1775 * will give us the pointer to the page table root for example.
1776 */
1777static struct protection_domain *domain_for_device(struct device *dev)
1778{
1779 struct iommu_dev_data *dev_data;
1780 struct protection_domain *dom = NULL;
1781 unsigned long flags;
1782
1783 dev_data = get_dev_data(dev);
1784
1785 if (dev_data->domain)
1786 return dev_data->domain;
1787
1788 if (dev_data->alias_data != NULL) {
1789 struct iommu_dev_data *alias_data = dev_data->alias_data;
1790
1791 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1792 if (alias_data->domain != NULL) {
1793 __attach_device(dev_data, alias_data->domain);
1794 dom = alias_data->domain;
1795 }
1796 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1797 }
1798
1799 return dom;
1800}
1801
1802static int device_change_notifier(struct notifier_block *nb,
1803 unsigned long action, void *data)
1804{
1805 struct device *dev = data;
1806 u16 devid;
1807 struct protection_domain *domain;
1808 struct dma_ops_domain *dma_domain;
1809 struct amd_iommu *iommu;
1810 unsigned long flags;
1811
1812 if (!check_device(dev))
1813 return 0;
1814
1815 devid = get_device_id(dev);
1816 iommu = amd_iommu_rlookup_table[devid];
1817
1818 switch (action) {
1819 case BUS_NOTIFY_UNBOUND_DRIVER:
1820
1821 domain = domain_for_device(dev);
1822
1823 if (!domain)
1824 goto out;
1825 if (iommu_pass_through)
1826 break;
1827 detach_device(dev);
1828 break;
1829 case BUS_NOTIFY_ADD_DEVICE:
1830
1831 iommu_init_device(dev);
1832
1833 domain = domain_for_device(dev);
1834
1835 /* allocate a protection domain if a device is added */
1836 dma_domain = find_protection_domain(devid);
1837 if (dma_domain)
1838 goto out;
1839 dma_domain = dma_ops_domain_alloc();
1840 if (!dma_domain)
1841 goto out;
1842 dma_domain->target_dev = devid;
1843
1844 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1845 list_add_tail(&dma_domain->list, &iommu_pd_list);
1846 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1847
1848 break;
1849 case BUS_NOTIFY_DEL_DEVICE:
1850
1851 iommu_uninit_device(dev);
1852
1853 default:
1854 goto out;
1855 }
1856
1857 iommu_completion_wait(iommu);
1858
1859out:
1860 return 0;
1861}
1862
1863static struct notifier_block device_nb = {
1864 .notifier_call = device_change_notifier,
1865};
1866
1867void amd_iommu_init_notifier(void)
1868{
1869 bus_register_notifier(&pci_bus_type, &device_nb);
1870}
1871
1872/*****************************************************************************
1873 *
1874 * The next functions belong to the dma_ops mapping/unmapping code.
1875 *
1876 *****************************************************************************/
1877
1878/*
1879 * In the dma_ops path we only have the struct device. This function
1880 * finds the corresponding IOMMU, the protection domain and the
1881 * requestor id for a given device.
1882 * If the device is not yet associated with a domain this is also done
1883 * in this function.
1884 */
1885static struct protection_domain *get_domain(struct device *dev)
1886{
1887 struct protection_domain *domain;
1888 struct dma_ops_domain *dma_dom;
1889 u16 devid = get_device_id(dev);
1890
1891 if (!check_device(dev))
1892 return ERR_PTR(-EINVAL);
1893
1894 domain = domain_for_device(dev);
1895 if (domain != NULL && !dma_ops_domain(domain))
1896 return ERR_PTR(-EBUSY);
1897
1898 if (domain != NULL)
1899 return domain;
1900
1901	/* Device not bound yet - bind it */
1902 dma_dom = find_protection_domain(devid);
1903 if (!dma_dom)
1904 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1905 attach_device(dev, &dma_dom->domain);
1906 DUMP_printk("Using protection domain %d for device %s\n",
1907 dma_dom->domain.id, dev_name(dev));
1908
1909 return &dma_dom->domain;
1910}
1911
1912static void update_device_table(struct protection_domain *domain)
1913{
1914 struct iommu_dev_data *dev_data;
1915
1916 list_for_each_entry(dev_data, &domain->dev_list, list)
1917 set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
1918}
1919
1920static void update_domain(struct protection_domain *domain)
1921{
1922 if (!domain->updated)
1923 return;
1924
1925 update_device_table(domain);
1926
1927 domain_flush_devices(domain);
1928 domain_flush_tlb_pde(domain);
1929
1930 domain->updated = false;
1931}
1932
1933/*
1934 * This function fetches the PTE for a given address in the aperture
1935 */
1936static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1937 unsigned long address)
1938{
1939 struct aperture_range *aperture;
1940 u64 *pte, *pte_page;
1941
1942 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1943 if (!aperture)
1944 return NULL;
1945
1946 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1947 if (!pte) {
1948 pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
1949 GFP_ATOMIC);
1950 aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1951 } else
1952 pte += PM_LEVEL_INDEX(0, address);
1953
1954 update_domain(&dom->domain);
1955
1956 return pte;
1957}
1958
1959/*
1960 * This is the generic map function. It maps one 4KB page at paddr to
1961 * the given address in the DMA address space for the domain.
1962 */
1963static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1964 unsigned long address,
1965 phys_addr_t paddr,
1966 int direction)
1967{
1968 u64 *pte, __pte;
1969
1970 WARN_ON(address > dom->aperture_size);
1971
1972 paddr &= PAGE_MASK;
1973
1974 pte = dma_ops_get_pte(dom, address);
1975 if (!pte)
1976 return DMA_ERROR_CODE;
1977
1978 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1979
1980 if (direction == DMA_TO_DEVICE)
1981 __pte |= IOMMU_PTE_IR;
1982 else if (direction == DMA_FROM_DEVICE)
1983 __pte |= IOMMU_PTE_IW;
1984 else if (direction == DMA_BIDIRECTIONAL)
1985 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
1986
1987 WARN_ON(*pte);
1988
1989 *pte = __pte;
1990
1991 return (dma_addr_t)address;
1992}
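
The permission handling above is compact enough to miss on first read: the DMA direction decides whether the device gets read access, write access, or both in the page table entry. Below is a small standalone sketch (plain userspace C, not kernel code) of that flag selection; the bit positions used for the present/FC/IR/IW flags are assumptions for illustration only, the real values live in amd_iommu_types.h.

#include <stdint.h>
#include <stdio.h>

#define PTE_P  (1ULL << 0)   /* present          (assumed position) */
#define PTE_FC (1ULL << 60)  /* force coherent   (assumed position) */
#define PTE_IR (1ULL << 61)  /* read permission  (assumed position) */
#define PTE_IW (1ULL << 62)  /* write permission (assumed position) */

enum dir { TO_DEVICE, FROM_DEVICE, BIDIRECTIONAL };

static uint64_t build_pte(uint64_t paddr, enum dir d)
{
	uint64_t pte = (paddr & ~0xfffULL) | PTE_P | PTE_FC;

	if (d == TO_DEVICE)        /* device only reads from memory   */
		pte |= PTE_IR;
	else if (d == FROM_DEVICE) /* device only writes to memory    */
		pte |= PTE_IW;
	else                       /* bidirectional: both permissions */
		pte |= PTE_IR | PTE_IW;
	return pte;
}

int main(void)
{
	printf("PTE = 0x%016llx\n",
	       (unsigned long long)build_pte(0x12345000ULL, BIDIRECTIONAL));
	return 0;
}
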
1993
1994/*
1995 * The generic unmapping function for one page in the DMA address space.
1996 */
1997static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
1998 unsigned long address)
1999{
2000 struct aperture_range *aperture;
2001 u64 *pte;
2002
2003 if (address >= dom->aperture_size)
2004 return;
2005
2006 aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
2007 if (!aperture)
2008 return;
2009
2010 pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
2011 if (!pte)
2012 return;
2013
2014 pte += PM_LEVEL_INDEX(0, address);
2015
2016 WARN_ON(!*pte);
2017
2018 *pte = 0ULL;
2019}
2020
2021/*
2022 * This function contains common code for mapping of a physically
2023 * contiguous memory region into DMA address space. It is used by all
2024 * mapping functions provided with this IOMMU driver.
2025 * Must be called with the domain lock held.
2026 */
2027static dma_addr_t __map_single(struct device *dev,
2028 struct dma_ops_domain *dma_dom,
2029 phys_addr_t paddr,
2030 size_t size,
2031 int dir,
2032 bool align,
2033 u64 dma_mask)
2034{
2035 dma_addr_t offset = paddr & ~PAGE_MASK;
2036 dma_addr_t address, start, ret;
2037 unsigned int pages;
2038 unsigned long align_mask = 0;
2039 int i;
2040
2041 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
2042 paddr &= PAGE_MASK;
2043
2044 INC_STATS_COUNTER(total_map_requests);
2045
2046 if (pages > 1)
2047 INC_STATS_COUNTER(cross_page);
2048
2049 if (align)
2050 align_mask = (1UL << get_order(size)) - 1;
2051
2052retry:
2053 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
2054 dma_mask);
2055 if (unlikely(address == DMA_ERROR_CODE)) {
2056 /*
2057			 * Setting next_address here lets the address
2058			 * allocator scan only the newly allocated range in the
2059			 * first run. This is a small optimization.
2060 */
2061 dma_dom->next_address = dma_dom->aperture_size;
2062
2063 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
2064 goto out;
2065
2066 /*
2067 * aperture was successfully enlarged by 128 MB, try
2068 * allocation again
2069 */
2070 goto retry;
2071 }
2072
2073 start = address;
2074 for (i = 0; i < pages; ++i) {
2075 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
2076 if (ret == DMA_ERROR_CODE)
2077 goto out_unmap;
2078
2079 paddr += PAGE_SIZE;
2080 start += PAGE_SIZE;
2081 }
2082 address += offset;
2083
2084 ADD_STATS_COUNTER(alloced_io_mem, size);
2085
2086 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
2087 domain_flush_tlb(&dma_dom->domain);
2088 dma_dom->need_flush = false;
2089 } else if (unlikely(amd_iommu_np_cache))
2090 domain_flush_pages(&dma_dom->domain, address, size);
2091
2092out:
2093 return address;
2094
2095out_unmap:
2096
2097 for (--i; i >= 0; --i) {
2098 start -= PAGE_SIZE;
2099 dma_ops_domain_unmap(dma_dom, start);
2100 }
2101
2102 dma_ops_free_addresses(dma_dom, address, pages);
2103
2104 return DMA_ERROR_CODE;
2105}
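
__map_single() above relies on two pieces of arithmetic that are easy to get wrong: the page count has to account for the offset of paddr inside its first page, and the optional alignment mask is derived from the allocation order of the size. The following standalone sketch mirrors that arithmetic; num_pages() and order_of() are illustrative stand-ins for iommu_num_pages() and get_order(), not the kernel's implementations.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Number of PAGE_SIZE pages needed to cover [paddr, paddr + size). */
static unsigned long num_pages(uint64_t paddr, unsigned long size)
{
	return ((paddr & (PAGE_SIZE - 1)) + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

/* Smallest order so that (PAGE_SIZE << order) >= size, like get_order(). */
static int order_of(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	uint64_t paddr = 0x1000f00;	/* starts 0xf00 bytes into a page */
	unsigned long size = 8192;

	printf("pages      = %lu\n", num_pages(paddr, size));	/* 3   */
	printf("align_mask = 0x%lx\n",
	       (1UL << order_of(size)) - 1);			/* 0x1 */
	return 0;
}
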
2106
2107/*
2108 * Does the reverse of the __map_single function. Must be called with
2109 * the domain lock held, too.
2110 */
2111static void __unmap_single(struct dma_ops_domain *dma_dom,
2112 dma_addr_t dma_addr,
2113 size_t size,
2114 int dir)
2115{
2116 dma_addr_t flush_addr;
2117 dma_addr_t i, start;
2118 unsigned int pages;
2119
2120 if ((dma_addr == DMA_ERROR_CODE) ||
2121 (dma_addr + size > dma_dom->aperture_size))
2122 return;
2123
2124 flush_addr = dma_addr;
2125 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
2126 dma_addr &= PAGE_MASK;
2127 start = dma_addr;
2128
2129 for (i = 0; i < pages; ++i) {
2130 dma_ops_domain_unmap(dma_dom, start);
2131 start += PAGE_SIZE;
2132 }
2133
2134 SUB_STATS_COUNTER(alloced_io_mem, size);
2135
2136 dma_ops_free_addresses(dma_dom, dma_addr, pages);
2137
2138 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
2139 domain_flush_pages(&dma_dom->domain, flush_addr, size);
2140 dma_dom->need_flush = false;
2141 }
2142}
2143
2144/*
2145 * The exported map_single function for dma_ops.
2146 */
2147static dma_addr_t map_page(struct device *dev, struct page *page,
2148 unsigned long offset, size_t size,
2149 enum dma_data_direction dir,
2150 struct dma_attrs *attrs)
2151{
2152 unsigned long flags;
2153 struct protection_domain *domain;
2154 dma_addr_t addr;
2155 u64 dma_mask;
2156 phys_addr_t paddr = page_to_phys(page) + offset;
2157
2158 INC_STATS_COUNTER(cnt_map_single);
2159
2160 domain = get_domain(dev);
2161 if (PTR_ERR(domain) == -EINVAL)
2162 return (dma_addr_t)paddr;
2163 else if (IS_ERR(domain))
2164 return DMA_ERROR_CODE;
2165
2166 dma_mask = *dev->dma_mask;
2167
2168 spin_lock_irqsave(&domain->lock, flags);
2169
2170 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
2171 dma_mask);
2172 if (addr == DMA_ERROR_CODE)
2173 goto out;
2174
2175 domain_flush_complete(domain);
2176
2177out:
2178 spin_unlock_irqrestore(&domain->lock, flags);
2179
2180 return addr;
2181}
2182
2183/*
2184 * The exported unmap_single function for dma_ops.
2185 */
2186static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2187 enum dma_data_direction dir, struct dma_attrs *attrs)
2188{
2189 unsigned long flags;
2190 struct protection_domain *domain;
2191
2192 INC_STATS_COUNTER(cnt_unmap_single);
2193
2194 domain = get_domain(dev);
2195 if (IS_ERR(domain))
2196 return;
2197
2198 spin_lock_irqsave(&domain->lock, flags);
2199
2200 __unmap_single(domain->priv, dma_addr, size, dir);
2201
2202 domain_flush_complete(domain);
2203
2204 spin_unlock_irqrestore(&domain->lock, flags);
2205}
2206
2207/*
2208 * This is a special map_sg function which is used when we have to map a
2209 * device that is not handled by any AMD IOMMU in the system.
2210 */
2211static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
2212 int nelems, int dir)
2213{
2214 struct scatterlist *s;
2215 int i;
2216
2217 for_each_sg(sglist, s, nelems, i) {
2218 s->dma_address = (dma_addr_t)sg_phys(s);
2219 s->dma_length = s->length;
2220 }
2221
2222 return nelems;
2223}
2224
2225/*
2226 * The exported map_sg function for dma_ops (handles scatter-gather
2227 * lists).
2228 */
2229static int map_sg(struct device *dev, struct scatterlist *sglist,
2230 int nelems, enum dma_data_direction dir,
2231 struct dma_attrs *attrs)
2232{
2233 unsigned long flags;
2234 struct protection_domain *domain;
2235 int i;
2236 struct scatterlist *s;
2237 phys_addr_t paddr;
2238 int mapped_elems = 0;
2239 u64 dma_mask;
2240
2241 INC_STATS_COUNTER(cnt_map_sg);
2242
2243 domain = get_domain(dev);
2244 if (PTR_ERR(domain) == -EINVAL)
2245 return map_sg_no_iommu(dev, sglist, nelems, dir);
2246 else if (IS_ERR(domain))
2247 return 0;
2248
2249 dma_mask = *dev->dma_mask;
2250
2251 spin_lock_irqsave(&domain->lock, flags);
2252
2253 for_each_sg(sglist, s, nelems, i) {
2254 paddr = sg_phys(s);
2255
2256 s->dma_address = __map_single(dev, domain->priv,
2257 paddr, s->length, dir, false,
2258 dma_mask);
2259
2260 if (s->dma_address) {
2261 s->dma_length = s->length;
2262 mapped_elems++;
2263 } else
2264 goto unmap;
2265 }
2266
2267 domain_flush_complete(domain);
2268
2269out:
2270 spin_unlock_irqrestore(&domain->lock, flags);
2271
2272 return mapped_elems;
2273unmap:
2274 for_each_sg(sglist, s, mapped_elems, i) {
2275 if (s->dma_address)
2276 __unmap_single(domain->priv, s->dma_address,
2277 s->dma_length, dir);
2278 s->dma_address = s->dma_length = 0;
2279 }
2280
2281 mapped_elems = 0;
2282
2283 goto out;
2284}
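
map_sg() above follows the usual all-or-nothing convention: if one element fails to map, everything mapped so far is torn down again and 0 is returned. Here is a toy standalone sketch of that unwind pattern; toy_map()/toy_unmap() are made-up helpers standing in for __map_single()/__unmap_single().

#include <stdbool.h>
#include <stdio.h>

/* Toy element standing in for a scatterlist entry. */
struct elem { unsigned long addr; bool mapped; };

static bool toy_map(struct elem *e)   { e->mapped = (e->addr != 0); return e->mapped; }
static void toy_unmap(struct elem *e) { e->mapped = false; }

/* Map all elements; on the first failure unmap only what was mapped so far. */
static int map_all(struct elem *v, int n)
{
	int i, mapped = 0;

	for (i = 0; i < n; i++) {
		if (!toy_map(&v[i]))
			goto unwind;
		mapped++;
	}
	return mapped;

unwind:
	for (i = 0; i < mapped; i++)
		toy_unmap(&v[i]);
	return 0;
}

int main(void)
{
	struct elem v[3] = { { 0x1000 }, { 0x2000 }, { 0 } };	/* last one fails */

	printf("mapped: %d\n", map_all(v, 3));	/* prints 0, nothing stays mapped */
	return 0;
}
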
2285
2286/*
2287 * The exported unmap_sg function for dma_ops (handles scatter-gather
2288 * lists).
2289 */
2290static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2291 int nelems, enum dma_data_direction dir,
2292 struct dma_attrs *attrs)
2293{
2294 unsigned long flags;
2295 struct protection_domain *domain;
2296 struct scatterlist *s;
2297 int i;
2298
2299 INC_STATS_COUNTER(cnt_unmap_sg);
2300
2301 domain = get_domain(dev);
2302 if (IS_ERR(domain))
2303 return;
2304
2305 spin_lock_irqsave(&domain->lock, flags);
2306
2307 for_each_sg(sglist, s, nelems, i) {
2308 __unmap_single(domain->priv, s->dma_address,
2309 s->dma_length, dir);
2310 s->dma_address = s->dma_length = 0;
2311 }
2312
2313 domain_flush_complete(domain);
2314
2315 spin_unlock_irqrestore(&domain->lock, flags);
2316}
2317
2318/*
2319 * The exported alloc_coherent function for dma_ops.
2320 */
2321static void *alloc_coherent(struct device *dev, size_t size,
2322 dma_addr_t *dma_addr, gfp_t flag)
2323{
2324 unsigned long flags;
2325 void *virt_addr;
2326 struct protection_domain *domain;
2327 phys_addr_t paddr;
2328 u64 dma_mask = dev->coherent_dma_mask;
2329
2330 INC_STATS_COUNTER(cnt_alloc_coherent);
2331
2332 domain = get_domain(dev);
2333 if (PTR_ERR(domain) == -EINVAL) {
2334 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2335 *dma_addr = __pa(virt_addr);
2336 return virt_addr;
2337 } else if (IS_ERR(domain))
2338 return NULL;
2339
2340 dma_mask = dev->coherent_dma_mask;
2341 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2342 flag |= __GFP_ZERO;
2343
2344 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2345 if (!virt_addr)
2346 return NULL;
2347
2348 paddr = virt_to_phys(virt_addr);
2349
2350 if (!dma_mask)
2351 dma_mask = *dev->dma_mask;
2352
2353 spin_lock_irqsave(&domain->lock, flags);
2354
2355 *dma_addr = __map_single(dev, domain->priv, paddr,
2356 size, DMA_BIDIRECTIONAL, true, dma_mask);
2357
2358 if (*dma_addr == DMA_ERROR_CODE) {
2359 spin_unlock_irqrestore(&domain->lock, flags);
2360 goto out_free;
2361 }
2362
2363 domain_flush_complete(domain);
2364
2365 spin_unlock_irqrestore(&domain->lock, flags);
2366
2367 return virt_addr;
2368
2369out_free:
2370
2371 free_pages((unsigned long)virt_addr, get_order(size));
2372
2373 return NULL;
2374}
2375
2376/*
2377 * The exported free_coherent function for dma_ops.
2378 */
2379static void free_coherent(struct device *dev, size_t size,
2380 void *virt_addr, dma_addr_t dma_addr)
2381{
2382 unsigned long flags;
2383 struct protection_domain *domain;
2384
2385 INC_STATS_COUNTER(cnt_free_coherent);
2386
2387 domain = get_domain(dev);
2388 if (IS_ERR(domain))
2389 goto free_mem;
2390
2391 spin_lock_irqsave(&domain->lock, flags);
2392
2393 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2394
2395 domain_flush_complete(domain);
2396
2397 spin_unlock_irqrestore(&domain->lock, flags);
2398
2399free_mem:
2400 free_pages((unsigned long)virt_addr, get_order(size));
2401}
2402
2403/*
2404 * This function is called by the DMA layer to find out if we can handle a
2405 * particular device. It is part of the dma_ops.
2406 */
2407static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2408{
2409 return check_device(dev);
2410}
2411
2412/*
2413 * The function for pre-allocating protection domains.
2414 *
2415 * Once the driver core informs the DMA layer when a driver grabs a device,
2416 * we won't need to preallocate the protection domains anymore.
2417 * For now we have to.
2418 */
2419static void prealloc_protection_domains(void)
2420{
2421 struct pci_dev *dev = NULL;
2422 struct dma_ops_domain *dma_dom;
2423 u16 devid;
2424
2425 for_each_pci_dev(dev) {
2426
2427 /* Do we handle this device? */
2428 if (!check_device(&dev->dev))
2429 continue;
2430
2431 /* Is there already any domain for it? */
2432 if (domain_for_device(&dev->dev))
2433 continue;
2434
2435 devid = get_device_id(&dev->dev);
2436
2437 dma_dom = dma_ops_domain_alloc();
2438 if (!dma_dom)
2439 continue;
2440 init_unity_mappings_for_device(dma_dom, devid);
2441 dma_dom->target_dev = devid;
2442
2443 attach_device(&dev->dev, &dma_dom->domain);
2444
2445 list_add_tail(&dma_dom->list, &iommu_pd_list);
2446 }
2447}
2448
2449static struct dma_map_ops amd_iommu_dma_ops = {
2450 .alloc_coherent = alloc_coherent,
2451 .free_coherent = free_coherent,
2452 .map_page = map_page,
2453 .unmap_page = unmap_page,
2454 .map_sg = map_sg,
2455 .unmap_sg = unmap_sg,
2456 .dma_supported = amd_iommu_dma_supported,
2457};
2458
2459static unsigned device_dma_ops_init(void)
2460{
2461 struct pci_dev *pdev = NULL;
2462 unsigned unhandled = 0;
2463
2464 for_each_pci_dev(pdev) {
2465 if (!check_device(&pdev->dev)) {
2466 unhandled += 1;
2467 continue;
2468 }
2469
2470 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
2471 }
2472
2473 return unhandled;
2474}
2475
2476/*
2477 * The function which glues the AMD IOMMU driver into dma_ops.
2478 */
2479
2480void __init amd_iommu_init_api(void)
2481{
2482 register_iommu(&amd_iommu_ops);
2483}
2484
2485int __init amd_iommu_init_dma_ops(void)
2486{
2487 struct amd_iommu *iommu;
2488 int ret, unhandled;
2489
2490 /*
2491 * first allocate a default protection domain for every IOMMU we
2492 * found in the system. Devices not assigned to any other
2493 * protection domain will be assigned to the default one.
2494 */
2495 for_each_iommu(iommu) {
2496 iommu->default_dom = dma_ops_domain_alloc();
2497 if (iommu->default_dom == NULL)
2498 return -ENOMEM;
2499 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2500 ret = iommu_init_unity_mappings(iommu);
2501 if (ret)
2502 goto free_domains;
2503 }
2504
2505 /*
2506 * Pre-allocate the protection domains for each device.
2507 */
2508 prealloc_protection_domains();
2509
2510 iommu_detected = 1;
2511 swiotlb = 0;
2512
2513	/* Finally make our dma_ops visible to the device drivers */
2514 unhandled = device_dma_ops_init();
2515 if (unhandled && max_pfn > MAX_DMA32_PFN) {
2516 /* There are unhandled devices - initialize swiotlb for them */
2517 swiotlb = 1;
2518 }
2519
2520 amd_iommu_stats_init();
2521
2522 return 0;
2523
2524free_domains:
2525
2526 for_each_iommu(iommu) {
2527 if (iommu->default_dom)
2528 dma_ops_domain_free(iommu->default_dom);
2529 }
2530
2531 return ret;
2532}
2533
2534/*****************************************************************************
2535 *
2536 * The following functions belong to the exported interface of AMD IOMMU
2537 *
2538 * This interface allows access to lower level functions of the IOMMU
2539 * like protection domain handling and assignment of devices to domains
2540 * which is not possible with the dma_ops interface.
2541 *
2542 *****************************************************************************/
2543
2544static void cleanup_domain(struct protection_domain *domain)
2545{
2546 struct iommu_dev_data *dev_data, *next;
2547 unsigned long flags;
2548
2549 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2550
2551 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2552 __detach_device(dev_data);
2553 atomic_set(&dev_data->bind, 0);
2554 }
2555
2556 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2557}
2558
2559static void protection_domain_free(struct protection_domain *domain)
2560{
2561 if (!domain)
2562 return;
2563
2564 del_domain_from_list(domain);
2565
2566 if (domain->id)
2567 domain_id_free(domain->id);
2568
2569 kfree(domain);
2570}
2571
2572static struct protection_domain *protection_domain_alloc(void)
2573{
2574 struct protection_domain *domain;
2575
2576 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2577 if (!domain)
2578 return NULL;
2579
2580 spin_lock_init(&domain->lock);
2581 mutex_init(&domain->api_lock);
2582 domain->id = domain_id_alloc();
2583 if (!domain->id)
2584 goto out_err;
2585 INIT_LIST_HEAD(&domain->dev_list);
2586
2587 add_domain_to_list(domain);
2588
2589 return domain;
2590
2591out_err:
2592 kfree(domain);
2593
2594 return NULL;
2595}
2596
2597static int amd_iommu_domain_init(struct iommu_domain *dom)
2598{
2599 struct protection_domain *domain;
2600
2601 domain = protection_domain_alloc();
2602 if (!domain)
2603 goto out_free;
2604
2605 domain->mode = PAGE_MODE_3_LEVEL;
2606 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2607 if (!domain->pt_root)
2608 goto out_free;
2609
2610 dom->priv = domain;
2611
2612 return 0;
2613
2614out_free:
2615 protection_domain_free(domain);
2616
2617 return -ENOMEM;
2618}
2619
2620static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2621{
2622 struct protection_domain *domain = dom->priv;
2623
2624 if (!domain)
2625 return;
2626
2627 if (domain->dev_cnt > 0)
2628 cleanup_domain(domain);
2629
2630 BUG_ON(domain->dev_cnt != 0);
2631
2632 free_pagetable(domain);
2633
2634 protection_domain_free(domain);
2635
2636 dom->priv = NULL;
2637}
2638
2639static void amd_iommu_detach_device(struct iommu_domain *dom,
2640 struct device *dev)
2641{
2642 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2643 struct amd_iommu *iommu;
2644 u16 devid;
2645
2646 if (!check_device(dev))
2647 return;
2648
2649 devid = get_device_id(dev);
2650
2651 if (dev_data->domain != NULL)
2652 detach_device(dev);
2653
2654 iommu = amd_iommu_rlookup_table[devid];
2655 if (!iommu)
2656 return;
2657
2658 iommu_completion_wait(iommu);
2659}
2660
2661static int amd_iommu_attach_device(struct iommu_domain *dom,
2662 struct device *dev)
2663{
2664 struct protection_domain *domain = dom->priv;
2665 struct iommu_dev_data *dev_data;
2666 struct amd_iommu *iommu;
2667 int ret;
2668
2669 if (!check_device(dev))
2670 return -EINVAL;
2671
2672 dev_data = dev->archdata.iommu;
2673
2674 iommu = amd_iommu_rlookup_table[dev_data->devid];
2675 if (!iommu)
2676 return -EINVAL;
2677
2678 if (dev_data->domain)
2679 detach_device(dev);
2680
2681 ret = attach_device(dev, domain);
2682
2683 iommu_completion_wait(iommu);
2684
2685 return ret;
2686}
2687
2688static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
2689 phys_addr_t paddr, int gfp_order, int iommu_prot)
2690{
2691 unsigned long page_size = 0x1000UL << gfp_order;
2692 struct protection_domain *domain = dom->priv;
2693 int prot = 0;
2694 int ret;
2695
2696 if (iommu_prot & IOMMU_READ)
2697 prot |= IOMMU_PROT_IR;
2698 if (iommu_prot & IOMMU_WRITE)
2699 prot |= IOMMU_PROT_IW;
2700
2701 mutex_lock(&domain->api_lock);
2702 ret = iommu_map_page(domain, iova, paddr, prot, page_size);
2703 mutex_unlock(&domain->api_lock);
2704
2705 return ret;
2706}
2707
2708static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2709 int gfp_order)
2710{
2711 struct protection_domain *domain = dom->priv;
2712 unsigned long page_size, unmap_size;
2713
2714 page_size = 0x1000UL << gfp_order;
2715
2716 mutex_lock(&domain->api_lock);
2717 unmap_size = iommu_unmap_page(domain, iova, page_size);
2718 mutex_unlock(&domain->api_lock);
2719
2720 domain_flush_tlb_pde(domain);
2721
2722 return get_order(unmap_size);
2723}
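
The iommu_ops map/unmap callbacks above talk in page orders rather than byte sizes: the caller passes gfp_order, the driver converts it to a byte size for iommu_map_page()/iommu_unmap_page(), and amd_iommu_unmap() converts the unmapped size back to an order. A standalone sketch of that round trip; size_to_order() only mirrors get_order() behaviour and is not the kernel's implementation.

#include <stdio.h>

#define PAGE_SHIFT 12

/* Page size handed to iommu_map_page()/iommu_unmap_page() for a given order. */
static unsigned long order_to_size(int order)
{
	return 0x1000UL << order;
}

/* Order returned to the caller for an unmapped size, like get_order(). */
static int size_to_order(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	/* order 9 <-> 2 MB, the large-page case */
	printf("size  = 0x%lx\n", order_to_size(9));	 /* 0x200000 */
	printf("order = %d\n", size_to_order(0x200000)); /* 9        */
	return 0;
}
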
2724
2725static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2726 unsigned long iova)
2727{
2728 struct protection_domain *domain = dom->priv;
2729 unsigned long offset_mask;
2730 phys_addr_t paddr;
2731 u64 *pte, __pte;
2732
2733 pte = fetch_pte(domain, iova);
2734
2735 if (!pte || !IOMMU_PTE_PRESENT(*pte))
2736 return 0;
2737
2738 if (PM_PTE_LEVEL(*pte) == 0)
2739 offset_mask = PAGE_SIZE - 1;
2740 else
2741 offset_mask = PTE_PAGE_SIZE(*pte) - 1;
2742
2743 __pte = *pte & PM_ADDR_MASK;
2744 paddr = (__pte & ~offset_mask) | (iova & offset_mask);
2745
2746 return paddr;
2747}
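
amd_iommu_iova_to_phys() above splices the physical frame from the PTE together with the in-page offset from the IOVA, where the width of the offset depends on the page size the PTE maps. A minimal standalone illustration of that composition, assuming a 2 MB mapping and an already-masked PTE address:

#include <stdint.h>
#include <stdio.h>

/*
 * Recover a physical address the way amd_iommu_iova_to_phys() does: take the
 * frame address from the PTE and the in-page offset from the IOVA.
 */
static uint64_t iova_to_phys(uint64_t pte_addr, uint64_t iova,
			     uint64_t page_size)
{
	uint64_t offset_mask = page_size - 1;

	return (pte_addr & ~offset_mask) | (iova & offset_mask);
}

int main(void)
{
	/* 2 MB mapping: the low 21 bits of the IOVA survive as the offset */
	printf("paddr = 0x%llx\n",
	       (unsigned long long)iova_to_phys(0x40000000ULL,
						0x80123456ULL,
						2UL << 20));	/* 0x40123456 */
	return 0;
}
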
2748
2749static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2750 unsigned long cap)
2751{
2752 switch (cap) {
2753 case IOMMU_CAP_CACHE_COHERENCY:
2754 return 1;
2755 }
2756
2757 return 0;
2758}
2759
2760static struct iommu_ops amd_iommu_ops = {
2761 .domain_init = amd_iommu_domain_init,
2762 .domain_destroy = amd_iommu_domain_destroy,
2763 .attach_dev = amd_iommu_attach_device,
2764 .detach_dev = amd_iommu_detach_device,
2765 .map = amd_iommu_map,
2766 .unmap = amd_iommu_unmap,
2767 .iova_to_phys = amd_iommu_iova_to_phys,
2768 .domain_has_cap = amd_iommu_domain_has_cap,
2769};
2770
2771/*****************************************************************************
2772 *
2773 * The next functions do a basic initialization of the IOMMU for passthrough
2774 * mode
2775 *
2776 * In passthrough mode the IOMMU is initialized and enabled but not used for
2777 * DMA-API translation.
2778 *
2779 *****************************************************************************/
2780
2781int __init amd_iommu_init_passthrough(void)
2782{
2783 struct amd_iommu *iommu;
2784 struct pci_dev *dev = NULL;
2785 u16 devid;
2786
2787 /* allocate passthrough domain */
2788 pt_domain = protection_domain_alloc();
2789 if (!pt_domain)
2790 return -ENOMEM;
2791
2792 pt_domain->mode |= PAGE_MODE_NONE;
2793
2794 for_each_pci_dev(dev) {
2795 if (!check_device(&dev->dev))
2796 continue;
2797
2798 devid = get_device_id(&dev->dev);
2799
2800 iommu = amd_iommu_rlookup_table[devid];
2801 if (!iommu)
2802 continue;
2803
2804 attach_device(&dev->dev, pt_domain);
2805 }
2806
2807 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2808
2809 return 0;
2810}
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
new file mode 100644
index 000000000000..82d2410f4205
--- /dev/null
+++ b/drivers/iommu/amd_iommu_init.c
@@ -0,0 +1,1574 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/acpi.h>
22#include <linux/list.h>
23#include <linux/slab.h>
24#include <linux/syscore_ops.h>
25#include <linux/interrupt.h>
26#include <linux/msi.h>
27#include <linux/amd-iommu.h>
28#include <asm/pci-direct.h>
29#include <asm/iommu.h>
30#include <asm/gart.h>
31#include <asm/x86_init.h>
32#include <asm/iommu_table.h>
33
34#include "amd_iommu_proto.h"
35#include "amd_iommu_types.h"
36
37/*
38 * definitions for the ACPI scanning code
39 */
40#define IVRS_HEADER_LENGTH 48
41
42#define ACPI_IVHD_TYPE 0x10
43#define ACPI_IVMD_TYPE_ALL 0x20
44#define ACPI_IVMD_TYPE 0x21
45#define ACPI_IVMD_TYPE_RANGE 0x22
46
47#define IVHD_DEV_ALL 0x01
48#define IVHD_DEV_SELECT 0x02
49#define IVHD_DEV_SELECT_RANGE_START 0x03
50#define IVHD_DEV_RANGE_END 0x04
51#define IVHD_DEV_ALIAS 0x42
52#define IVHD_DEV_ALIAS_RANGE 0x43
53#define IVHD_DEV_EXT_SELECT 0x46
54#define IVHD_DEV_EXT_SELECT_RANGE 0x47
55
56#define IVHD_FLAG_HT_TUN_EN_MASK 0x01
57#define IVHD_FLAG_PASSPW_EN_MASK 0x02
58#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
59#define IVHD_FLAG_ISOC_EN_MASK 0x08
60
61#define IVMD_FLAG_EXCL_RANGE 0x08
62#define IVMD_FLAG_UNITY_MAP 0x01
63
64#define ACPI_DEVFLAG_INITPASS 0x01
65#define ACPI_DEVFLAG_EXTINT 0x02
66#define ACPI_DEVFLAG_NMI 0x04
67#define ACPI_DEVFLAG_SYSMGT1 0x10
68#define ACPI_DEVFLAG_SYSMGT2 0x20
69#define ACPI_DEVFLAG_LINT0 0x40
70#define ACPI_DEVFLAG_LINT1 0x80
71#define ACPI_DEVFLAG_ATSDIS 0x10000000
72
73/*
74 * ACPI table definitions
75 *
76 * These data structures are laid over the table to parse the important values
77 * out of it.
78 */
79
80/*
81 * structure describing one IOMMU in the ACPI table. Typically followed by one
82 * or more ivhd_entry structures.
83 */
84struct ivhd_header {
85 u8 type;
86 u8 flags;
87 u16 length;
88 u16 devid;
89 u16 cap_ptr;
90 u64 mmio_phys;
91 u16 pci_seg;
92 u16 info;
93 u32 reserved;
94} __attribute__((packed));
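
Because this structure is laid directly over the raw IVRS bytes, its field offsets have to match the table layout exactly, which is what the packed attribute is there to guarantee. A quick standalone check of the resulting 24-byte layout; the struct name is changed here only to avoid clashing with the real one.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ivhd_header_sketch {
	uint8_t  type;
	uint8_t  flags;
	uint16_t length;
	uint16_t devid;
	uint16_t cap_ptr;
	uint64_t mmio_phys;
	uint16_t pci_seg;
	uint16_t info;
	uint32_t reserved;
} __attribute__((packed));

int main(void)
{
	/* The header occupies 24 bytes, with the MMIO base at offset 8. */
	printf("size      = %zu\n", sizeof(struct ivhd_header_sketch));      /* 24 */
	printf("mmio_phys @ %zu\n",
	       offsetof(struct ivhd_header_sketch, mmio_phys));              /* 8  */
	assert(sizeof(struct ivhd_header_sketch) == 24);
	return 0;
}
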
95
96/*
97 * A device entry describing which devices a specific IOMMU translates and
98 * which requestor ids they use.
99 */
100struct ivhd_entry {
101 u8 type;
102 u16 devid;
103 u8 flags;
104 u32 ext;
105} __attribute__((packed));
106
107/*
108 * An AMD IOMMU memory definition structure. It defines things like exclusion
109 * ranges for devices and regions that should be unity mapped.
110 */
111struct ivmd_header {
112 u8 type;
113 u8 flags;
114 u16 length;
115 u16 devid;
116 u16 aux;
117 u64 resv;
118 u64 range_start;
119 u64 range_length;
120} __attribute__((packed));
121
122bool amd_iommu_dump;
123
124static int __initdata amd_iommu_detected;
125static bool __initdata amd_iommu_disabled;
126
127u16 amd_iommu_last_bdf; /* largest PCI device id we have
128 to handle */
129LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
130 we find in ACPI */
131bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
132
133LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
134 system */
135
136/* Array to assign indices to IOMMUs */
137struct amd_iommu *amd_iommus[MAX_IOMMUS];
138int amd_iommus_present;
139
140/* IOMMUs have a non-present cache? */
141bool amd_iommu_np_cache __read_mostly;
142bool amd_iommu_iotlb_sup __read_mostly = true;
143
144/*
145 * The ACPI table parsing functions set this variable on an error
146 */
147static int __initdata amd_iommu_init_err;
148
149/*
150 * List of protection domains - used during resume
151 */
152LIST_HEAD(amd_iommu_pd_list);
153spinlock_t amd_iommu_pd_lock;
154
155/*
156 * Pointer to the device table which is shared by all AMD IOMMUs.
157 * It is indexed by the PCI device id or the HT unit id and contains
158 * information about the domain the device belongs to as well as the
159 * page table root pointer.
160 */
161struct dev_table_entry *amd_iommu_dev_table;
162
163/*
164 * The alias table is a driver specific data structure which contains the
165 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
166 * More than one device can share the same requestor id.
167 */
168u16 *amd_iommu_alias_table;
169
170/*
171 * The rlookup table is used to find the IOMMU which is responsible
172 * for a specific device. It is also indexed by the PCI device id.
173 */
174struct amd_iommu **amd_iommu_rlookup_table;
175
176/*
177 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
178 * to know which ones are already in use.
179 */
180unsigned long *amd_iommu_pd_alloc_bitmap;
181
182static u32 dev_table_size; /* size of the device table */
183static u32 alias_table_size; /* size of the alias table */
184static u32 rlookup_table_size;	/* size of the rlookup table */
185
186/*
187 * This function flushes all internal caches of
188 * the IOMMU used by this driver.
189 */
190extern void iommu_flush_all_caches(struct amd_iommu *iommu);
191
192static inline void update_last_devid(u16 devid)
193{
194 if (devid > amd_iommu_last_bdf)
195 amd_iommu_last_bdf = devid;
196}
197
198static inline unsigned long tbl_size(int entry_size)
199{
200 unsigned shift = PAGE_SHIFT +
201 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
202
203 return 1UL << shift;
204}
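
tbl_size() rounds the per-device-id tables up to a whole power-of-two number of pages, which is what the page allocator hands out anyway. A standalone sketch of the same computation; the 32-byte entry size in the example is a hypothetical value, since struct dev_table_entry itself is not part of this hunk.

#include <stdio.h>

#define PAGE_SHIFT 12

/* Smallest order so that (1 << (PAGE_SHIFT + order)) bytes hold `bytes`. */
static int order_for(unsigned long bytes)
{
	int order = 0;

	bytes = (bytes - 1) >> PAGE_SHIFT;
	while (bytes) {
		order++;
		bytes >>= 1;
	}
	return order;
}

/* Mirror of tbl_size(): one entry per device id, rounded to 2^n pages. */
static unsigned long tbl_size_sketch(unsigned last_bdf, int entry_size)
{
	return 1UL << (PAGE_SHIFT + order_for((last_bdf + 1UL) * entry_size));
}

int main(void)
{
	/*
	 * With the full 16-bit device id space and a hypothetical 32-byte
	 * entry this comes out at 2 MB (0x10000 * 32 bytes).
	 */
	printf("0x%lx\n", tbl_size_sketch(0xffff, 32));	/* 0x200000 */
	return 0;
}
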
205
206/* Access to l1 and l2 indexed register spaces */
207
208static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
209{
210 u32 val;
211
212 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
213 pci_read_config_dword(iommu->dev, 0xfc, &val);
214 return val;
215}
216
217static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
218{
219 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
220 pci_write_config_dword(iommu->dev, 0xfc, val);
221 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
222}
223
224static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
225{
226 u32 val;
227
228 pci_write_config_dword(iommu->dev, 0xf0, address);
229 pci_read_config_dword(iommu->dev, 0xf4, &val);
230 return val;
231}
232
233static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
234{
235 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
236 pci_write_config_dword(iommu->dev, 0xf4, val);
237}
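
The L1/L2 accessors above use a classic index/data register pair in PCI config space: an index word written at one offset selects the register (and, for writes, sets a write-enable bit), then the data register at the following offset is read or written. A standalone sketch of how the L1 index word is composed, matching the expression used in iommu_read_l1()/iommu_write_l1() above:

#include <stdint.h>
#include <stdio.h>

/*
 * Index word for an L1 access: register address in the low bits, L1 bank in
 * bits 16 and up, bit 31 as the write-enable flag.
 */
static uint32_t l1_index_word(uint16_t l1, uint8_t address, int write_enable)
{
	uint32_t val = (uint32_t)address | ((uint32_t)l1 << 16);

	if (write_enable)
		val |= 1u << 31;
	return val;
}

int main(void)
{
	printf("read  index: 0x%08x\n", l1_index_word(2, 0x11, 0)); /* 0x00020011 */
	printf("write index: 0x%08x\n", l1_index_word(2, 0x11, 1)); /* 0x80020011 */
	return 0;
}
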
238
239/****************************************************************************
240 *
241 * AMD IOMMU MMIO register space handling functions
242 *
243 * These functions are used to program the IOMMU device registers in
244 * MMIO space required for that driver.
245 *
246 ****************************************************************************/
247
248/*
249 * This function sets the exclusion range in the IOMMU. DMA accesses to the
250 * exclusion range are passed through untranslated.
251 */
252static void iommu_set_exclusion_range(struct amd_iommu *iommu)
253{
254 u64 start = iommu->exclusion_start & PAGE_MASK;
255 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
256 u64 entry;
257
258 if (!iommu->exclusion_start)
259 return;
260
261 entry = start | MMIO_EXCL_ENABLE_MASK;
262 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
263 &entry, sizeof(entry));
264
265 entry = limit;
266 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
267 &entry, sizeof(entry));
268}
269
270/* Programs the physical address of the device table into the IOMMU hardware */
271static void __init iommu_set_device_table(struct amd_iommu *iommu)
272{
273 u64 entry;
274
275 BUG_ON(iommu->mmio_base == NULL);
276
277 entry = virt_to_phys(amd_iommu_dev_table);
278 entry |= (dev_table_size >> 12) - 1;
279 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
280 &entry, sizeof(entry));
281}
282
283/* Generic functions to enable/disable certain features of the IOMMU. */
284static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
285{
286 u32 ctrl;
287
288 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
289 ctrl |= (1 << bit);
290 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
291}
292
293static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
294{
295 u32 ctrl;
296
297 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
298 ctrl &= ~(1 << bit);
299 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
300}
301
302/* Function to enable the hardware */
303static void iommu_enable(struct amd_iommu *iommu)
304{
305 static const char * const feat_str[] = {
306 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
307 "IA", "GA", "HE", "PC", NULL
308 };
309 int i;
310
311 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
312 dev_name(&iommu->dev->dev), iommu->cap_ptr);
313
314 if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
315 printk(KERN_CONT " extended features: ");
316 for (i = 0; feat_str[i]; ++i)
317 if (iommu_feature(iommu, (1ULL << i)))
318 printk(KERN_CONT " %s", feat_str[i]);
319 }
320 printk(KERN_CONT "\n");
321
322 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
323}
324
325static void iommu_disable(struct amd_iommu *iommu)
326{
327 /* Disable command buffer */
328 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
329
330 /* Disable event logging and event interrupts */
331 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
332 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
333
334 /* Disable IOMMU hardware itself */
335 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
336}
337
338/*
339 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
340 * the system has one.
341 */
342static u8 * __init iommu_map_mmio_space(u64 address)
343{
344 u8 *ret;
345
346 if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
347 pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
348 address);
349 pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
350 return NULL;
351 }
352
353 ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
354 if (ret != NULL)
355 return ret;
356
357 release_mem_region(address, MMIO_REGION_LENGTH);
358
359 return NULL;
360}
361
362static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
363{
364 if (iommu->mmio_base)
365 iounmap(iommu->mmio_base);
366 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
367}
368
369/****************************************************************************
370 *
371 * The functions below belong to the first pass of AMD IOMMU ACPI table
372 * parsing. In this pass we try to find out the highest device id this
373 * code has to handle. Based on this information the size of the shared data
374 * structures is determined later.
375 *
376 ****************************************************************************/
377
378/*
379 * This function calculates the length of a given IVHD entry
380 */
381static inline int ivhd_entry_length(u8 *ivhd)
382{
383 return 0x04 << (*ivhd >> 6);
384}
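
ivhd_entry_length() works because the top two bits of an entry's type byte encode its size class: 4, 8, 16 or 32 bytes. A standalone sketch using the type values defined above (the 0x80 case is hypothetical, no such type appears in this table):

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as ivhd_entry_length(): 4 << (type >> 6). */
static int entry_length(uint8_t type)
{
	return 0x04 << (type >> 6);
}

int main(void)
{
	printf("type 0x02 (DEV_SELECT)   -> %d bytes\n", entry_length(0x02)); /* 4  */
	printf("type 0x42 (DEV_ALIAS)    -> %d bytes\n", entry_length(0x42)); /* 8  */
	printf("type 0x80 (hypothetical) -> %d bytes\n", entry_length(0x80)); /* 16 */
	return 0;
}
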
385
386/*
387 * This function reads the last device id the IOMMU has to handle from the PCI
388 * capability header for this IOMMU
389 */
390static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
391{
392 u32 cap;
393
394 cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
395 update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
396
397 return 0;
398}
399
400/*
401 * After reading the highest device id from the IOMMU PCI capability header
402 * this function checks whether a higher device id is defined in the ACPI table
403 */
404static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
405{
406 u8 *p = (void *)h, *end = (void *)h;
407 struct ivhd_entry *dev;
408
409 p += sizeof(*h);
410 end += h->length;
411
412 find_last_devid_on_pci(PCI_BUS(h->devid),
413 PCI_SLOT(h->devid),
414 PCI_FUNC(h->devid),
415 h->cap_ptr);
416
417 while (p < end) {
418 dev = (struct ivhd_entry *)p;
419 switch (dev->type) {
420 case IVHD_DEV_SELECT:
421 case IVHD_DEV_RANGE_END:
422 case IVHD_DEV_ALIAS:
423 case IVHD_DEV_EXT_SELECT:
424 /* all the above subfield types refer to device ids */
425 update_last_devid(dev->devid);
426 break;
427 default:
428 break;
429 }
430 p += ivhd_entry_length(p);
431 }
432
433 WARN_ON(p != end);
434
435 return 0;
436}
437
438/*
439 * Iterate over all IVHD entries in the ACPI table and find the highest device
440 * id which we need to handle. This is the first of three functions which parse
441 * the ACPI table. So we check the checksum here.
442 */
443static int __init find_last_devid_acpi(struct acpi_table_header *table)
444{
445 int i;
446 u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
447 struct ivhd_header *h;
448
449 /*
450 * Validate checksum here so we don't need to do it when
451 * we actually parse the table
452 */
453 for (i = 0; i < table->length; ++i)
454 checksum += p[i];
455 if (checksum != 0) {
456 /* ACPI table corrupt */
457 amd_iommu_init_err = -ENODEV;
458 return 0;
459 }
460
461 p += IVRS_HEADER_LENGTH;
462
463 end += table->length;
464 while (p < end) {
465 h = (struct ivhd_header *)p;
466 switch (h->type) {
467 case ACPI_IVHD_TYPE:
468 find_last_devid_from_ivhd(h);
469 break;
470 default:
471 break;
472 }
473 p += h->length;
474 }
475 WARN_ON(p != end);
476
477 return 0;
478}
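
The checksum rule applied here is the usual ACPI one: the byte-wise sum of the entire table, including the checksum byte itself, must wrap to zero. A standalone sketch that builds a tiny fake table and validates it the same way:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* ACPI-style checksum: the byte-wise sum of the whole table must be 0. */
static uint8_t table_checksum(const uint8_t *p, size_t len)
{
	uint8_t sum = 0;
	size_t i;

	for (i = 0; i < len; i++)
		sum += p[i];
	return sum;
}

int main(void)
{
	uint8_t table[16];

	memset(table, 0x11, sizeof(table));
	/* Patch the last byte so the byte-wise sum wraps to zero. */
	table[15] = (uint8_t)(0x100 - table_checksum(table, 15));

	printf("checksum ok: %s\n",
	       table_checksum(table, sizeof(table)) == 0 ? "yes" : "no");
	return 0;
}
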
479
480/****************************************************************************
481 *
482 * The following functions belong to the code path which parses the ACPI table
483 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
484 * data structures, initialize the device/alias/rlookup table and also
485 * basically initialize the hardware.
486 *
487 ****************************************************************************/
488
489/*
490 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
491 * write commands to that buffer later and the IOMMU will execute them
492 * asynchronously
493 */
494static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
495{
496 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
497 get_order(CMD_BUFFER_SIZE));
498
499 if (cmd_buf == NULL)
500 return NULL;
501
502 iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
503
504 return cmd_buf;
505}
506
507/*
508 * This function resets the command buffer if the IOMMU stopped fetching
509 * commands from it.
510 */
511void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
512{
513 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
514
515 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
516 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
517
518 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
519}
520
521/*
522 * This function writes the command buffer address to the hardware and
523 * enables it.
524 */
525static void iommu_enable_command_buffer(struct amd_iommu *iommu)
526{
527 u64 entry;
528
529 BUG_ON(iommu->cmd_buf == NULL);
530
531 entry = (u64)virt_to_phys(iommu->cmd_buf);
532 entry |= MMIO_CMD_SIZE_512;
533
534 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
535 &entry, sizeof(entry));
536
537 amd_iommu_reset_cmd_buffer(iommu);
538 iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
539}
540
541static void __init free_command_buffer(struct amd_iommu *iommu)
542{
543 free_pages((unsigned long)iommu->cmd_buf,
544 get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
545}
546
547/* allocates the memory to which the IOMMU will log its events */
548static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
549{
550 iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
551 get_order(EVT_BUFFER_SIZE));
552
553 if (iommu->evt_buf == NULL)
554 return NULL;
555
556 iommu->evt_buf_size = EVT_BUFFER_SIZE;
557
558 return iommu->evt_buf;
559}
560
561static void iommu_enable_event_buffer(struct amd_iommu *iommu)
562{
563 u64 entry;
564
565 BUG_ON(iommu->evt_buf == NULL);
566
567 entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
568
569 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
570 &entry, sizeof(entry));
571
572 /* set head and tail to zero manually */
573 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
574 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
575
576 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
577}
578
579static void __init free_event_buffer(struct amd_iommu *iommu)
580{
581 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
582}
583
584/* sets a specific bit in the device table entry. */
585static void set_dev_entry_bit(u16 devid, u8 bit)
586{
587 int i = (bit >> 5) & 0x07;
588 int _bit = bit & 0x1f;
589
590 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
591}
592
593static int get_dev_entry_bit(u16 devid, u8 bit)
594{
595 int i = (bit >> 5) & 0x07;
596 int _bit = bit & 0x1f;
597
598 return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
599}
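
set_dev_entry_bit()/get_dev_entry_bit() address a flat bit number inside a device table entry by splitting it into a 32-bit word index (bit >> 5) and a bit position within that word (bit & 0x1f). The sketch below repeats that arithmetic on a stand-in eight-word entry; struct dev_table_entry itself is declared elsewhere and not shown in this hunk.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for a device table entry: eight 32-bit words. */
struct dte_sketch { uint32_t data[8]; };

static void dte_set_bit(struct dte_sketch *e, uint8_t bit)
{
	e->data[(bit >> 5) & 0x07] |= 1u << (bit & 0x1f);
}

static int dte_get_bit(const struct dte_sketch *e, uint8_t bit)
{
	return (e->data[(bit >> 5) & 0x07] >> (bit & 0x1f)) & 1;
}

int main(void)
{
	struct dte_sketch e = { { 0 } };

	dte_set_bit(&e, 97);			/* word 3, bit 1 */
	printf("word 3 = 0x%08x, bit set: %d\n",
	       e.data[3], dte_get_bit(&e, 97));	/* 0x00000002, 1 */
	return 0;
}
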
600
601
602void amd_iommu_apply_erratum_63(u16 devid)
603{
604 int sysmgt;
605
606 sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
607 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
608
609 if (sysmgt == 0x01)
610 set_dev_entry_bit(devid, DEV_ENTRY_IW);
611}
612
613/* Writes the specific IOMMU for a device into the rlookup table */
614static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
615{
616 amd_iommu_rlookup_table[devid] = iommu;
617}
618
619/*
620 * This function takes the device specific flags read from the ACPI
621 * table and sets up the device table entry with that information
622 */
623static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
624 u16 devid, u32 flags, u32 ext_flags)
625{
626 if (flags & ACPI_DEVFLAG_INITPASS)
627 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
628 if (flags & ACPI_DEVFLAG_EXTINT)
629 set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
630 if (flags & ACPI_DEVFLAG_NMI)
631 set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
632 if (flags & ACPI_DEVFLAG_SYSMGT1)
633 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
634 if (flags & ACPI_DEVFLAG_SYSMGT2)
635 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
636 if (flags & ACPI_DEVFLAG_LINT0)
637 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
638 if (flags & ACPI_DEVFLAG_LINT1)
639 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
640
641 amd_iommu_apply_erratum_63(devid);
642
643 set_iommu_for_device(iommu, devid);
644}
645
646/*
647 * Reads the device exclusion range from ACPI and initializes the IOMMU with
648 * it
649 */
650static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
651{
652 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
653
654 if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
655 return;
656
657 if (iommu) {
658 /*
659		 * We can only configure exclusion ranges per IOMMU, not
660 * per device. But we can enable the exclusion range per
661 * device. This is done here
662 */
663 set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
664 iommu->exclusion_start = m->range_start;
665 iommu->exclusion_length = m->range_length;
666 }
667}
668
669/*
670 * This function reads some important data from the IOMMU PCI space and
671 * initializes the driver data structure with it. It reads the hardware
672 * capabilities and the first/last device entries
673 */
674static void __init init_iommu_from_pci(struct amd_iommu *iommu)
675{
676 int cap_ptr = iommu->cap_ptr;
677 u32 range, misc, low, high;
678 int i, j;
679
680 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
681 &iommu->cap);
682 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
683 &range);
684 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
685 &misc);
686
687 iommu->first_device = calc_devid(MMIO_GET_BUS(range),
688 MMIO_GET_FD(range));
689 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
690 MMIO_GET_LD(range));
691 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
692
693 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
694 amd_iommu_iotlb_sup = false;
695
696 /* read extended feature bits */
697 low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
698 high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
699
700 iommu->features = ((u64)high << 32) | low;
701
702 if (!is_rd890_iommu(iommu->dev))
703 return;
704
705 /*
706 * Some rd890 systems may not be fully reconfigured by the BIOS, so
707 * it's necessary for us to store this information so it can be
708 * reprogrammed on resume
709 */
710
711 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
712 &iommu->stored_addr_lo);
713 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
714 &iommu->stored_addr_hi);
715
716 /* Low bit locks writes to configuration space */
717 iommu->stored_addr_lo &= ~1;
718
719 for (i = 0; i < 6; i++)
720 for (j = 0; j < 0x12; j++)
721 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
722
723 for (i = 0; i < 0x83; i++)
724 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
725}
726
727/*
728 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
729 * initializes the hardware and our data structures with it.
730 */
731static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
732 struct ivhd_header *h)
733{
734 u8 *p = (u8 *)h;
735 u8 *end = p, flags = 0;
736 u16 devid = 0, devid_start = 0, devid_to = 0;
737 u32 dev_i, ext_flags = 0;
738 bool alias = false;
739 struct ivhd_entry *e;
740
741 /*
742 * First save the recommended feature enable bits from ACPI
743 */
744 iommu->acpi_flags = h->flags;
745
746 /*
747 * Done. Now parse the device entries
748 */
749 p += sizeof(struct ivhd_header);
750 end += h->length;
751
752
753 while (p < end) {
754 e = (struct ivhd_entry *)p;
755 switch (e->type) {
756 case IVHD_DEV_ALL:
757
758 DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
759 " last device %02x:%02x.%x flags: %02x\n",
760 PCI_BUS(iommu->first_device),
761 PCI_SLOT(iommu->first_device),
762 PCI_FUNC(iommu->first_device),
763 PCI_BUS(iommu->last_device),
764 PCI_SLOT(iommu->last_device),
765 PCI_FUNC(iommu->last_device),
766 e->flags);
767
768 for (dev_i = iommu->first_device;
769 dev_i <= iommu->last_device; ++dev_i)
770 set_dev_entry_from_acpi(iommu, dev_i,
771 e->flags, 0);
772 break;
773 case IVHD_DEV_SELECT:
774
775 DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
776 "flags: %02x\n",
777 PCI_BUS(e->devid),
778 PCI_SLOT(e->devid),
779 PCI_FUNC(e->devid),
780 e->flags);
781
782 devid = e->devid;
783 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
784 break;
785 case IVHD_DEV_SELECT_RANGE_START:
786
787 DUMP_printk(" DEV_SELECT_RANGE_START\t "
788 "devid: %02x:%02x.%x flags: %02x\n",
789 PCI_BUS(e->devid),
790 PCI_SLOT(e->devid),
791 PCI_FUNC(e->devid),
792 e->flags);
793
794 devid_start = e->devid;
795 flags = e->flags;
796 ext_flags = 0;
797 alias = false;
798 break;
799 case IVHD_DEV_ALIAS:
800
801 DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
802 "flags: %02x devid_to: %02x:%02x.%x\n",
803 PCI_BUS(e->devid),
804 PCI_SLOT(e->devid),
805 PCI_FUNC(e->devid),
806 e->flags,
807 PCI_BUS(e->ext >> 8),
808 PCI_SLOT(e->ext >> 8),
809 PCI_FUNC(e->ext >> 8));
810
811 devid = e->devid;
812 devid_to = e->ext >> 8;
813 set_dev_entry_from_acpi(iommu, devid , e->flags, 0);
814 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
815 amd_iommu_alias_table[devid] = devid_to;
816 break;
817 case IVHD_DEV_ALIAS_RANGE:
818
819 DUMP_printk(" DEV_ALIAS_RANGE\t\t "
820 "devid: %02x:%02x.%x flags: %02x "
821 "devid_to: %02x:%02x.%x\n",
822 PCI_BUS(e->devid),
823 PCI_SLOT(e->devid),
824 PCI_FUNC(e->devid),
825 e->flags,
826 PCI_BUS(e->ext >> 8),
827 PCI_SLOT(e->ext >> 8),
828 PCI_FUNC(e->ext >> 8));
829
830 devid_start = e->devid;
831 flags = e->flags;
832 devid_to = e->ext >> 8;
833 ext_flags = 0;
834 alias = true;
835 break;
836 case IVHD_DEV_EXT_SELECT:
837
838 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
839 "flags: %02x ext: %08x\n",
840 PCI_BUS(e->devid),
841 PCI_SLOT(e->devid),
842 PCI_FUNC(e->devid),
843 e->flags, e->ext);
844
845 devid = e->devid;
846 set_dev_entry_from_acpi(iommu, devid, e->flags,
847 e->ext);
848 break;
849 case IVHD_DEV_EXT_SELECT_RANGE:
850
851 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
852 "%02x:%02x.%x flags: %02x ext: %08x\n",
853 PCI_BUS(e->devid),
854 PCI_SLOT(e->devid),
855 PCI_FUNC(e->devid),
856 e->flags, e->ext);
857
858 devid_start = e->devid;
859 flags = e->flags;
860 ext_flags = e->ext;
861 alias = false;
862 break;
863 case IVHD_DEV_RANGE_END:
864
865 DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
866 PCI_BUS(e->devid),
867 PCI_SLOT(e->devid),
868 PCI_FUNC(e->devid));
869
870 devid = e->devid;
871 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
872 if (alias) {
873 amd_iommu_alias_table[dev_i] = devid_to;
874 set_dev_entry_from_acpi(iommu,
875 devid_to, flags, ext_flags);
876 }
877 set_dev_entry_from_acpi(iommu, dev_i,
878 flags, ext_flags);
879 }
880 break;
881 default:
882 break;
883 }
884
885 p += ivhd_entry_length(p);
886 }
887}
888
889/* Initializes the device->iommu mapping for the driver */
890static int __init init_iommu_devices(struct amd_iommu *iommu)
891{
892 u32 i;
893
894 for (i = iommu->first_device; i <= iommu->last_device; ++i)
895 set_iommu_for_device(iommu, i);
896
897 return 0;
898}
899
900static void __init free_iommu_one(struct amd_iommu *iommu)
901{
902 free_command_buffer(iommu);
903 free_event_buffer(iommu);
904 iommu_unmap_mmio_space(iommu);
905}
906
907static void __init free_iommu_all(void)
908{
909 struct amd_iommu *iommu, *next;
910
911 for_each_iommu_safe(iommu, next) {
912 list_del(&iommu->list);
913 free_iommu_one(iommu);
914 kfree(iommu);
915 }
916}
917
918/*
919 * This function glues the initialization function for one IOMMU
920 * together and also allocates the command buffer and programs the
921 * hardware. It does NOT enable the IOMMU. This is done afterwards.
922 */
923static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
924{
925 spin_lock_init(&iommu->lock);
926
927 /* Add IOMMU to internal data structures */
928 list_add_tail(&iommu->list, &amd_iommu_list);
929 iommu->index = amd_iommus_present++;
930
931 if (unlikely(iommu->index >= MAX_IOMMUS)) {
932 WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
933 return -ENOSYS;
934 }
935
936 /* Index is fine - add IOMMU to the array */
937 amd_iommus[iommu->index] = iommu;
938
939 /*
940 * Copy data from ACPI table entry to the iommu struct
941 */
942 iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
943 if (!iommu->dev)
944 return 1;
945
946 iommu->cap_ptr = h->cap_ptr;
947 iommu->pci_seg = h->pci_seg;
948 iommu->mmio_phys = h->mmio_phys;
949 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
950 if (!iommu->mmio_base)
951 return -ENOMEM;
952
953 iommu->cmd_buf = alloc_command_buffer(iommu);
954 if (!iommu->cmd_buf)
955 return -ENOMEM;
956
957 iommu->evt_buf = alloc_event_buffer(iommu);
958 if (!iommu->evt_buf)
959 return -ENOMEM;
960
961 iommu->int_enabled = false;
962
963 init_iommu_from_pci(iommu);
964 init_iommu_from_acpi(iommu, h);
965 init_iommu_devices(iommu);
966
967 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
968 amd_iommu_np_cache = true;
969
970 return pci_enable_device(iommu->dev);
971}
972
973/*
974 * Iterates over all IOMMU entries in the ACPI table, allocates the
975 * IOMMU structure and initializes it with init_iommu_one()
976 */
977static int __init init_iommu_all(struct acpi_table_header *table)
978{
979 u8 *p = (u8 *)table, *end = (u8 *)table;
980 struct ivhd_header *h;
981 struct amd_iommu *iommu;
982 int ret;
983
984 end += table->length;
985 p += IVRS_HEADER_LENGTH;
986
987 while (p < end) {
988 h = (struct ivhd_header *)p;
989 switch (*p) {
990 case ACPI_IVHD_TYPE:
991
992 DUMP_printk("device: %02x:%02x.%01x cap: %04x "
993 "seg: %d flags: %01x info %04x\n",
994 PCI_BUS(h->devid), PCI_SLOT(h->devid),
995 PCI_FUNC(h->devid), h->cap_ptr,
996 h->pci_seg, h->flags, h->info);
997 DUMP_printk(" mmio-addr: %016llx\n",
998 h->mmio_phys);
999
1000 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1001 if (iommu == NULL) {
1002 amd_iommu_init_err = -ENOMEM;
1003 return 0;
1004 }
1005
1006 ret = init_iommu_one(iommu, h);
1007 if (ret) {
1008 amd_iommu_init_err = ret;
1009 return 0;
1010 }
1011 break;
1012 default:
1013 break;
1014 }
1015 p += h->length;
1016
1017 }
1018 WARN_ON(p != end);
1019
1020 return 0;
1021}
1022
1023/****************************************************************************
1024 *
1025 * The following functions initialize the MSI interrupts for all IOMMUs
1026 * in the system. It's a bit challenging because there could be multiple
1027 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1028 * pci_dev.
1029 *
1030 ****************************************************************************/
1031
1032static int iommu_setup_msi(struct amd_iommu *iommu)
1033{
1034 int r;
1035
1036 if (pci_enable_msi(iommu->dev))
1037 return 1;
1038
1039 r = request_threaded_irq(iommu->dev->irq,
1040 amd_iommu_int_handler,
1041 amd_iommu_int_thread,
1042 0, "AMD-Vi",
1043 iommu->dev);
1044
1045 if (r) {
1046 pci_disable_msi(iommu->dev);
1047 return 1;
1048 }
1049
1050 iommu->int_enabled = true;
1051 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
1052
1053 return 0;
1054}
1055
1056static int iommu_init_msi(struct amd_iommu *iommu)
1057{
1058 if (iommu->int_enabled)
1059 return 0;
1060
1061 if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
1062 return iommu_setup_msi(iommu);
1063
1064 return 1;
1065}
1066
1067/****************************************************************************
1068 *
1069 * The next functions belong to the third pass of parsing the ACPI
1070 * table. In this last pass the memory mapping requirements are
1071 * gathered (like exclusion and unity mapping ranges).
1072 *
1073 ****************************************************************************/
1074
1075static void __init free_unity_maps(void)
1076{
1077 struct unity_map_entry *entry, *next;
1078
1079 list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
1080 list_del(&entry->list);
1081 kfree(entry);
1082 }
1083}
1084
1085/* called when we find an exclusion range definition in ACPI */
1086static int __init init_exclusion_range(struct ivmd_header *m)
1087{
1088 int i;
1089
1090 switch (m->type) {
1091 case ACPI_IVMD_TYPE:
1092 set_device_exclusion_range(m->devid, m);
1093 break;
1094 case ACPI_IVMD_TYPE_ALL:
1095 for (i = 0; i <= amd_iommu_last_bdf; ++i)
1096 set_device_exclusion_range(i, m);
1097 break;
1098 case ACPI_IVMD_TYPE_RANGE:
1099 for (i = m->devid; i <= m->aux; ++i)
1100 set_device_exclusion_range(i, m);
1101 break;
1102 default:
1103 break;
1104 }
1105
1106 return 0;
1107}
1108
1109/* called for unity map ACPI definition */
1110static int __init init_unity_map_range(struct ivmd_header *m)
1111{
1112 struct unity_map_entry *e = 0;
1113 char *s;
1114
1115 e = kzalloc(sizeof(*e), GFP_KERNEL);
1116 if (e == NULL)
1117 return -ENOMEM;
1118
1119 switch (m->type) {
1120 default:
1121 kfree(e);
1122 return 0;
1123 case ACPI_IVMD_TYPE:
1124		s = "IVMD_TYPE\t\t\t";
1125 e->devid_start = e->devid_end = m->devid;
1126 break;
1127 case ACPI_IVMD_TYPE_ALL:
1128 s = "IVMD_TYPE_ALL\t\t";
1129 e->devid_start = 0;
1130 e->devid_end = amd_iommu_last_bdf;
1131 break;
1132 case ACPI_IVMD_TYPE_RANGE:
1133 s = "IVMD_TYPE_RANGE\t\t";
1134 e->devid_start = m->devid;
1135 e->devid_end = m->aux;
1136 break;
1137 }
1138 e->address_start = PAGE_ALIGN(m->range_start);
1139 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
1140 e->prot = m->flags >> 1;
1141
1142 DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
1143 " range_start: %016llx range_end: %016llx flags: %x\n", s,
1144 PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
1145 PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
1146 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
1147 e->address_start, e->address_end, m->flags);
1148
1149 list_add_tail(&e->list, &amd_iommu_unity_map);
1150
1151 return 0;
1152}
1153
1154/* iterates over all memory definitions we find in the ACPI table */
1155static int __init init_memory_definitions(struct acpi_table_header *table)
1156{
1157 u8 *p = (u8 *)table, *end = (u8 *)table;
1158 struct ivmd_header *m;
1159
1160 end += table->length;
1161 p += IVRS_HEADER_LENGTH;
1162
1163 while (p < end) {
1164 m = (struct ivmd_header *)p;
1165 if (m->flags & IVMD_FLAG_EXCL_RANGE)
1166 init_exclusion_range(m);
1167 else if (m->flags & IVMD_FLAG_UNITY_MAP)
1168 init_unity_map_range(m);
1169
1170 p += m->length;
1171 }
1172
1173 return 0;
1174}
1175
1176/*
1177 * Init the device table to not allow DMA access for devices and
1178 * suppress all page faults
1179 */
1180static void init_device_table(void)
1181{
1182 u32 devid;
1183
1184 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
1185 set_dev_entry_bit(devid, DEV_ENTRY_VALID);
1186 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
1187 }
1188}
1189
1190static void iommu_init_flags(struct amd_iommu *iommu)
1191{
1192 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
1193 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
1194 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
1195
1196 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
1197 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
1198 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
1199
1200 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
1201 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
1202 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
1203
1204 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
1205 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
1206 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
1207
1208 /*
1209 * make IOMMU memory accesses cache coherent
1210 */
1211 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1212}
1213
1214static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1215{
1216 int i, j;
1217 u32 ioc_feature_control;
1218 struct pci_dev *pdev = NULL;
1219
1220 /* RD890 BIOSes may not have completely reconfigured the iommu */
1221 if (!is_rd890_iommu(iommu->dev))
1222 return;
1223
1224 /*
1225 * First, we need to ensure that the iommu is enabled. This is
1226 * controlled by a register in the northbridge
1227 */
1228 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1229
1230 if (!pdev)
1231 return;
1232
1233 /* Select Northbridge indirect register 0x75 and enable writing */
1234 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1235 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1236
1237 /* Enable the iommu */
1238 if (!(ioc_feature_control & 0x1))
1239 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1240
1241 pci_dev_put(pdev);
1242
1243 /* Restore the iommu BAR */
1244 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1245 iommu->stored_addr_lo);
1246 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1247 iommu->stored_addr_hi);
1248
1249 /* Restore the l1 indirect regs for each of the 6 l1s */
1250 for (i = 0; i < 6; i++)
1251 for (j = 0; j < 0x12; j++)
1252 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1253
1254 /* Restore the l2 indirect regs */
1255 for (i = 0; i < 0x83; i++)
1256 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1257
1258 /* Lock PCI setup registers */
1259 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1260 iommu->stored_addr_lo | 1);
1261}
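
The quirk above reaches the RD890 northbridge's IOC registers indirectly: the register index is written to config offset 0x60 (with bit 7 set so the value can later be written back through the data port) and the register contents are accessed at offset 0x64. A minimal sketch of that access pattern, assuming a hypothetical helper name (nb_ioc_read is not part of this patch; a pure read would not need bit 7):

        static u32 nb_ioc_read(struct pci_dev *nb, u8 reg)
        {
        	u32 val;

        	/* Select the indirect register... */
        	pci_write_config_dword(nb, 0x60, reg);
        	/* ...and read its value through the data port. */
        	pci_read_config_dword(nb, 0x64, &val);

        	return val;
        }
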
1262
1263/*
1264 * This function finally enables all IOMMUs found in the system after
1265 * they have been initialized
1266 */
1267static void enable_iommus(void)
1268{
1269 struct amd_iommu *iommu;
1270
1271 for_each_iommu(iommu) {
1272 iommu_disable(iommu);
1273 iommu_init_flags(iommu);
1274 iommu_set_device_table(iommu);
1275 iommu_enable_command_buffer(iommu);
1276 iommu_enable_event_buffer(iommu);
1277 iommu_set_exclusion_range(iommu);
1278 iommu_init_msi(iommu);
1279 iommu_enable(iommu);
1280 iommu_flush_all_caches(iommu);
1281 }
1282}
1283
1284static void disable_iommus(void)
1285{
1286 struct amd_iommu *iommu;
1287
1288 for_each_iommu(iommu)
1289 iommu_disable(iommu);
1290}
1291
1292/*
1293 * Suspend/Resume support
1295 */
1296
1297static void amd_iommu_resume(void)
1298{
1299 struct amd_iommu *iommu;
1300
1301 for_each_iommu(iommu)
1302 iommu_apply_resume_quirks(iommu);
1303
1304 /* re-load the hardware */
1305 enable_iommus();
1306
1307 /*
1308 * we have to flush after the IOMMUs are enabled because a
1309 * disabled IOMMU will never execute the commands we send
1310 */
1311 for_each_iommu(iommu)
1312 iommu_flush_all_caches(iommu);
1313}
1314
1315static int amd_iommu_suspend(void)
1316{
1317 /* disable IOMMUs to go out of the way for BIOS */
1318 disable_iommus();
1319
1320 return 0;
1321}
1322
1323static struct syscore_ops amd_iommu_syscore_ops = {
1324 .suspend = amd_iommu_suspend,
1325 .resume = amd_iommu_resume,
1326};
1327
1328/*
1329 * This is the core init function for AMD IOMMU hardware in the system.
1330 * This function is called from the generic x86 DMA layer initialization
1331 * code.
1332 *
1333 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
1334 * three times:
1335 *
1336 * Pass 1) Find the highest PCI device id the driver has to handle.
1337 *         Based on this information the sizes of the data structures
1338 *         that need to be allocated are determined.
1339 *
1340 * Pass 2) Initialize the data structures just allocated with the
1341 *         information in the ACPI table about available AMD IOMMUs
1342 *         in the system. It also maps the PCI devices in the
1343 *         system to specific IOMMUs.
1344 *
1345 * Pass 3) After the basic data structures are allocated and
1346 *         initialized we update them with information about memory
1347 *         remapping requirements parsed out of the ACPI table in
1348 *         this last pass.
1349 *
1350 * After that the hardware is initialized and ready to go. In the last
1351 * step we do some Linux specific things like registering the driver in
1352 * the dma_ops interface and initializing the suspend/resume support
1353 * functions. Finally it prints some information about AMD IOMMUs and
1354 * the driver state and enables the hardware.
1355 */
1356static int __init amd_iommu_init(void)
1357{
1358 int i, ret = 0;
1359
1360 /*
1361 * First parse ACPI tables to find the largest Bus/Dev/Func
1362	 * we need to handle. Based on this information the shared data
1363	 * structures for the IOMMUs in the system will be allocated.
1364 */
1365 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
1366 return -ENODEV;
1367
1368 ret = amd_iommu_init_err;
1369 if (ret)
1370 goto out;
1371
1372 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
1373 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
1374 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
1375
1376 ret = -ENOMEM;
1377
1378 /* Device table - directly used by all IOMMUs */
1379 amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1380 get_order(dev_table_size));
1381 if (amd_iommu_dev_table == NULL)
1382 goto out;
1383
1384 /*
1385 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
1386	 * IOMMU sees for that device
1387 */
1388 amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
1389 get_order(alias_table_size));
1390 if (amd_iommu_alias_table == NULL)
1391 goto free;
1392
1393 /* IOMMU rlookup table - find the IOMMU for a specific device */
1394 amd_iommu_rlookup_table = (void *)__get_free_pages(
1395 GFP_KERNEL | __GFP_ZERO,
1396 get_order(rlookup_table_size));
1397 if (amd_iommu_rlookup_table == NULL)
1398 goto free;
1399
1400 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
1401 GFP_KERNEL | __GFP_ZERO,
1402 get_order(MAX_DOMAIN_ID/8));
1403 if (amd_iommu_pd_alloc_bitmap == NULL)
1404 goto free;
1405
1406 /* init the device table */
1407 init_device_table();
1408
1409 /*
1410	 * let all alias entries point to themselves
1411 */
1412 for (i = 0; i <= amd_iommu_last_bdf; ++i)
1413 amd_iommu_alias_table[i] = i;
1414
1415 /*
1416	 * never allocate domain 0 because it's used as the non-allocated and
1417 * error value placeholder
1418 */
1419 amd_iommu_pd_alloc_bitmap[0] = 1;
1420
1421 spin_lock_init(&amd_iommu_pd_lock);
1422
1423 /*
1424 * now the data structures are allocated and basically initialized
1425 * start the real acpi table scan
1426 */
1427 ret = -ENODEV;
1428 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
1429 goto free;
1430
1431 if (amd_iommu_init_err) {
1432 ret = amd_iommu_init_err;
1433 goto free;
1434 }
1435
1436 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
1437 goto free;
1438
1439 if (amd_iommu_init_err) {
1440 ret = amd_iommu_init_err;
1441 goto free;
1442 }
1443
1444 ret = amd_iommu_init_devices();
1445 if (ret)
1446 goto free;
1447
1448 enable_iommus();
1449
1450 if (iommu_pass_through)
1451 ret = amd_iommu_init_passthrough();
1452 else
1453 ret = amd_iommu_init_dma_ops();
1454
1455 if (ret)
1456 goto free_disable;
1457
1458 amd_iommu_init_api();
1459
1460 amd_iommu_init_notifier();
1461
1462 register_syscore_ops(&amd_iommu_syscore_ops);
1463
1464 if (iommu_pass_through)
1465 goto out;
1466
1467 if (amd_iommu_unmap_flush)
1468 printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
1469 else
1470 printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
1471
1472 x86_platform.iommu_shutdown = disable_iommus;
1473out:
1474 return ret;
1475
1476free_disable:
1477 disable_iommus();
1478
1479free:
1480 amd_iommu_uninit_devices();
1481
1482 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1483 get_order(MAX_DOMAIN_ID/8));
1484
1485 free_pages((unsigned long)amd_iommu_rlookup_table,
1486 get_order(rlookup_table_size));
1487
1488 free_pages((unsigned long)amd_iommu_alias_table,
1489 get_order(alias_table_size));
1490
1491 free_pages((unsigned long)amd_iommu_dev_table,
1492 get_order(dev_table_size));
1493
1494 free_iommu_all();
1495
1496 free_unity_maps();
1497
1498#ifdef CONFIG_GART_IOMMU
1499 /*
1500 * We failed to initialize the AMD IOMMU - try fallback to GART
1501 * if possible.
1502 */
1503 gart_iommu_init();
1504
1505#endif
1506
1507 goto out;
1508}
1509
1510/****************************************************************************
1511 *
1512 * Early detect code. This code runs at IOMMU detection time in the DMA
1513 * layer. It just looks if there is an IVRS ACPI table to detect AMD
1514 * IOMMUs
1515 *
1516 ****************************************************************************/
1517static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1518{
1519 return 0;
1520}
1521
1522int __init amd_iommu_detect(void)
1523{
1524 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1525 return -ENODEV;
1526
1527 if (amd_iommu_disabled)
1528 return -ENODEV;
1529
1530 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1531 iommu_detected = 1;
1532 amd_iommu_detected = 1;
1533 x86_init.iommu.iommu_init = amd_iommu_init;
1534
1535 /* Make sure ACS will be enabled */
1536 pci_request_acs();
1537 return 1;
1538 }
1539 return -ENODEV;
1540}
1541
1542/****************************************************************************
1543 *
1544 * Parsing functions for the AMD IOMMU specific kernel command line
1545 * options.
1546 *
1547 ****************************************************************************/
1548
1549static int __init parse_amd_iommu_dump(char *str)
1550{
1551 amd_iommu_dump = true;
1552
1553 return 1;
1554}
1555
1556static int __init parse_amd_iommu_options(char *str)
1557{
1558 for (; *str; ++str) {
1559 if (strncmp(str, "fullflush", 9) == 0)
1560 amd_iommu_unmap_flush = true;
1561 if (strncmp(str, "off", 3) == 0)
1562 amd_iommu_disabled = true;
1563 }
1564
1565 return 1;
1566}
1567
1568__setup("amd_iommu_dump", parse_amd_iommu_dump);
1569__setup("amd_iommu=", parse_amd_iommu_options);
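
Taken together with amd_iommu_detect() above, these handlers mean that booting with amd_iommu=off keeps the driver from initializing at all, amd_iommu=fullflush sets amd_iommu_unmap_flush so the IO/TLB is flushed at unmap time rather than when addresses are reused, and amd_iommu_dump enables the DUMP_printk() output emitted while the IVRS table is parsed.
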
1570
1571IOMMU_INIT_FINISH(amd_iommu_detect,
1572 gart_iommu_hole_init,
1573 0,
1574 0);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
new file mode 100644
index 000000000000..7ffaa64410b0
--- /dev/null
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
20#define _ASM_X86_AMD_IOMMU_PROTO_H
21
22#include "amd_iommu_types.h"
23
24extern int amd_iommu_init_dma_ops(void);
25extern int amd_iommu_init_passthrough(void);
26extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
27extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
28extern void amd_iommu_apply_erratum_63(u16 devid);
29extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
30extern int amd_iommu_init_devices(void);
31extern void amd_iommu_uninit_devices(void);
32extern void amd_iommu_init_notifier(void);
33extern void amd_iommu_init_api(void);
34#ifndef CONFIG_AMD_IOMMU_STATS
35
36static inline void amd_iommu_stats_init(void) { }
37
38#endif /* !CONFIG_AMD_IOMMU_STATS */
39
40static inline bool is_rd890_iommu(struct pci_dev *pdev)
41{
42 return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
43 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
44}
45
46static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
47{
48 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
49 return false;
50
51 return !!(iommu->features & f);
52}
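
iommu_feature() only returns a meaningful answer once iommu->cap and iommu->features have been filled in during initialization. A hedged usage sketch (example_log_features is a hypothetical caller, not part of this patch):

        static void example_log_features(struct amd_iommu *iommu)
        {
        	/* FEATURE_* bits come from amd_iommu_types.h, included above. */
        	if (iommu_feature(iommu, FEATURE_IA))
        		pr_info("AMD-Vi: IA feature bit is set in the EFR\n");
        }
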
53
54#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
new file mode 100644
index 000000000000..5b9c5075e81a
--- /dev/null
+++ b/drivers/iommu/amd_iommu_types.h
@@ -0,0 +1,585 @@
1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
21#define _ASM_X86_AMD_IOMMU_TYPES_H
22
23#include <linux/types.h>
24#include <linux/mutex.h>
25#include <linux/list.h>
26#include <linux/spinlock.h>
27
28/*
29 * Maximum number of IOMMUs supported
30 */
31#define MAX_IOMMUS 32
32
33/*
34 * some size calculation constants
35 */
36#define DEV_TABLE_ENTRY_SIZE 32
37#define ALIAS_TABLE_ENTRY_SIZE 2
38#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
39
40/* Length of the MMIO region for the AMD IOMMU */
41#define MMIO_REGION_LENGTH 0x4000
42
43/* Capability offsets used by the driver */
44#define MMIO_CAP_HDR_OFFSET 0x00
45#define MMIO_RANGE_OFFSET 0x0c
46#define MMIO_MISC_OFFSET 0x10
47
48/* Masks, shifts and macros to parse the device range capability */
49#define MMIO_RANGE_LD_MASK 0xff000000
50#define MMIO_RANGE_FD_MASK 0x00ff0000
51#define MMIO_RANGE_BUS_MASK 0x0000ff00
52#define MMIO_RANGE_LD_SHIFT 24
53#define MMIO_RANGE_FD_SHIFT 16
54#define MMIO_RANGE_BUS_SHIFT 8
55#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
56#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
57#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
58#define MMIO_MSI_NUM(x) ((x) & 0x1f)
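
As a worked example (the register value is made up purely for illustration), a range capability of 0x3f080100 decodes with the macros above as follows:

        u32 range = 0x3f080100;			/* hypothetical capability value */
        u8  bus   = MMIO_GET_BUS(range);	/* 0x01: bus of the handled devices */
        u8  first = MMIO_GET_FD(range);		/* 0x08: first device handled */
        u8  last  = MMIO_GET_LD(range);		/* 0x3f: last device handled */
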
59
60/* Flag masks for the AMD IOMMU exclusion range */
61#define MMIO_EXCL_ENABLE_MASK 0x01ULL
62#define MMIO_EXCL_ALLOW_MASK 0x02ULL
63
64/* Used offsets into the MMIO space */
65#define MMIO_DEV_TABLE_OFFSET 0x0000
66#define MMIO_CMD_BUF_OFFSET 0x0008
67#define MMIO_EVT_BUF_OFFSET 0x0010
68#define MMIO_CONTROL_OFFSET 0x0018
69#define MMIO_EXCL_BASE_OFFSET 0x0020
70#define MMIO_EXCL_LIMIT_OFFSET 0x0028
71#define MMIO_EXT_FEATURES 0x0030
72#define MMIO_CMD_HEAD_OFFSET 0x2000
73#define MMIO_CMD_TAIL_OFFSET 0x2008
74#define MMIO_EVT_HEAD_OFFSET 0x2010
75#define MMIO_EVT_TAIL_OFFSET 0x2018
76#define MMIO_STATUS_OFFSET 0x2020
77
78
79/* Extended Feature Bits */
80#define FEATURE_PREFETCH (1ULL<<0)
81#define FEATURE_PPR (1ULL<<1)
82#define FEATURE_X2APIC (1ULL<<2)
83#define FEATURE_NX (1ULL<<3)
84#define FEATURE_GT (1ULL<<4)
85#define FEATURE_IA (1ULL<<6)
86#define FEATURE_GA (1ULL<<7)
87#define FEATURE_HE (1ULL<<8)
88#define FEATURE_PC (1ULL<<9)
89
90/* MMIO status bits */
91#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
92
93/* event logging constants */
94#define EVENT_ENTRY_SIZE 0x10
95#define EVENT_TYPE_SHIFT 28
96#define EVENT_TYPE_MASK 0xf
97#define EVENT_TYPE_ILL_DEV 0x1
98#define EVENT_TYPE_IO_FAULT 0x2
99#define EVENT_TYPE_DEV_TAB_ERR 0x3
100#define EVENT_TYPE_PAGE_TAB_ERR 0x4
101#define EVENT_TYPE_ILL_CMD 0x5
102#define EVENT_TYPE_CMD_HARD_ERR 0x6
103#define EVENT_TYPE_IOTLB_INV_TO 0x7
104#define EVENT_TYPE_INV_DEV_REQ 0x8
105#define EVENT_DEVID_MASK 0xffff
106#define EVENT_DEVID_SHIFT 0
107#define EVENT_DOMID_MASK 0xffff
108#define EVENT_DOMID_SHIFT 0
109#define EVENT_FLAGS_MASK 0xfff
110#define EVENT_FLAGS_SHIFT 0x10
111
112/* feature control bits */
113#define CONTROL_IOMMU_EN 0x00ULL
114#define CONTROL_HT_TUN_EN 0x01ULL
115#define CONTROL_EVT_LOG_EN 0x02ULL
116#define CONTROL_EVT_INT_EN 0x03ULL
117#define CONTROL_COMWAIT_EN 0x04ULL
118#define CONTROL_PASSPW_EN 0x08ULL
119#define CONTROL_RESPASSPW_EN 0x09ULL
120#define CONTROL_COHERENT_EN 0x0aULL
121#define CONTROL_ISOC_EN 0x0bULL
122#define CONTROL_CMDBUF_EN 0x0cULL
123#define CONTROL_PPFLOG_EN 0x0dULL
124#define CONTROL_PPFINT_EN 0x0eULL
125
126/* command specific defines */
127#define CMD_COMPL_WAIT 0x01
128#define CMD_INV_DEV_ENTRY 0x02
129#define CMD_INV_IOMMU_PAGES 0x03
130#define CMD_INV_IOTLB_PAGES 0x04
131#define CMD_INV_ALL 0x08
132
133#define CMD_COMPL_WAIT_STORE_MASK 0x01
134#define CMD_COMPL_WAIT_INT_MASK 0x02
135#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
136#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
137
138#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
139
140/* macros and definitions for device table entries */
141#define DEV_ENTRY_VALID 0x00
142#define DEV_ENTRY_TRANSLATION 0x01
143#define DEV_ENTRY_IR 0x3d
144#define DEV_ENTRY_IW 0x3e
145#define DEV_ENTRY_NO_PAGE_FAULT 0x62
146#define DEV_ENTRY_EX 0x67
147#define DEV_ENTRY_SYSMGT1 0x68
148#define DEV_ENTRY_SYSMGT2 0x69
149#define DEV_ENTRY_INIT_PASS 0xb8
150#define DEV_ENTRY_EINT_PASS 0xb9
151#define DEV_ENTRY_NMI_PASS 0xba
152#define DEV_ENTRY_LINT0_PASS 0xbe
153#define DEV_ENTRY_LINT1_PASS 0xbf
154#define DEV_ENTRY_MODE_MASK 0x07
155#define DEV_ENTRY_MODE_SHIFT 0x09
156
157/* constants to configure the command buffer */
158#define CMD_BUFFER_SIZE 8192
159#define CMD_BUFFER_UNINITIALIZED 1
160#define CMD_BUFFER_ENTRIES 512
161#define MMIO_CMD_SIZE_SHIFT 56
162#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
163
164/* constants for event buffer handling */
165#define EVT_BUFFER_SIZE 8192 /* 512 entries */
166#define EVT_LEN_MASK (0x9ULL << 56)
167
168#define PAGE_MODE_NONE 0x00
169#define PAGE_MODE_1_LEVEL 0x01
170#define PAGE_MODE_2_LEVEL 0x02
171#define PAGE_MODE_3_LEVEL 0x03
172#define PAGE_MODE_4_LEVEL 0x04
173#define PAGE_MODE_5_LEVEL 0x05
174#define PAGE_MODE_6_LEVEL 0x06
175
176#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
177#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
178 ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
179 (0xffffffffffffffffULL))
180#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
181#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL)
182#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
183 IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
184#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL)
185
186#define PM_MAP_4k 0
187#define PM_ADDR_MASK 0x000ffffffffff000ULL
188#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \
189 (~((1ULL << (12 + ((lvl) * 9))) - 1)))
190#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
191
192/*
193 * Returns the page table level to use for a given page size
194 * Pagesize is expected to be a power-of-two
195 */
196#define PAGE_SIZE_LEVEL(pagesize) \
197 ((__ffs(pagesize) - 12) / 9)
198/*
199 * Returns the number of ptes to use for a given page size
200 * Pagesize is expected to be a power-of-two
201 */
202#define PAGE_SIZE_PTE_COUNT(pagesize) \
203 (1ULL << ((__ffs(pagesize) - 12) % 9))
204
205/*
206 * Aligns a given io-virtual address to a given page size
207 * Pagesize is expected to be a power-of-two
208 */
209#define PAGE_SIZE_ALIGN(address, pagesize) \
210 ((address) & ~((pagesize) - 1))
211/*
212 * Creates an IOMMU PTE for an address and a given pagesize
213 * The PTE has no permission bits set
214 * Pagesize is expected to be a power-of-two larger than 4096
215 */
216#define PAGE_SIZE_PTE(address, pagesize) \
217 (((address) | ((pagesize) - 1)) & \
218 (~(pagesize >> 1)) & PM_ADDR_MASK)
219
220/*
221 * Takes a PTE value with mode=0x07 and returns the page size it maps
222 */
223#define PTE_PAGE_SIZE(pte) \
224 (1ULL << (1 + ffz(((pte) | 0xfffULL))))
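
A quick sanity check of the macros above, with values computed by hand (illustrative only): for a 2 MiB page __ffs() is 21, so the level is (21 - 12) / 9 = 1 and a single PTE is written; for 64 KiB __ffs() is 16, the level is 0 and 1 << 4 = 16 replicated PTEs are written.

        unsigned long lvl_2m  = PAGE_SIZE_LEVEL(0x200000);	/* (21 - 12) / 9 = 1 */
        unsigned long cnt_2m  = PAGE_SIZE_PTE_COUNT(0x200000);	/* 1 << 0 = 1 PTE    */
        unsigned long lvl_64k = PAGE_SIZE_LEVEL(0x10000);	/* (16 - 12) / 9 = 0 */
        unsigned long cnt_64k = PAGE_SIZE_PTE_COUNT(0x10000);	/* 1 << 4 = 16 PTEs  */
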
225
226#define IOMMU_PTE_P (1ULL << 0)
227#define IOMMU_PTE_TV (1ULL << 1)
228#define IOMMU_PTE_U (1ULL << 59)
229#define IOMMU_PTE_FC (1ULL << 60)
230#define IOMMU_PTE_IR (1ULL << 61)
231#define IOMMU_PTE_IW (1ULL << 62)
232
233#define DTE_FLAG_IOTLB 0x01
234
235#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
236#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
237#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
238#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
239
240#define IOMMU_PROT_MASK 0x03
241#define IOMMU_PROT_IR 0x01
242#define IOMMU_PROT_IW 0x02
243
244/* IOMMU capabilities */
245#define IOMMU_CAP_IOTLB 24
246#define IOMMU_CAP_NPCACHE 26
247#define IOMMU_CAP_EFR 27
248
249#define MAX_DOMAIN_ID 65536
250
251/* FIXME: move this macro to <linux/pci.h> */
252#define PCI_BUS(x) (((x) >> 8) & 0xff)
253
254/* Protection domain flags */
255#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
256#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
257 domain for an IOMMU */
258#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
259 translation */
260
261extern bool amd_iommu_dump;
262#define DUMP_printk(format, arg...) \
263 do { \
264 if (amd_iommu_dump) \
265 printk(KERN_INFO "AMD-Vi: " format, ## arg); \
266	} while (0)
267
268/* global flag if IOMMUs cache non-present entries */
269extern bool amd_iommu_np_cache;
270/* Only true if all IOMMUs support device IOTLBs */
271extern bool amd_iommu_iotlb_sup;
272
273/*
274 * Make iterating over all IOMMUs easier
275 */
276#define for_each_iommu(iommu) \
277 list_for_each_entry((iommu), &amd_iommu_list, list)
278#define for_each_iommu_safe(iommu, next) \
279 list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
280
281#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
282#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
283#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
284#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
285#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
286#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
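
For example (the address is made up), a dma_ops address of 0x09000000 lands in aperture range 0x09000000 >> 27 = 1 and in pte_pages[] slot (0x09000000 >> 21) & 0x3f = 8 of that range; 64 slots of 2 MiB each cover exactly one 128 MiB aperture range, matching the 64-entry pte_pages array in struct aperture_range below.

        unsigned long addr  = 0x09000000;			/* 144 MiB, illustrative */
        unsigned long range = APERTURE_RANGE_INDEX(addr);	/* 1 */
        unsigned long slot  = APERTURE_PAGE_INDEX(addr);	/* 8 */
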
287
288/*
289 * This structure contains generic data for IOMMU protection domains
290 * independent of their use.
291 */
292struct protection_domain {
293 struct list_head list; /* for list of all protection domains */
294 struct list_head dev_list; /* List of all devices in this domain */
295 spinlock_t lock; /* mostly used to lock the page table*/
296 struct mutex api_lock; /* protect page tables in the iommu-api path */
297 u16 id; /* the domain id written to the device table */
298 int mode; /* paging mode (0-6 levels) */
299 u64 *pt_root; /* page table root pointer */
300 unsigned long flags; /* flags to find out type of domain */
301 bool updated; /* complete domain flush required */
302 unsigned dev_cnt; /* devices assigned to this domain */
303 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
304 void *priv; /* private data */
305
306};
307
308/*
309 * This struct contains device specific data for the IOMMU
310 */
311struct iommu_dev_data {
312 struct list_head list; /* For domain->dev_list */
313 struct list_head dev_data_list; /* For global dev_data_list */
314 struct iommu_dev_data *alias_data;/* The alias dev_data */
315 struct protection_domain *domain; /* Domain the device is bound to */
316	atomic_t bind;			  /* Domain attach reference count */
317 u16 devid; /* PCI Device ID */
318 struct {
319 bool enabled;
320 int qdep;
321 } ats; /* ATS state */
322};
323
324/*
325 * For dynamic growth the aperture size is split into ranges of 128MB of
326 * DMA address space each. This struct represents one such range.
327 */
328struct aperture_range {
329
330 /* address allocation bitmap */
331 unsigned long *bitmap;
332
333 /*
334 * Array of PTE pages for the aperture. In this array we save all the
335 * leaf pages of the domain page table used for the aperture. This way
336 * we don't need to walk the page table to find a specific PTE. We can
337 * just calculate its address in constant time.
338 */
339 u64 *pte_pages[64];
340
341 unsigned long offset;
342};
343
344/*
345 * Data container for a dma_ops specific protection domain
346 */
347struct dma_ops_domain {
348 struct list_head list;
349
350 /* generic protection domain information */
351 struct protection_domain domain;
352
353 /* size of the aperture for the mappings */
354 unsigned long aperture_size;
355
356 /* address we start to search for free addresses */
357 unsigned long next_address;
358
359 /* address space relevant data */
360 struct aperture_range *aperture[APERTURE_MAX_RANGES];
361
362 /* This will be set to true when TLB needs to be flushed */
363 bool need_flush;
364
365 /*
366 * if this is a preallocated domain, keep the device for which it was
367 * preallocated in this variable
368 */
369 u16 target_dev;
370};
371
372/*
373 * Structure where we save information about one hardware AMD IOMMU in the
374 * system.
375 */
376struct amd_iommu {
377 struct list_head list;
378
379 /* Index within the IOMMU array */
380 int index;
381
382 /* locks the accesses to the hardware */
383 spinlock_t lock;
384
385 /* Pointer to PCI device of this IOMMU */
386 struct pci_dev *dev;
387
388 /* physical address of MMIO space */
389 u64 mmio_phys;
390 /* virtual address of MMIO space */
391 u8 *mmio_base;
392
393 /* capabilities of that IOMMU read from ACPI */
394 u32 cap;
395
396 /* flags read from acpi table */
397 u8 acpi_flags;
398
399 /* Extended features */
400 u64 features;
401
402 /*
403 * Capability pointer. There could be more than one IOMMU per PCI
404	 * device function if there is more than one AMD IOMMU capability
405	 * pointer.
406 */
407 u16 cap_ptr;
408
409 /* pci domain of this IOMMU */
410 u16 pci_seg;
411
412 /* first device this IOMMU handles. read from PCI */
413 u16 first_device;
414 /* last device this IOMMU handles. read from PCI */
415 u16 last_device;
416
417 /* start of exclusion range of that IOMMU */
418 u64 exclusion_start;
419 /* length of exclusion range of that IOMMU */
420 u64 exclusion_length;
421
422 /* command buffer virtual address */
423 u8 *cmd_buf;
424 /* size of command buffer */
425 u32 cmd_buf_size;
426
427 /* size of event buffer */
428 u32 evt_buf_size;
429 /* event buffer virtual address */
430 u8 *evt_buf;
431 /* MSI number for event interrupt */
432 u16 evt_msi_num;
433
434 /* true if interrupts for this IOMMU are already enabled */
435 bool int_enabled;
436
437	/* true if we need to send a completion wait command */
438 bool need_sync;
439
440 /* default dma_ops domain for that IOMMU */
441 struct dma_ops_domain *default_dom;
442
443 /*
444 * We can't rely on the BIOS to restore all values on reinit, so we
445 * need to stash them
446 */
447
448 /* The iommu BAR */
449 u32 stored_addr_lo;
450 u32 stored_addr_hi;
451
452 /*
453 * Each iommu has 6 l1s, each of which is documented as having 0x12
454 * registers
455 */
456 u32 stored_l1[6][0x12];
457
458 /* The l2 indirect registers */
459 u32 stored_l2[0x83];
460};
461
462/*
463 * List with all IOMMUs in the system. This list is not locked because it is
464 * only written and read at driver initialization or suspend time
465 */
466extern struct list_head amd_iommu_list;
467
468/*
469 * Array with pointers to each IOMMU struct
470 * The indices are referenced in the protection domains
471 */
472extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
473
474/* Number of IOMMUs present in the system */
475extern int amd_iommus_present;
476
477/*
478 * Declarations for the global list of all protection domains
479 */
480extern spinlock_t amd_iommu_pd_lock;
481extern struct list_head amd_iommu_pd_list;
482
483/*
484 * Structure defining one entry in the device table
485 */
486struct dev_table_entry {
487 u32 data[8];
488};
489
490/*
491 * One entry for unity mappings parsed out of the ACPI table.
492 */
493struct unity_map_entry {
494 struct list_head list;
495
496	/* starting device id this entry is used for (inclusive) */
497	u16 devid_start;
498	/* end device id this entry is used for (inclusive) */
499	u16 devid_end;
500
501	/* start address to unity map (inclusive) */
502	u64 address_start;
503	/* end address to unity map (inclusive) */
504 u64 address_end;
505
506 /* required protection */
507 int prot;
508};
509
510/*
511 * List of all unity mappings. It is not locked because at runtime it is only
512 * read. It is created at ACPI table parsing time.
513 */
514extern struct list_head amd_iommu_unity_map;
515
516/*
517 * Data structures for device handling
518 */
519
520/*
521 * Device table used by hardware. Read and write accesses by software are
522 * locked with the amd_iommu_pd_table lock.
523 */
524extern struct dev_table_entry *amd_iommu_dev_table;
525
526/*
527 * Alias table to map requestor ids to device ids. Not locked because it is
528 * only read at runtime.
529 */
530extern u16 *amd_iommu_alias_table;
531
532/*
533 * Reverse lookup table to find the IOMMU which translates a specific device.
534 */
535extern struct amd_iommu **amd_iommu_rlookup_table;
536
537/* size of the dma_ops aperture as power of 2 */
538extern unsigned amd_iommu_aperture_order;
539
540/* largest PCI device id we expect translation requests for */
541extern u16 amd_iommu_last_bdf;
542
543/* allocation bitmap for domain ids */
544extern unsigned long *amd_iommu_pd_alloc_bitmap;
545
546/*
547 * If true, the addresses will be flushed on unmap time, not when
548 * they are reused
549 */
550extern bool amd_iommu_unmap_flush;
551
552/* takes bus and device/function and returns the device id
553 * FIXME: should that be in generic PCI code? */
554static inline u16 calc_devid(u8 bus, u8 devfn)
555{
556 return (((u16)bus) << 8) | devfn;
557}
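
A round-trip example using the helper and the PCI_* macros (values chosen arbitrarily): bus 0x01, device 0x05, function 2 encode to devid 0x012a, and PCI_BUS()/PCI_SLOT()/PCI_FUNC() recover the three parts, which is exactly how the DUMP_printk() calls in the init code print device ids.

        u16 devid = calc_devid(0x01, PCI_DEVFN(0x05, 2));	/* 0x012a */
        u8  bus   = PCI_BUS(devid);				/* 0x01 */
        u8  slot  = PCI_SLOT(devid);				/* 0x05 */
        u8  func  = PCI_FUNC(devid);				/* 0x02 */
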
558
559#ifdef CONFIG_AMD_IOMMU_STATS
560
561struct __iommu_counter {
562 char *name;
563 struct dentry *dent;
564 u64 value;
565};
566
567#define DECLARE_STATS_COUNTER(nm) \
568 static struct __iommu_counter nm = { \
569 .name = #nm, \
570 }
571
572#define INC_STATS_COUNTER(name) name.value += 1
573#define ADD_STATS_COUNTER(name, x) name.value += (x)
574#define SUB_STATS_COUNTER(name, x) name.value -= (x)
575
576#else /* CONFIG_AMD_IOMMU_STATS */
577
578#define DECLARE_STATS_COUNTER(name)
579#define INC_STATS_COUNTER(name)
580#define ADD_STATS_COUNTER(name, x)
581#define SUB_STATS_COUNTER(name, x)
582
583#endif /* CONFIG_AMD_IOMMU_STATS */
584
585#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/drivers/pci/dmar.c b/drivers/iommu/dmar.c
index 3dc9befa5aec..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/iommu/dmar.c
diff --git a/drivers/pci/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f02c34d26d1b..c621c98c99da 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -42,7 +42,6 @@
42#include <linux/pci-ats.h> 42#include <linux/pci-ats.h>
43#include <asm/cacheflush.h> 43#include <asm/cacheflush.h>
44#include <asm/iommu.h> 44#include <asm/iommu.h>
45#include "pci.h"
46 45
47#define ROOT_SIZE VTD_PAGE_SIZE 46#define ROOT_SIZE VTD_PAGE_SIZE
48#define CONTEXT_SIZE VTD_PAGE_SIZE 47#define CONTEXT_SIZE VTD_PAGE_SIZE
diff --git a/drivers/pci/intr_remapping.c b/drivers/iommu/intr_remapping.c
index 3607faf28a4d..1a89d4a2cadf 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/iommu/intr_remapping.c
@@ -13,7 +13,6 @@
13#include "intr_remapping.h" 13#include "intr_remapping.h"
14#include <acpi/acpi.h> 14#include <acpi/acpi.h>
15#include <asm/pci-direct.h> 15#include <asm/pci-direct.h>
16#include "pci.h"
17 16
18static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; 17static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
19static struct hpet_scope ir_hpet[MAX_HPET_TBS]; 18static struct hpet_scope ir_hpet[MAX_HPET_TBS];
diff --git a/drivers/pci/intr_remapping.h b/drivers/iommu/intr_remapping.h
index 5662fecfee60..5662fecfee60 100644
--- a/drivers/pci/intr_remapping.h
+++ b/drivers/iommu/intr_remapping.h
diff --git a/drivers/base/iommu.c b/drivers/iommu/iommu.c
index 6e6b6a11b3ce..6e6b6a11b3ce 100644
--- a/drivers/base/iommu.c
+++ b/drivers/iommu/iommu.c
diff --git a/drivers/pci/iova.c b/drivers/iommu/iova.c
index c5c274ab5c5a..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/iommu/iova.c
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
new file mode 100644
index 000000000000..1a584e077c61
--- /dev/null
+++ b/drivers/iommu/msm_iommu.c
@@ -0,0 +1,731 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/platform_device.h>
22#include <linux/errno.h>
23#include <linux/io.h>
24#include <linux/interrupt.h>
25#include <linux/list.h>
26#include <linux/spinlock.h>
27#include <linux/slab.h>
28#include <linux/iommu.h>
29#include <linux/clk.h>
30
31#include <asm/cacheflush.h>
32#include <asm/sizes.h>
33
34#include <mach/iommu_hw-8xxx.h>
35#include <mach/iommu.h>
36
37#define MRC(reg, processor, op1, crn, crm, op2) \
38__asm__ __volatile__ ( \
39" mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \
40: "=r" (reg))
41
42#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
43#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
44
45static int msm_iommu_tex_class[4];
46
47DEFINE_SPINLOCK(msm_iommu_lock);
48
49struct msm_priv {
50 unsigned long *pgtable;
51 struct list_head list_attached;
52};
53
54static int __enable_clocks(struct msm_iommu_drvdata *drvdata)
55{
56 int ret;
57
58 ret = clk_enable(drvdata->pclk);
59 if (ret)
60 goto fail;
61
62 if (drvdata->clk) {
63 ret = clk_enable(drvdata->clk);
64 if (ret)
65 clk_disable(drvdata->pclk);
66 }
67fail:
68 return ret;
69}
70
71static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
72{
73 if (drvdata->clk)
74 clk_disable(drvdata->clk);
75 clk_disable(drvdata->pclk);
76}
77
78static int __flush_iotlb(struct iommu_domain *domain)
79{
80 struct msm_priv *priv = domain->priv;
81 struct msm_iommu_drvdata *iommu_drvdata;
82 struct msm_iommu_ctx_drvdata *ctx_drvdata;
83 int ret = 0;
84#ifndef CONFIG_IOMMU_PGTABLES_L2
85 unsigned long *fl_table = priv->pgtable;
86 int i;
87
88 if (!list_empty(&priv->list_attached)) {
89 dmac_flush_range(fl_table, fl_table + SZ_16K);
90
91 for (i = 0; i < NUM_FL_PTE; i++)
92 if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
93 void *sl_table = __va(fl_table[i] &
94 FL_BASE_MASK);
95 dmac_flush_range(sl_table, sl_table + SZ_4K);
96 }
97 }
98#endif
99
100 list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
101		BUG_ON(!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent);
103
104 iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
105 BUG_ON(!iommu_drvdata);
106
107 ret = __enable_clocks(iommu_drvdata);
108 if (ret)
109 goto fail;
110
111 SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
112 __disable_clocks(iommu_drvdata);
113 }
114fail:
115 return ret;
116}
117
118static void __reset_context(void __iomem *base, int ctx)
119{
120 SET_BPRCOSH(base, ctx, 0);
121 SET_BPRCISH(base, ctx, 0);
122 SET_BPRCNSH(base, ctx, 0);
123 SET_BPSHCFG(base, ctx, 0);
124 SET_BPMTCFG(base, ctx, 0);
125 SET_ACTLR(base, ctx, 0);
126 SET_SCTLR(base, ctx, 0);
127 SET_FSRRESTORE(base, ctx, 0);
128 SET_TTBR0(base, ctx, 0);
129 SET_TTBR1(base, ctx, 0);
130 SET_TTBCR(base, ctx, 0);
131 SET_BFBCR(base, ctx, 0);
132 SET_PAR(base, ctx, 0);
133 SET_FAR(base, ctx, 0);
134 SET_CTX_TLBIALL(base, ctx, 0);
135 SET_TLBFLPTER(base, ctx, 0);
136 SET_TLBSLPTER(base, ctx, 0);
137 SET_TLBLKCR(base, ctx, 0);
138 SET_PRRR(base, ctx, 0);
139 SET_NMRR(base, ctx, 0);
140}
141
142static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
143{
144 unsigned int prrr, nmrr;
145 __reset_context(base, ctx);
146
147 /* Set up HTW mode */
148 /* TLB miss configuration: perform HTW on miss */
149 SET_TLBMCFG(base, ctx, 0x3);
150
151 /* V2P configuration: HTW for access */
152 SET_V2PCFG(base, ctx, 0x3);
153
154 SET_TTBCR(base, ctx, 0);
155 SET_TTBR0_PA(base, ctx, (pgtable >> 14));
156
157 /* Invalidate the TLB for this context */
158 SET_CTX_TLBIALL(base, ctx, 0);
159
160 /* Set interrupt number to "secure" interrupt */
161 SET_IRPTNDX(base, ctx, 0);
162
163 /* Enable context fault interrupt */
164 SET_CFEIE(base, ctx, 1);
165
166 /* Stall access on a context fault and let the handler deal with it */
167 SET_CFCFG(base, ctx, 1);
168
169 /* Redirect all cacheable requests to L2 slave port. */
170 SET_RCISH(base, ctx, 1);
171 SET_RCOSH(base, ctx, 1);
172 SET_RCNSH(base, ctx, 1);
173
174 /* Turn on TEX Remap */
175 SET_TRE(base, ctx, 1);
176
177 /* Set TEX remap attributes */
178 RCP15_PRRR(prrr);
179 RCP15_NMRR(nmrr);
180 SET_PRRR(base, ctx, prrr);
181 SET_NMRR(base, ctx, nmrr);
182
183 /* Turn on BFB prefetch */
184 SET_BFBDFE(base, ctx, 1);
185
186#ifdef CONFIG_IOMMU_PGTABLES_L2
187 /* Configure page tables as inner-cacheable and shareable to reduce
188 * the TLB miss penalty.
189 */
190 SET_TTBR0_SH(base, ctx, 1);
191 SET_TTBR1_SH(base, ctx, 1);
192
193 SET_TTBR0_NOS(base, ctx, 1);
194 SET_TTBR1_NOS(base, ctx, 1);
195
196 SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */
197 SET_TTBR0_IRGNL(base, ctx, 1);
198
199 SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */
200 SET_TTBR1_IRGNL(base, ctx, 1);
201
202 SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */
203 SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */
204#endif
205
206 /* Enable the MMU */
207 SET_M(base, ctx, 1);
208}
209
210static int msm_iommu_domain_init(struct iommu_domain *domain)
211{
212 struct msm_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
213
214 if (!priv)
215 goto fail_nomem;
216
217 INIT_LIST_HEAD(&priv->list_attached);
218 priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL,
219 get_order(SZ_16K));
220
221 if (!priv->pgtable)
222 goto fail_nomem;
223
224 memset(priv->pgtable, 0, SZ_16K);
225 domain->priv = priv;
226 return 0;
227
228fail_nomem:
229 kfree(priv);
230 return -ENOMEM;
231}
232
233static void msm_iommu_domain_destroy(struct iommu_domain *domain)
234{
235 struct msm_priv *priv;
236 unsigned long flags;
237 unsigned long *fl_table;
238 int i;
239
240 spin_lock_irqsave(&msm_iommu_lock, flags);
241 priv = domain->priv;
242 domain->priv = NULL;
243
244 if (priv) {
245 fl_table = priv->pgtable;
246
247 for (i = 0; i < NUM_FL_PTE; i++)
248 if ((fl_table[i] & 0x03) == FL_TYPE_TABLE)
249 free_page((unsigned long) __va(((fl_table[i]) &
250 FL_BASE_MASK)));
251
252 free_pages((unsigned long)priv->pgtable, get_order(SZ_16K));
253 priv->pgtable = NULL;
254 }
255
256 kfree(priv);
257 spin_unlock_irqrestore(&msm_iommu_lock, flags);
258}
259
260static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
261{
262 struct msm_priv *priv;
263 struct msm_iommu_ctx_dev *ctx_dev;
264 struct msm_iommu_drvdata *iommu_drvdata;
265 struct msm_iommu_ctx_drvdata *ctx_drvdata;
266 struct msm_iommu_ctx_drvdata *tmp_drvdata;
267 int ret = 0;
268 unsigned long flags;
269
270 spin_lock_irqsave(&msm_iommu_lock, flags);
271
272 priv = domain->priv;
273
274 if (!priv || !dev) {
275 ret = -EINVAL;
276 goto fail;
277 }
278
279 iommu_drvdata = dev_get_drvdata(dev->parent);
280 ctx_drvdata = dev_get_drvdata(dev);
281 ctx_dev = dev->platform_data;
282
283 if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) {
284 ret = -EINVAL;
285 goto fail;
286 }
287
288 if (!list_empty(&ctx_drvdata->attached_elm)) {
289 ret = -EBUSY;
290 goto fail;
291 }
292
293 list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm)
294 if (tmp_drvdata == ctx_drvdata) {
295 ret = -EBUSY;
296 goto fail;
297 }
298
299 ret = __enable_clocks(iommu_drvdata);
300 if (ret)
301 goto fail;
302
303 __program_context(iommu_drvdata->base, ctx_dev->num,
304 __pa(priv->pgtable));
305
306 __disable_clocks(iommu_drvdata);
307 list_add(&(ctx_drvdata->attached_elm), &priv->list_attached);
308 ret = __flush_iotlb(domain);
309
310fail:
311 spin_unlock_irqrestore(&msm_iommu_lock, flags);
312 return ret;
313}
314
315static void msm_iommu_detach_dev(struct iommu_domain *domain,
316 struct device *dev)
317{
318 struct msm_priv *priv;
319 struct msm_iommu_ctx_dev *ctx_dev;
320 struct msm_iommu_drvdata *iommu_drvdata;
321 struct msm_iommu_ctx_drvdata *ctx_drvdata;
322 unsigned long flags;
323 int ret;
324
325 spin_lock_irqsave(&msm_iommu_lock, flags);
326 priv = domain->priv;
327
328 if (!priv || !dev)
329 goto fail;
330
331 iommu_drvdata = dev_get_drvdata(dev->parent);
332 ctx_drvdata = dev_get_drvdata(dev);
333 ctx_dev = dev->platform_data;
334
335 if (!iommu_drvdata || !ctx_drvdata || !ctx_dev)
336 goto fail;
337
338 ret = __flush_iotlb(domain);
339 if (ret)
340 goto fail;
341
342 ret = __enable_clocks(iommu_drvdata);
343 if (ret)
344 goto fail;
345
346 __reset_context(iommu_drvdata->base, ctx_dev->num);
347 __disable_clocks(iommu_drvdata);
348 list_del_init(&ctx_drvdata->attached_elm);
349
350fail:
351 spin_unlock_irqrestore(&msm_iommu_lock, flags);
352}
353
354static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
355 phys_addr_t pa, int order, int prot)
356{
357 struct msm_priv *priv;
358 unsigned long flags;
359 unsigned long *fl_table;
360 unsigned long *fl_pte;
361 unsigned long fl_offset;
362 unsigned long *sl_table;
363 unsigned long *sl_pte;
364 unsigned long sl_offset;
365 unsigned int pgprot;
366 size_t len = 0x1000UL << order;
367 int ret = 0, tex, sh;
368
369 spin_lock_irqsave(&msm_iommu_lock, flags);
370
371 sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
372 tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK];
373
374 if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
375 ret = -EINVAL;
376 goto fail;
377 }
378
379 priv = domain->priv;
380 if (!priv) {
381 ret = -EINVAL;
382 goto fail;
383 }
384
385 fl_table = priv->pgtable;
386
387 if (len != SZ_16M && len != SZ_1M &&
388 len != SZ_64K && len != SZ_4K) {
389		pr_debug("Bad size: %zu\n", len);
390 ret = -EINVAL;
391 goto fail;
392 }
393
394 if (!fl_table) {
395 pr_debug("Null page table\n");
396 ret = -EINVAL;
397 goto fail;
398 }
399
400 if (len == SZ_16M || len == SZ_1M) {
401 pgprot = sh ? FL_SHARED : 0;
402 pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
403 pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
404 pgprot |= tex & 0x04 ? FL_TEX0 : 0;
405 } else {
406 pgprot = sh ? SL_SHARED : 0;
407 pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
408 pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
409 pgprot |= tex & 0x04 ? SL_TEX0 : 0;
410 }
411
412 fl_offset = FL_OFFSET(va); /* Upper 12 bits */
413 fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
414
415 if (len == SZ_16M) {
416 int i = 0;
417 for (i = 0; i < 16; i++)
418 *(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
419 FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
420 FL_SHARED | FL_NG | pgprot;
421 }
422
423 if (len == SZ_1M)
424 *fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG |
425 FL_TYPE_SECT | FL_SHARED | pgprot;
426
427 /* Need a 2nd level table */
428 if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
429 unsigned long *sl;
430 sl = (unsigned long *) __get_free_pages(GFP_ATOMIC,
431 get_order(SZ_4K));
432
433 if (!sl) {
434 pr_debug("Could not allocate second level table\n");
435 ret = -ENOMEM;
436 goto fail;
437 }
438
439 memset(sl, 0, SZ_4K);
440 *fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE);
441 }
442
443 sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
444 sl_offset = SL_OFFSET(va);
445 sl_pte = sl_table + sl_offset;
446
447
448 if (len == SZ_4K)
449 *sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG |
450 SL_SHARED | SL_TYPE_SMALL | pgprot;
451
452 if (len == SZ_64K) {
453 int i;
454
455 for (i = 0; i < 16; i++)
456 *(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
457 SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
458 }
459
460 ret = __flush_iotlb(domain);
461fail:
462 spin_unlock_irqrestore(&msm_iommu_lock, flags);
463 return ret;
464}
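
The IOVA split above follows the ARM short-descriptor layout: FL_OFFSET() takes the upper 12 bits of the address to index the 4096-entry first-level table and SL_OFFSET() takes the next 8 bits to index the second-level table. The macros themselves live in the mach/iommu_hw-8xxx.h header, which is not part of this patch, so the arithmetic below is an assumption made only for illustration:

        unsigned long va        = 0x40123000;		/* made-up 4K-aligned IOVA */
        unsigned long fl_offset = va >> 20;		/* 0x401: first-level index (assumed) */
        unsigned long sl_offset = (va >> 12) & 0xff;	/* 0x23: second-level index (assumed) */
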
465
466static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
467 int order)
468{
469 struct msm_priv *priv;
470 unsigned long flags;
471 unsigned long *fl_table;
472 unsigned long *fl_pte;
473 unsigned long fl_offset;
474 unsigned long *sl_table;
475 unsigned long *sl_pte;
476 unsigned long sl_offset;
477 size_t len = 0x1000UL << order;
478 int i, ret = 0;
479
480 spin_lock_irqsave(&msm_iommu_lock, flags);
481
482 priv = domain->priv;
483
484 if (!priv) {
485 ret = -ENODEV;
486 goto fail;
487 }
488
489 fl_table = priv->pgtable;
490
491 if (len != SZ_16M && len != SZ_1M &&
492 len != SZ_64K && len != SZ_4K) {
493		pr_debug("Bad length: %zu\n", len);
494 ret = -EINVAL;
495 goto fail;
496 }
497
498 if (!fl_table) {
499 pr_debug("Null page table\n");
500 ret = -EINVAL;
501 goto fail;
502 }
503
504 fl_offset = FL_OFFSET(va); /* Upper 12 bits */
505 fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
506
507 if (*fl_pte == 0) {
508 pr_debug("First level PTE is 0\n");
509 ret = -ENODEV;
510 goto fail;
511 }
512
513 /* Unmap supersection */
514 if (len == SZ_16M)
515 for (i = 0; i < 16; i++)
516 *(fl_pte+i) = 0;
517
518 if (len == SZ_1M)
519 *fl_pte = 0;
520
521 sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
522 sl_offset = SL_OFFSET(va);
523 sl_pte = sl_table + sl_offset;
524
525 if (len == SZ_64K) {
526 for (i = 0; i < 16; i++)
527 *(sl_pte+i) = 0;
528 }
529
530 if (len == SZ_4K)
531 *sl_pte = 0;
532
533 if (len == SZ_4K || len == SZ_64K) {
534 int used = 0;
535
536 for (i = 0; i < NUM_SL_PTE; i++)
537 if (sl_table[i])
538 used = 1;
539 if (!used) {
540 free_page((unsigned long)sl_table);
541 *fl_pte = 0;
542 }
543 }
544
545 ret = __flush_iotlb(domain);
546fail:
547 spin_unlock_irqrestore(&msm_iommu_lock, flags);
548 return ret;
549}
550
551static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
552 unsigned long va)
553{
554 struct msm_priv *priv;
555 struct msm_iommu_drvdata *iommu_drvdata;
556 struct msm_iommu_ctx_drvdata *ctx_drvdata;
557 unsigned int par;
558 unsigned long flags;
559 void __iomem *base;
560 phys_addr_t ret = 0;
561 int ctx;
562
563 spin_lock_irqsave(&msm_iommu_lock, flags);
564
565 priv = domain->priv;
566 if (list_empty(&priv->list_attached))
567 goto fail;
568
569 ctx_drvdata = list_entry(priv->list_attached.next,
570 struct msm_iommu_ctx_drvdata, attached_elm);
571 iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
572
573 base = iommu_drvdata->base;
574 ctx = ctx_drvdata->num;
575
576 ret = __enable_clocks(iommu_drvdata);
577 if (ret)
578 goto fail;
579
580 /* Invalidate context TLB */
581 SET_CTX_TLBIALL(base, ctx, 0);
582 SET_V2PPR(base, ctx, va & V2Pxx_VA);
583
584 par = GET_PAR(base, ctx);
585
586 /* We are dealing with a supersection */
587 if (GET_NOFAULT_SS(base, ctx))
588 ret = (par & 0xFF000000) | (va & 0x00FFFFFF);
589 else /* Upper 20 bits from PAR, lower 12 from VA */
590 ret = (par & 0xFFFFF000) | (va & 0x00000FFF);
591
592 if (GET_FAULT(base, ctx))
593 ret = 0;
594
595 __disable_clocks(iommu_drvdata);
596fail:
597 spin_unlock_irqrestore(&msm_iommu_lock, flags);
598 return ret;
599}
600
601static int msm_iommu_domain_has_cap(struct iommu_domain *domain,
602 unsigned long cap)
603{
604 return 0;
605}
606
607static void print_ctx_regs(void __iomem *base, int ctx)
608{
609 unsigned int fsr = GET_FSR(base, ctx);
610 pr_err("FAR = %08x PAR = %08x\n",
611 GET_FAR(base, ctx), GET_PAR(base, ctx));
612 pr_err("FSR = %08x [%s%s%s%s%s%s%s%s%s%s]\n", fsr,
613 (fsr & 0x02) ? "TF " : "",
614 (fsr & 0x04) ? "AFF " : "",
615 (fsr & 0x08) ? "APF " : "",
616 (fsr & 0x10) ? "TLBMF " : "",
617 (fsr & 0x20) ? "HTWDEEF " : "",
618 (fsr & 0x40) ? "HTWSEEF " : "",
619 (fsr & 0x80) ? "MHF " : "",
620 (fsr & 0x10000) ? "SL " : "",
621 (fsr & 0x40000000) ? "SS " : "",
622 (fsr & 0x80000000) ? "MULTI " : "");
623
624 pr_err("FSYNR0 = %08x FSYNR1 = %08x\n",
625 GET_FSYNR0(base, ctx), GET_FSYNR1(base, ctx));
626 pr_err("TTBR0 = %08x TTBR1 = %08x\n",
627 GET_TTBR0(base, ctx), GET_TTBR1(base, ctx));
628 pr_err("SCTLR = %08x ACTLR = %08x\n",
629 GET_SCTLR(base, ctx), GET_ACTLR(base, ctx));
630 pr_err("PRRR = %08x NMRR = %08x\n",
631 GET_PRRR(base, ctx), GET_NMRR(base, ctx));
632}
633
634irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
635{
636 struct msm_iommu_drvdata *drvdata = dev_id;
637 void __iomem *base;
638 unsigned int fsr;
639 int i, ret;
640
641 spin_lock(&msm_iommu_lock);
642
643 if (!drvdata) {
644 pr_err("Invalid device ID in context interrupt handler\n");
645 goto fail;
646 }
647
648 base = drvdata->base;
649
650 pr_err("Unexpected IOMMU page fault!\n");
651	pr_err("base = %p\n", base);
652
653 ret = __enable_clocks(drvdata);
654 if (ret)
655 goto fail;
656
657 for (i = 0; i < drvdata->ncb; i++) {
658 fsr = GET_FSR(base, i);
659 if (fsr) {
660 pr_err("Fault occurred in context %d.\n", i);
661 pr_err("Interesting registers:\n");
662 print_ctx_regs(base, i);
663 SET_FSR(base, i, 0x4000000F);
664 }
665 }
666 __disable_clocks(drvdata);
667fail:
668 spin_unlock(&msm_iommu_lock);
669 return 0;
670}
671
672static struct iommu_ops msm_iommu_ops = {
673 .domain_init = msm_iommu_domain_init,
674 .domain_destroy = msm_iommu_domain_destroy,
675 .attach_dev = msm_iommu_attach_dev,
676 .detach_dev = msm_iommu_detach_dev,
677 .map = msm_iommu_map,
678 .unmap = msm_iommu_unmap,
679 .iova_to_phys = msm_iommu_iova_to_phys,
680 .domain_has_cap = msm_iommu_domain_has_cap
681};
682
683static int __init get_tex_class(int icp, int ocp, int mt, int nos)
684{
685 int i = 0;
686 unsigned int prrr = 0;
687 unsigned int nmrr = 0;
688 int c_icp, c_ocp, c_mt, c_nos;
689
690 RCP15_PRRR(prrr);
691 RCP15_NMRR(nmrr);
692
693 for (i = 0; i < NUM_TEX_CLASS; i++) {
694 c_nos = PRRR_NOS(prrr, i);
695 c_mt = PRRR_MT(prrr, i);
696 c_icp = NMRR_ICP(nmrr, i);
697 c_ocp = NMRR_OCP(nmrr, i);
698
699 if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos)
700 return i;
701 }
702
703 return -ENODEV;
704}
705
706static void __init setup_iommu_tex_classes(void)
707{
708 msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] =
709 get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1);
710
711 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] =
712 get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1);
713
714 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] =
715 get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1);
716
717 msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] =
718 get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1);
719}
720
721static int __init msm_iommu_init(void)
722{
723 setup_iommu_tex_classes();
724 register_iommu(&msm_iommu_ops);
725 return 0;
726}
727
728subsys_initcall(msm_iommu_init);
729
730MODULE_LICENSE("GPL v2");
731MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
new file mode 100644
index 000000000000..8e8fb079852d
--- /dev/null
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -0,0 +1,422 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/platform_device.h>
23#include <linux/io.h>
24#include <linux/clk.h>
25#include <linux/iommu.h>
26#include <linux/interrupt.h>
27#include <linux/err.h>
28#include <linux/slab.h>
29
30#include <mach/iommu_hw-8xxx.h>
31#include <mach/iommu.h>
32#include <mach/clk.h>
33
34struct iommu_ctx_iter_data {
35 /* input */
36 const char *name;
37
38 /* output */
39 struct device *dev;
40};
41
42static struct platform_device *msm_iommu_root_dev;
43
44static int each_iommu_ctx(struct device *dev, void *data)
45{
46 struct iommu_ctx_iter_data *res = data;
47 struct msm_iommu_ctx_dev *c = dev->platform_data;
48
49 if (!res || !c || !c->name || !res->name)
50 return -EINVAL;
51
52 if (!strcmp(res->name, c->name)) {
53 res->dev = dev;
54 return 1;
55 }
56 return 0;
57}
58
59static int each_iommu(struct device *dev, void *data)
60{
61 return device_for_each_child(dev, data, each_iommu_ctx);
62}
63
64struct device *msm_iommu_get_ctx(const char *ctx_name)
65{
66 struct iommu_ctx_iter_data r;
67 int found;
68
69 if (!msm_iommu_root_dev) {
70 pr_err("No root IOMMU device.\n");
71 goto fail;
72 }
73
74 r.name = ctx_name;
75 found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu);
76
77 if (!found) {
78 pr_err("Could not find context <%s>\n", ctx_name);
79 goto fail;
80 }
81
82 return r.dev;
83fail:
84 return NULL;
85}
86EXPORT_SYMBOL(msm_iommu_get_ctx);
87
88static void msm_iommu_reset(void __iomem *base, int ncb)
89{
90 int ctx;
91
92 SET_RPUE(base, 0);
93 SET_RPUEIE(base, 0);
94 SET_ESRRESTORE(base, 0);
95 SET_TBE(base, 0);
96 SET_CR(base, 0);
97 SET_SPDMBE(base, 0);
98 SET_TESTBUSCR(base, 0);
99 SET_TLBRSW(base, 0);
100 SET_GLOBAL_TLBIALL(base, 0);
101 SET_RPU_ACR(base, 0);
102 SET_TLBLKCRWE(base, 1);
103
104 for (ctx = 0; ctx < ncb; ctx++) {
105 SET_BPRCOSH(base, ctx, 0);
106 SET_BPRCISH(base, ctx, 0);
107 SET_BPRCNSH(base, ctx, 0);
108 SET_BPSHCFG(base, ctx, 0);
109 SET_BPMTCFG(base, ctx, 0);
110 SET_ACTLR(base, ctx, 0);
111 SET_SCTLR(base, ctx, 0);
112 SET_FSRRESTORE(base, ctx, 0);
113 SET_TTBR0(base, ctx, 0);
114 SET_TTBR1(base, ctx, 0);
115 SET_TTBCR(base, ctx, 0);
116 SET_BFBCR(base, ctx, 0);
117 SET_PAR(base, ctx, 0);
118 SET_FAR(base, ctx, 0);
119 SET_CTX_TLBIALL(base, ctx, 0);
120 SET_TLBFLPTER(base, ctx, 0);
121 SET_TLBSLPTER(base, ctx, 0);
122 SET_TLBLKCR(base, ctx, 0);
123 SET_PRRR(base, ctx, 0);
124 SET_NMRR(base, ctx, 0);
125 SET_CONTEXTIDR(base, ctx, 0);
126 }
127}
128
129static int msm_iommu_probe(struct platform_device *pdev)
130{
131 struct resource *r, *r2;
132 struct clk *iommu_clk;
133 struct clk *iommu_pclk;
134 struct msm_iommu_drvdata *drvdata;
135 struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;
136 void __iomem *regs_base;
137 resource_size_t len;
138 int ret, irq, par;
139
140 if (pdev->id == -1) {
141 msm_iommu_root_dev = pdev;
142 return 0;
143 }
144
145 drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL);
146
147 if (!drvdata) {
148 ret = -ENOMEM;
149 goto fail;
150 }
151
152 if (!iommu_dev) {
153 ret = -ENODEV;
154 goto fail;
155 }
156
157 iommu_pclk = clk_get(NULL, "smmu_pclk");
158 if (IS_ERR(iommu_pclk)) {
159 ret = -ENODEV;
160 goto fail;
161 }
162
163 ret = clk_enable(iommu_pclk);
164 if (ret)
165 goto fail_enable;
166
167 iommu_clk = clk_get(&pdev->dev, "iommu_clk");
168
169 if (!IS_ERR(iommu_clk)) {
170 if (clk_get_rate(iommu_clk) == 0)
171 clk_set_min_rate(iommu_clk, 1);
172
173 ret = clk_enable(iommu_clk);
174 if (ret) {
175 clk_put(iommu_clk);
176 goto fail_pclk;
177 }
178 } else
179 iommu_clk = NULL;
180
181 r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase");
182
183 if (!r) {
184 ret = -ENODEV;
185 goto fail_clk;
186 }
187
188 len = resource_size(r);
189
190 r2 = request_mem_region(r->start, len, r->name);
191 if (!r2) {
192		pr_err("Could not request memory region: start=0x%llx, len=%llu\n",
193			(unsigned long long) r->start, (unsigned long long) len);
194 ret = -EBUSY;
195 goto fail_clk;
196 }
197
198 regs_base = ioremap(r2->start, len);
199
200 if (!regs_base) {
201		pr_err("Could not ioremap: start=0x%llx, len=%llu\n",
202			(unsigned long long) r2->start, (unsigned long long) len);
203 ret = -EBUSY;
204 goto fail_mem;
205 }
206
207 irq = platform_get_irq_byname(pdev, "secure_irq");
208 if (irq < 0) {
209 ret = -ENODEV;
210 goto fail_io;
211 }
212
213 msm_iommu_reset(regs_base, iommu_dev->ncb);
214
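	/*
	 * Basic sanity check: with the translation engine enabled, issue a
	 * V2P (virtual-to-physical) lookup and make sure PAR latches a
	 * non-zero value before accepting this instance.
	 */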
215 SET_M(regs_base, 0, 1);
216 SET_PAR(regs_base, 0, 0);
217 SET_V2PCFG(regs_base, 0, 1);
218 SET_V2PPR(regs_base, 0, 0);
219 par = GET_PAR(regs_base, 0);
220 SET_V2PCFG(regs_base, 0, 0);
221 SET_M(regs_base, 0, 0);
222
223 if (!par) {
224 pr_err("%s: Invalid PAR value detected\n", iommu_dev->name);
225 ret = -ENODEV;
226 goto fail_io;
227 }
228
229 ret = request_irq(irq, msm_iommu_fault_handler, 0,
230 "msm_iommu_secure_irpt_handler", drvdata);
231 if (ret) {
232 pr_err("Request IRQ %d failed with ret=%d\n", irq, ret);
233 goto fail_io;
234 }
235
236
237 drvdata->pclk = iommu_pclk;
238 drvdata->clk = iommu_clk;
239 drvdata->base = regs_base;
240 drvdata->irq = irq;
241 drvdata->ncb = iommu_dev->ncb;
242
243 pr_info("device %s mapped at %p, irq %d with %d ctx banks\n",
244 iommu_dev->name, regs_base, irq, iommu_dev->ncb);
245
246 platform_set_drvdata(pdev, drvdata);
247
248 if (iommu_clk)
249 clk_disable(iommu_clk);
250
251 clk_disable(iommu_pclk);
252
253 return 0;
254fail_io:
255 iounmap(regs_base);
256fail_mem:
257 release_mem_region(r->start, len);
258fail_clk:
259 if (iommu_clk) {
260 clk_disable(iommu_clk);
261 clk_put(iommu_clk);
262 }
263fail_pclk:
264 clk_disable(iommu_pclk);
265fail_enable:
266 clk_put(iommu_pclk);
267fail:
268 kfree(drvdata);
269 return ret;
270}
271
272static int msm_iommu_remove(struct platform_device *pdev)
273{
274 struct msm_iommu_drvdata *drv = NULL;
275
276 drv = platform_get_drvdata(pdev);
277 if (drv) {
278 if (drv->clk)
279 clk_put(drv->clk);
280 clk_put(drv->pclk);
281 memset(drv, 0, sizeof(*drv));
282 kfree(drv);
283 platform_set_drvdata(pdev, NULL);
284 }
285 return 0;
286}
287
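/*
 * Probe one context bank. The parent msm_iommu device provides the
 * register base and clocks through its drvdata. Every master ID (MID)
 * listed in the context's platform data is routed to this context bank
 * through the M2V tables, with VMID 0 and an ASID equal to the bank
 * number.
 */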
288static int msm_iommu_ctx_probe(struct platform_device *pdev)
289{
290 struct msm_iommu_ctx_dev *c = pdev->dev.platform_data;
291 struct msm_iommu_drvdata *drvdata;
292 struct msm_iommu_ctx_drvdata *ctx_drvdata = NULL;
293 int i, ret;
294 if (!c || !pdev->dev.parent) {
295 ret = -EINVAL;
296 goto fail;
297 }
298
299 drvdata = dev_get_drvdata(pdev->dev.parent);
300
301 if (!drvdata) {
302 ret = -ENODEV;
303 goto fail;
304 }
305
306 ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL);
307 if (!ctx_drvdata) {
308 ret = -ENOMEM;
309 goto fail;
310 }
311 ctx_drvdata->num = c->num;
312 ctx_drvdata->pdev = pdev;
313
314 INIT_LIST_HEAD(&ctx_drvdata->attached_elm);
315 platform_set_drvdata(pdev, ctx_drvdata);
316
317 ret = clk_enable(drvdata->pclk);
318 if (ret)
319 goto fail;
320
321 if (drvdata->clk) {
322 ret = clk_enable(drvdata->clk);
323 if (ret) {
324 clk_disable(drvdata->pclk);
325 goto fail;
326 }
327 }
328
329 /* Program the M2V tables for this context */
330 for (i = 0; i < MAX_NUM_MIDS; i++) {
331 int mid = c->mids[i];
332 if (mid == -1)
333 break;
334
335 SET_M2VCBR_N(drvdata->base, mid, 0);
336 SET_CBACR_N(drvdata->base, c->num, 0);
337
338 /* Set VMID = 0 */
339 SET_VMID(drvdata->base, mid, 0);
340
341 /* Set the context number for that MID to this context */
342 SET_CBNDX(drvdata->base, mid, c->num);
343
344		/* Set the VMID associated with this context bank to 0 */
345 SET_CBVMID(drvdata->base, c->num, 0);
346
347 /* Set the ASID for TLB tagging for this context */
348 SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num);
349
350 /* Set security bit override to be Non-secure */
351 SET_NSCFG(drvdata->base, mid, 3);
352 }
353
354 if (drvdata->clk)
355 clk_disable(drvdata->clk);
356 clk_disable(drvdata->pclk);
357
358 dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num);
359 return 0;
360fail:
361 kfree(ctx_drvdata);
362 return ret;
363}
364
365static int msm_iommu_ctx_remove(struct platform_device *pdev)
366{
367 struct msm_iommu_ctx_drvdata *drv = NULL;
368 drv = platform_get_drvdata(pdev);
369 if (drv) {
370 memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata));
371 kfree(drv);
372 platform_set_drvdata(pdev, NULL);
373 }
374 return 0;
375}
376
377static struct platform_driver msm_iommu_driver = {
378 .driver = {
379 .name = "msm_iommu",
380 },
381 .probe = msm_iommu_probe,
382 .remove = msm_iommu_remove,
383};
384
385static struct platform_driver msm_iommu_ctx_driver = {
386 .driver = {
387 .name = "msm_iommu_ctx",
388 },
389 .probe = msm_iommu_ctx_probe,
390 .remove = msm_iommu_ctx_remove,
391};
392
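/*
 * Register at subsys_initcall() time so that the IOMMU and context-bank
 * devices are bound before the client drivers that look up and attach to
 * these contexts are probed.
 */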
393static int __init msm_iommu_driver_init(void)
394{
395 int ret;
396 ret = platform_driver_register(&msm_iommu_driver);
397 if (ret != 0) {
398 pr_err("Failed to register IOMMU driver\n");
399 goto error;
400 }
401
402 ret = platform_driver_register(&msm_iommu_ctx_driver);
403 if (ret != 0) {
404		pr_err("Failed to register IOMMU context driver\n");
405		platform_driver_unregister(&msm_iommu_driver);
406 }
407
408error:
409 return ret;
410}
411
412static void __exit msm_iommu_driver_exit(void)
413{
414 platform_driver_unregister(&msm_iommu_ctx_driver);
415 platform_driver_unregister(&msm_iommu_driver);
416}
417
418subsys_initcall(msm_iommu_driver_init);
419module_exit(msm_iommu_driver_exit);
420
421MODULE_LICENSE("GPL v2");
422MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 094308e41be5..825c02b40daa 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -29,11 +29,6 @@ obj-$(CONFIG_PCI_MSI) += msi.o
29# Build the Hypertransport interrupt support 29# Build the Hypertransport interrupt support
30obj-$(CONFIG_HT_IRQ) += htirq.o 30obj-$(CONFIG_HT_IRQ) += htirq.o
31 31
32# Build Intel IOMMU support
33obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
34
35obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
36
37obj-$(CONFIG_PCI_IOV) += iov.o 32obj-$(CONFIG_PCI_IOV) += iov.o
38 33
39# 34#
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 731e20265ace..b7bf11dd546a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -184,8 +184,6 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
184 return NULL; 184 return NULL;
185} 185}
186 186
187struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
188
189/* PCI slot sysfs helper code */ 187/* PCI slot sysfs helper code */
190#define to_pci_slot(s) container_of(s, struct pci_slot, kobj) 188#define to_pci_slot(s) container_of(s, struct pci_slot, kobj)
191 189