author    Linus Torvalds <torvalds@linux-foundation.org>  2015-06-23 21:27:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-06-23 21:27:19 -0400
commit    6eae81a5e2d6646a61146501fd3032a340863c1d
tree      c1c8a5fd7930f756d4d124870e0076903e3b4ba0
parent    54245ed870c8cf9ff87fdf78955ffbc93b261e9f
parent    5ffde2f67181195d457b95df44b8f88e8d969d89
Merge tag 'iommu-updates-v4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
 "This time with bigger changes than usual:

   - A new IOMMU driver for the ARM SMMUv3.

     This IOMMU is pretty different from SMMUv1 and v2 in that it is
     configured through in-memory structures and not through the MMIO
     register region.  The ARM SMMUv3 also supports IO demand paging for
     PCI devices with PRI/PASID capabilities, but this is not implemented
     in the driver yet.

   - Lots of cleanups and device-tree support for the Exynos IOMMU
     driver.  This is part of the effort to bring Exynos DRM support
     upstream.

   - Introduction of default domains into the IOMMU core code.

     The rationale behind this is to move functionality out of the IOMMU
     drivers to common code to get to a unified behavior between
     different drivers.  The patches here introduce a default domain for
     iommu-groups (isolation groups).

     A device will now always be attached to a domain, either the default
     domain or another domain handled by the device driver.  The IOMMU
     drivers have to be modified to make use of that feature.  So far the
     AMD IOMMU driver is converted, with others to follow.

   - Patches for the Intel VT-d driver to fix DMAR faults that happen
     when a kdump kernel boots.

     When the kdump kernel boots it re-initializes the IOMMU hardware,
     which destroys all mappings from the crashed kernel.  As this
     happens before the endpoint devices are re-initialized, any
     in-flight DMA causes a DMAR fault.  These faults cause PCI master
     aborts, which some devices can't handle properly and go into an
     undefined state, so that the device driver in the kdump kernel fails
     to initialize them and the dump fails.

     This is now fixed by copying over the mapping structures (only
     context tables and interrupt remapping tables) from the old kernel
     and keeping the old mappings in place until the device driver of the
     new kernel takes over.  This emulates the behavior without an IOMMU
     to the best degree possible.

   - A couple of other small fixes and cleanups"

* tag 'iommu-updates-v4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (69 commits)
  iommu/amd: Handle large pages correctly in free_pagetable
  iommu/vt-d: Don't disable IR when it was previously enabled
  iommu/vt-d: Make sure copied over IR entries are not reused
  iommu/vt-d: Copy IR table from old kernel when in kdump mode
  iommu/vt-d: Set IRTA in intel_setup_irq_remapping
  iommu/vt-d: Disable IRQ remapping in intel_prepare_irq_remapping
  iommu/vt-d: Move QI initializationt to intel_setup_irq_remapping
  iommu/vt-d: Move EIM detection to intel_prepare_irq_remapping
  iommu/vt-d: Enable Translation only if it was previously disabled
  iommu/vt-d: Don't disable translation prior to OS handover
  iommu/vt-d: Don't copy translation tables if RTT bit needs to be changed
  iommu/vt-d: Don't do early domain assignment if kdump kernel
  iommu/vt-d: Allocate si_domain in init_dmars()
  iommu/vt-d: Mark copied context entries
  iommu/vt-d: Do not re-use domain-ids from the old kernel
  iommu/vt-d: Copy translation tables from old kernel
  iommu/vt-d: Detect pre enabled translation
  iommu/vt-d: Make root entry visible for hardware right after allocation
  iommu/vt-d: Init QI before root entry is allocated
  iommu/vt-d: Cleanup log messages
  ...
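The default-domain change described above shows up concretely in the AMD driver further down: the new add_device callback no longer hunts for a preallocated per-device domain, it simply asks the core which domain the device's group ended up with and picks dma_ops accordingly. A minimal sketch of that pattern, simplified from the amd_iommu_add_device() hunk below (error handling and the AMD-specific setup are omitted):

static int example_add_device(struct device *dev)
{
	struct iommu_domain *domain;

	/*
	 * The IOMMU core has already attached the device to its
	 * group's default domain before this callback is invoked.
	 */
	domain = iommu_get_domain_for_dev(dev);

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		dev->archdata.dma_ops = &nommu_dma_ops;     /* passthrough */
	else
		dev->archdata.dma_ops = &amd_iommu_dma_ops; /* translated */

	return 0;
}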
-rw-r--r--  Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt |   37
-rw-r--r--  MAINTAINERS                                             |    3
-rw-r--r--  drivers/iommu/Kconfig                                   |   13
-rw-r--r--  drivers/iommu/Makefile                                  |    1
-rw-r--r--  drivers/iommu/amd_iommu.c                               |  589
-rw-r--r--  drivers/iommu/amd_iommu_init.c                          |   34
-rw-r--r--  drivers/iommu/amd_iommu_proto.h                         |    2
-rw-r--r--  drivers/iommu/amd_iommu_types.h                         |   11
-rw-r--r--  drivers/iommu/arm-smmu-v3.c                             | 2670
-rw-r--r--  drivers/iommu/arm-smmu.c                                |   23
-rw-r--r--  drivers/iommu/dmar.c                                    |   28
-rw-r--r--  drivers/iommu/exynos-iommu.c                            |  527
-rw-r--r--  drivers/iommu/intel-iommu.c                             |  495
-rw-r--r--  drivers/iommu/intel_irq_remapping.c                     |  252
-rw-r--r--  drivers/iommu/iommu.c                                   |  373
-rw-r--r--  drivers/iommu/iova.c                                    |    4
-rw-r--r--  drivers/iommu/rockchip-iommu.c                          |   27
-rw-r--r--  drivers/vfio/Kconfig                                    |    2
-rw-r--r--  include/linux/intel-iommu.h                             |    5
-rw-r--r--  include/linux/iommu.h                                   |   44
20 files changed, 4184 insertions, 956 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
new file mode 100644
index 000000000000..c03eec116872
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
@@ -0,0 +1,37 @@
+* ARM SMMUv3 Architecture Implementation
+
+The SMMUv3 architecture is a significant departure from previous
+revisions, replacing the MMIO register interface with in-memory command
+and event queues and adding support for the ATS and PRI components of
+the PCIe specification.
+
+** SMMUv3 required properties:
+
+- compatible        : Should include:
+
+                      * "arm,smmu-v3" for any SMMUv3 compliant
+                        implementation. This entry should be last in the
+                        compatible list.
+
+- reg               : Base address and size of the SMMU.
+
+- interrupts        : Non-secure interrupt list describing the wired
+                      interrupt sources corresponding to entries in
+                      interrupt-names. If no wired interrupts are
+                      present then this property may be omitted.
+
+- interrupt-names   : When the interrupts property is present, should
+                      include the following:
+                      * "eventq"    - Event Queue not empty
+                      * "priq"      - PRI Queue not empty
+                      * "cmdq-sync" - CMD_SYNC complete
+                      * "gerror"    - Global Error activated
+
+** SMMUv3 optional properties:
+
+- dma-coherent      : Present if DMA operations made by the SMMU (page
+                      table walks, stream table accesses etc) are cache
+                      coherent with the CPU.
+
+                      NOTE: this only applies to the SMMU itself, not
+                      masters connected upstream of the SMMU.
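As a quick orientation for the binding above, a device node for an SMMUv3 instance might look roughly like the sketch below. This example is not part of the kernel documentation: the unit address, register size and interrupt numbers are invented for illustration, and the interrupt specifiers must match the parent interrupt controller in a real device tree.

	smmu@2b400000 {
		compatible = "arm,smmu-v3";
		reg = <0x0 0x2b400000 0x0 0x20000>;
		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
			     <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
		interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
		dma-coherent;
	};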
diff --git a/MAINTAINERS b/MAINTAINERS
index e46cf6f0e5b0..acb64894c594 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1637,11 +1637,12 @@ F: drivers/i2c/busses/i2c-cadence.c
 F:	drivers/mmc/host/sdhci-of-arasan.c
 F:	drivers/edac/synopsys_edac.c
 
-ARM SMMU DRIVER
+ARM SMMU DRIVERS
 M:	Will Deacon <will.deacon@arm.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/iommu/arm-smmu.c
+F:	drivers/iommu/arm-smmu-v3.c
 F:	drivers/iommu/io-pgtable-arm.c
 
 ARM64 PORT (AARCH64 ARCHITECTURE)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1ae4e547b419..40f37a2b4a8a 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -339,6 +339,7 @@ config SPAPR_TCE_IOMMU
 	  Enables bits of IOMMU API required by VFIO. The iommu_ops
 	  is not implemented as it is not necessary for VFIO.
 
+# ARM IOMMU support
 config ARM_SMMU
 	bool "ARM Ltd. System MMU (SMMU) Support"
 	depends on (ARM64 || ARM) && MMU
@@ -352,4 +353,16 @@ config ARM_SMMU
 	  Say Y here if your SoC includes an IOMMU device implementing
 	  the ARM SMMU architecture.
 
+config ARM_SMMU_V3
+	bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
+	depends on ARM64 && PCI
+	select IOMMU_API
+	select IOMMU_IO_PGTABLE_LPAE
+	help
+	  Support for implementations of the ARM System MMU architecture
+	  version 3 providing translation support to a PCIe root complex.
+
+	  Say Y here if your system includes an IOMMU device implementing
+	  the ARM SMMUv3 architecture.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 080ffab4ed1c..c6dcc513d711 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
+obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fffea87a014f..d3e5e9abe3b6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -65,10 +65,6 @@
65 65
66static DEFINE_RWLOCK(amd_iommu_devtable_lock); 66static DEFINE_RWLOCK(amd_iommu_devtable_lock);
67 67
68/* A list of preallocated protection domains */
69static LIST_HEAD(iommu_pd_list);
70static DEFINE_SPINLOCK(iommu_pd_list_lock);
71
72/* List of all available dev_data structures */ 68/* List of all available dev_data structures */
73static LIST_HEAD(dev_data_list); 69static LIST_HEAD(dev_data_list);
74static DEFINE_SPINLOCK(dev_data_list_lock); 70static DEFINE_SPINLOCK(dev_data_list_lock);
@@ -120,7 +116,7 @@ struct iommu_cmd {
120struct kmem_cache *amd_iommu_irq_cache; 116struct kmem_cache *amd_iommu_irq_cache;
121 117
122static void update_domain(struct protection_domain *domain); 118static void update_domain(struct protection_domain *domain);
123static int __init alloc_passthrough_domain(void); 119static int alloc_passthrough_domain(void);
124 120
125/**************************************************************************** 121/****************************************************************************
126 * 122 *
@@ -235,31 +231,38 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
235} 231}
236 232
237/* 233/*
238 * In this function the list of preallocated protection domains is traversed to 234 * This function actually applies the mapping to the page table of the
239 * find the domain for a specific device 235 * dma_ops domain.
240 */ 236 */
241static struct dma_ops_domain *find_protection_domain(u16 devid) 237static void alloc_unity_mapping(struct dma_ops_domain *dma_dom,
238 struct unity_map_entry *e)
242{ 239{
243 struct dma_ops_domain *entry, *ret = NULL; 240 u64 addr;
244 unsigned long flags;
245 u16 alias = amd_iommu_alias_table[devid];
246
247 if (list_empty(&iommu_pd_list))
248 return NULL;
249
250 spin_lock_irqsave(&iommu_pd_list_lock, flags);
251 241
252 list_for_each_entry(entry, &iommu_pd_list, list) { 242 for (addr = e->address_start; addr < e->address_end;
253 if (entry->target_dev == devid || 243 addr += PAGE_SIZE) {
254 entry->target_dev == alias) { 244 if (addr < dma_dom->aperture_size)
255 ret = entry; 245 __set_bit(addr >> PAGE_SHIFT,
256 break; 246 dma_dom->aperture[0]->bitmap);
257 }
258 } 247 }
248}
249
250/*
251 * Inits the unity mappings required for a specific device
252 */
253static void init_unity_mappings_for_device(struct device *dev,
254 struct dma_ops_domain *dma_dom)
255{
256 struct unity_map_entry *e;
257 u16 devid;
259 258
260 spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 259 devid = get_device_id(dev);
261 260
262 return ret; 261 list_for_each_entry(e, &amd_iommu_unity_map, list) {
262 if (!(devid >= e->devid_start && devid <= e->devid_end))
263 continue;
264 alloc_unity_mapping(dma_dom, e);
265 }
263} 266}
264 267
265/* 268/*
@@ -291,11 +294,23 @@ static bool check_device(struct device *dev)
291 294
292static void init_iommu_group(struct device *dev) 295static void init_iommu_group(struct device *dev)
293{ 296{
297 struct dma_ops_domain *dma_domain;
298 struct iommu_domain *domain;
294 struct iommu_group *group; 299 struct iommu_group *group;
295 300
296 group = iommu_group_get_for_dev(dev); 301 group = iommu_group_get_for_dev(dev);
297 if (!IS_ERR(group)) 302 if (IS_ERR(group))
298 iommu_group_put(group); 303 return;
304
305 domain = iommu_group_default_domain(group);
306 if (!domain)
307 goto out;
308
309 dma_domain = to_pdomain(domain)->priv;
310
311 init_unity_mappings_for_device(dev, dma_domain);
312out:
313 iommu_group_put(group);
299} 314}
300 315
301static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) 316static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
@@ -435,64 +450,15 @@ static void iommu_uninit_device(struct device *dev)
435 /* Unlink from alias, it may change if another device is re-plugged */ 450 /* Unlink from alias, it may change if another device is re-plugged */
436 dev_data->alias_data = NULL; 451 dev_data->alias_data = NULL;
437 452
453 /* Remove dma-ops */
454 dev->archdata.dma_ops = NULL;
455
438 /* 456 /*
439 * We keep dev_data around for unplugged devices and reuse it when the 457 * We keep dev_data around for unplugged devices and reuse it when the
440 * device is re-plugged - not doing so would introduce a ton of races. 458 * device is re-plugged - not doing so would introduce a ton of races.
441 */ 459 */
442} 460}
443 461
444void __init amd_iommu_uninit_devices(void)
445{
446 struct iommu_dev_data *dev_data, *n;
447 struct pci_dev *pdev = NULL;
448
449 for_each_pci_dev(pdev) {
450
451 if (!check_device(&pdev->dev))
452 continue;
453
454 iommu_uninit_device(&pdev->dev);
455 }
456
457 /* Free all of our dev_data structures */
458 list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list)
459 free_dev_data(dev_data);
460}
461
462int __init amd_iommu_init_devices(void)
463{
464 struct pci_dev *pdev = NULL;
465 int ret = 0;
466
467 for_each_pci_dev(pdev) {
468
469 if (!check_device(&pdev->dev))
470 continue;
471
472 ret = iommu_init_device(&pdev->dev);
473 if (ret == -ENOTSUPP)
474 iommu_ignore_device(&pdev->dev);
475 else if (ret)
476 goto out_free;
477 }
478
479 /*
480 * Initialize IOMMU groups only after iommu_init_device() has
481 * had a chance to populate any IVRS defined aliases.
482 */
483 for_each_pci_dev(pdev) {
484 if (check_device(&pdev->dev))
485 init_iommu_group(&pdev->dev);
486 }
487
488 return 0;
489
490out_free:
491
492 amd_iommu_uninit_devices();
493
494 return ret;
495}
496#ifdef CONFIG_AMD_IOMMU_STATS 462#ifdef CONFIG_AMD_IOMMU_STATS
497 463
498/* 464/*
@@ -1464,94 +1430,6 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom,
1464 return unmapped; 1430 return unmapped;
1465} 1431}
1466 1432
1467/*
1468 * This function checks if a specific unity mapping entry is needed for
1469 * this specific IOMMU.
1470 */
1471static int iommu_for_unity_map(struct amd_iommu *iommu,
1472 struct unity_map_entry *entry)
1473{
1474 u16 bdf, i;
1475
1476 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1477 bdf = amd_iommu_alias_table[i];
1478 if (amd_iommu_rlookup_table[bdf] == iommu)
1479 return 1;
1480 }
1481
1482 return 0;
1483}
1484
1485/*
1486 * This function actually applies the mapping to the page table of the
1487 * dma_ops domain.
1488 */
1489static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1490 struct unity_map_entry *e)
1491{
1492 u64 addr;
1493 int ret;
1494
1495 for (addr = e->address_start; addr < e->address_end;
1496 addr += PAGE_SIZE) {
1497 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1498 PAGE_SIZE);
1499 if (ret)
1500 return ret;
1501 /*
1502 * if unity mapping is in aperture range mark the page
1503 * as allocated in the aperture
1504 */
1505 if (addr < dma_dom->aperture_size)
1506 __set_bit(addr >> PAGE_SHIFT,
1507 dma_dom->aperture[0]->bitmap);
1508 }
1509
1510 return 0;
1511}
1512
1513/*
1514 * Init the unity mappings for a specific IOMMU in the system
1515 *
1516 * Basically iterates over all unity mapping entries and applies them to
1517 * the default domain DMA of that IOMMU if necessary.
1518 */
1519static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1520{
1521 struct unity_map_entry *entry;
1522 int ret;
1523
1524 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1525 if (!iommu_for_unity_map(iommu, entry))
1526 continue;
1527 ret = dma_ops_unity_map(iommu->default_dom, entry);
1528 if (ret)
1529 return ret;
1530 }
1531
1532 return 0;
1533}
1534
1535/*
1536 * Inits the unity mappings required for a specific device
1537 */
1538static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1539 u16 devid)
1540{
1541 struct unity_map_entry *e;
1542 int ret;
1543
1544 list_for_each_entry(e, &amd_iommu_unity_map, list) {
1545 if (!(devid >= e->devid_start && devid <= e->devid_end))
1546 continue;
1547 ret = dma_ops_unity_map(dma_dom, e);
1548 if (ret)
1549 return ret;
1550 }
1551
1552 return 0;
1553}
1554
1555/**************************************************************************** 1433/****************************************************************************
1556 * 1434 *
1557 * The next functions belong to the address allocator for the dma_ops 1435 * The next functions belong to the address allocator for the dma_ops
@@ -1705,14 +1583,16 @@ static unsigned long dma_ops_area_alloc(struct device *dev,
1705 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; 1583 unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1706 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; 1584 int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1707 int i = start >> APERTURE_RANGE_SHIFT; 1585 int i = start >> APERTURE_RANGE_SHIFT;
1708 unsigned long boundary_size; 1586 unsigned long boundary_size, mask;
1709 unsigned long address = -1; 1587 unsigned long address = -1;
1710 unsigned long limit; 1588 unsigned long limit;
1711 1589
1712 next_bit >>= PAGE_SHIFT; 1590 next_bit >>= PAGE_SHIFT;
1713 1591
1714 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 1592 mask = dma_get_seg_boundary(dev);
1715 PAGE_SIZE) >> PAGE_SHIFT; 1593
1594 boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
1595 1UL << (BITS_PER_LONG - PAGE_SHIFT);
1716 1596
1717 for (;i < max_index; ++i) { 1597 for (;i < max_index; ++i) {
1718 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; 1598 unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
@@ -1870,9 +1750,15 @@ static void free_pt_##LVL (unsigned long __pt) \
1870 pt = (u64 *)__pt; \ 1750 pt = (u64 *)__pt; \
1871 \ 1751 \
1872 for (i = 0; i < 512; ++i) { \ 1752 for (i = 0; i < 512; ++i) { \
1753 /* PTE present? */ \
1873 if (!IOMMU_PTE_PRESENT(pt[i])) \ 1754 if (!IOMMU_PTE_PRESENT(pt[i])) \
1874 continue; \ 1755 continue; \
1875 \ 1756 \
1757 /* Large PTE? */ \
1758 if (PM_PTE_LEVEL(pt[i]) == 0 || \
1759 PM_PTE_LEVEL(pt[i]) == 7) \
1760 continue; \
1761 \
1876 p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ 1762 p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
1877 FN(p); \ 1763 FN(p); \
1878 } \ 1764 } \
@@ -2009,7 +1895,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
2009 goto free_dma_dom; 1895 goto free_dma_dom;
2010 1896
2011 dma_dom->need_flush = false; 1897 dma_dom->need_flush = false;
2012 dma_dom->target_dev = 0xffff;
2013 1898
2014 add_domain_to_list(&dma_dom->domain); 1899 add_domain_to_list(&dma_dom->domain);
2015 1900
@@ -2374,110 +2259,67 @@ static void detach_device(struct device *dev)
2374 dev_data->ats.enabled = false; 2259 dev_data->ats.enabled = false;
2375} 2260}
2376 2261
2377/* 2262static int amd_iommu_add_device(struct device *dev)
2378 * Find out the protection domain structure for a given PCI device. This
2379 * will give us the pointer to the page table root for example.
2380 */
2381static struct protection_domain *domain_for_device(struct device *dev)
2382{
2383 struct iommu_dev_data *dev_data;
2384 struct protection_domain *dom = NULL;
2385 unsigned long flags;
2386
2387 dev_data = get_dev_data(dev);
2388
2389 if (dev_data->domain)
2390 return dev_data->domain;
2391
2392 if (dev_data->alias_data != NULL) {
2393 struct iommu_dev_data *alias_data = dev_data->alias_data;
2394
2395 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
2396 if (alias_data->domain != NULL) {
2397 __attach_device(dev_data, alias_data->domain);
2398 dom = alias_data->domain;
2399 }
2400 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2401 }
2402
2403 return dom;
2404}
2405
2406static int device_change_notifier(struct notifier_block *nb,
2407 unsigned long action, void *data)
2408{ 2263{
2409 struct dma_ops_domain *dma_domain;
2410 struct protection_domain *domain;
2411 struct iommu_dev_data *dev_data; 2264 struct iommu_dev_data *dev_data;
2412 struct device *dev = data; 2265 struct iommu_domain *domain;
2413 struct amd_iommu *iommu; 2266 struct amd_iommu *iommu;
2414 unsigned long flags;
2415 u16 devid; 2267 u16 devid;
2268 int ret;
2416 2269
2417 if (!check_device(dev)) 2270 if (!check_device(dev) || get_dev_data(dev))
2418 return 0; 2271 return 0;
2419 2272
2420 devid = get_device_id(dev); 2273 devid = get_device_id(dev);
2421 iommu = amd_iommu_rlookup_table[devid]; 2274 iommu = amd_iommu_rlookup_table[devid];
2422 dev_data = get_dev_data(dev);
2423
2424 switch (action) {
2425 case BUS_NOTIFY_ADD_DEVICE:
2426 2275
2427 iommu_init_device(dev); 2276 ret = iommu_init_device(dev);
2428 init_iommu_group(dev); 2277 if (ret) {
2278 if (ret != -ENOTSUPP)
2279 pr_err("Failed to initialize device %s - trying to proceed anyway\n",
2280 dev_name(dev));
2429 2281
2430 /* 2282 iommu_ignore_device(dev);
2431 * dev_data is still NULL and 2283 dev->archdata.dma_ops = &nommu_dma_ops;
2432 * got initialized in iommu_init_device 2284 goto out;
2433 */ 2285 }
2434 dev_data = get_dev_data(dev); 2286 init_iommu_group(dev);
2435 2287
2436 if (iommu_pass_through || dev_data->iommu_v2) { 2288 dev_data = get_dev_data(dev);
2437 dev_data->passthrough = true;
2438 attach_device(dev, pt_domain);
2439 break;
2440 }
2441 2289
2442 domain = domain_for_device(dev); 2290 BUG_ON(!dev_data);
2443 2291
2444 /* allocate a protection domain if a device is added */ 2292 if (dev_data->iommu_v2)
2445 dma_domain = find_protection_domain(devid); 2293 iommu_request_dm_for_dev(dev);
2446 if (!dma_domain) {
2447 dma_domain = dma_ops_domain_alloc();
2448 if (!dma_domain)
2449 goto out;
2450 dma_domain->target_dev = devid;
2451
2452 spin_lock_irqsave(&iommu_pd_list_lock, flags);
2453 list_add_tail(&dma_domain->list, &iommu_pd_list);
2454 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
2455 }
2456 2294
2295 /* Domains are initialized for this device - have a look what we ended up with */
2296 domain = iommu_get_domain_for_dev(dev);
2297 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2298 dev_data->passthrough = true;
2299 dev->archdata.dma_ops = &nommu_dma_ops;
2300 } else {
2457 dev->archdata.dma_ops = &amd_iommu_dma_ops; 2301 dev->archdata.dma_ops = &amd_iommu_dma_ops;
2458
2459 break;
2460 case BUS_NOTIFY_REMOVED_DEVICE:
2461
2462 iommu_uninit_device(dev);
2463
2464 default:
2465 goto out;
2466 } 2302 }
2467 2303
2304out:
2468 iommu_completion_wait(iommu); 2305 iommu_completion_wait(iommu);
2469 2306
2470out:
2471 return 0; 2307 return 0;
2472} 2308}
2473 2309
2474static struct notifier_block device_nb = { 2310static void amd_iommu_remove_device(struct device *dev)
2475 .notifier_call = device_change_notifier,
2476};
2477
2478void amd_iommu_init_notifier(void)
2479{ 2311{
2480 bus_register_notifier(&pci_bus_type, &device_nb); 2312 struct amd_iommu *iommu;
2313 u16 devid;
2314
2315 if (!check_device(dev))
2316 return;
2317
2318 devid = get_device_id(dev);
2319 iommu = amd_iommu_rlookup_table[devid];
2320
2321 iommu_uninit_device(dev);
2322 iommu_completion_wait(iommu);
2481} 2323}
2482 2324
2483/***************************************************************************** 2325/*****************************************************************************
@@ -2496,28 +2338,20 @@ void amd_iommu_init_notifier(void)
2496static struct protection_domain *get_domain(struct device *dev) 2338static struct protection_domain *get_domain(struct device *dev)
2497{ 2339{
2498 struct protection_domain *domain; 2340 struct protection_domain *domain;
2499 struct dma_ops_domain *dma_dom; 2341 struct iommu_domain *io_domain;
2500 u16 devid = get_device_id(dev);
2501 2342
2502 if (!check_device(dev)) 2343 if (!check_device(dev))
2503 return ERR_PTR(-EINVAL); 2344 return ERR_PTR(-EINVAL);
2504 2345
2505 domain = domain_for_device(dev); 2346 io_domain = iommu_get_domain_for_dev(dev);
2506 if (domain != NULL && !dma_ops_domain(domain)) 2347 if (!io_domain)
2507 return ERR_PTR(-EBUSY); 2348 return NULL;
2508
2509 if (domain != NULL)
2510 return domain;
2511 2349
2512 /* Device not bound yet - bind it */ 2350 domain = to_pdomain(io_domain);
2513 dma_dom = find_protection_domain(devid); 2351 if (!dma_ops_domain(domain))
2514 if (!dma_dom) 2352 return ERR_PTR(-EBUSY);
2515 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
2516 attach_device(dev, &dma_dom->domain);
2517 DUMP_printk("Using protection domain %d for device %s\n",
2518 dma_dom->domain.id, dev_name(dev));
2519 2353
2520 return &dma_dom->domain; 2354 return domain;
2521} 2355}
2522 2356
2523static void update_device_table(struct protection_domain *domain) 2357static void update_device_table(struct protection_domain *domain)
@@ -3013,54 +2847,6 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
3013 return check_device(dev); 2847 return check_device(dev);
3014} 2848}
3015 2849
3016/*
3017 * The function for pre-allocating protection domains.
3018 *
3019 * If the driver core informs the DMA layer if a driver grabs a device
3020 * we don't need to preallocate the protection domains anymore.
3021 * For now we have to.
3022 */
3023static void __init prealloc_protection_domains(void)
3024{
3025 struct iommu_dev_data *dev_data;
3026 struct dma_ops_domain *dma_dom;
3027 struct pci_dev *dev = NULL;
3028 u16 devid;
3029
3030 for_each_pci_dev(dev) {
3031
3032 /* Do we handle this device? */
3033 if (!check_device(&dev->dev))
3034 continue;
3035
3036 dev_data = get_dev_data(&dev->dev);
3037 if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
3038 /* Make sure passthrough domain is allocated */
3039 alloc_passthrough_domain();
3040 dev_data->passthrough = true;
3041 attach_device(&dev->dev, pt_domain);
3042 pr_info("AMD-Vi: Using passthrough domain for device %s\n",
3043 dev_name(&dev->dev));
3044 }
3045
3046 /* Is there already any domain for it? */
3047 if (domain_for_device(&dev->dev))
3048 continue;
3049
3050 devid = get_device_id(&dev->dev);
3051
3052 dma_dom = dma_ops_domain_alloc();
3053 if (!dma_dom)
3054 continue;
3055 init_unity_mappings_for_device(dma_dom, devid);
3056 dma_dom->target_dev = devid;
3057
3058 attach_device(&dev->dev, &dma_dom->domain);
3059
3060 list_add_tail(&dma_dom->list, &iommu_pd_list);
3061 }
3062}
3063
3064static struct dma_map_ops amd_iommu_dma_ops = { 2850static struct dma_map_ops amd_iommu_dma_ops = {
3065 .alloc = alloc_coherent, 2851 .alloc = alloc_coherent,
3066 .free = free_coherent, 2852 .free = free_coherent,
@@ -3071,76 +2857,16 @@ static struct dma_map_ops amd_iommu_dma_ops = {
3071 .dma_supported = amd_iommu_dma_supported, 2857 .dma_supported = amd_iommu_dma_supported,
3072}; 2858};
3073 2859
3074static unsigned device_dma_ops_init(void) 2860int __init amd_iommu_init_api(void)
3075{
3076 struct iommu_dev_data *dev_data;
3077 struct pci_dev *pdev = NULL;
3078 unsigned unhandled = 0;
3079
3080 for_each_pci_dev(pdev) {
3081 if (!check_device(&pdev->dev)) {
3082
3083 iommu_ignore_device(&pdev->dev);
3084
3085 unhandled += 1;
3086 continue;
3087 }
3088
3089 dev_data = get_dev_data(&pdev->dev);
3090
3091 if (!dev_data->passthrough)
3092 pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
3093 else
3094 pdev->dev.archdata.dma_ops = &nommu_dma_ops;
3095 }
3096
3097 return unhandled;
3098}
3099
3100/*
3101 * The function which clues the AMD IOMMU driver into dma_ops.
3102 */
3103
3104void __init amd_iommu_init_api(void)
3105{ 2861{
3106 bus_set_iommu(&pci_bus_type, &amd_iommu_ops); 2862 return bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
3107} 2863}
3108 2864
3109int __init amd_iommu_init_dma_ops(void) 2865int __init amd_iommu_init_dma_ops(void)
3110{ 2866{
3111 struct amd_iommu *iommu;
3112 int ret, unhandled;
3113
3114 /*
3115 * first allocate a default protection domain for every IOMMU we
3116 * found in the system. Devices not assigned to any other
3117 * protection domain will be assigned to the default one.
3118 */
3119 for_each_iommu(iommu) {
3120 iommu->default_dom = dma_ops_domain_alloc();
3121 if (iommu->default_dom == NULL)
3122 return -ENOMEM;
3123 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
3124 ret = iommu_init_unity_mappings(iommu);
3125 if (ret)
3126 goto free_domains;
3127 }
3128
3129 /*
3130 * Pre-allocate the protection domains for each device.
3131 */
3132 prealloc_protection_domains();
3133
3134 iommu_detected = 1; 2867 iommu_detected = 1;
3135 swiotlb = 0; 2868 swiotlb = 0;
3136 2869
3137 /* Make the driver finally visible to the drivers */
3138 unhandled = device_dma_ops_init();
3139 if (unhandled && max_pfn > MAX_DMA32_PFN) {
3140 /* There are unhandled devices - initialize swiotlb for them */
3141 swiotlb = 1;
3142 }
3143
3144 amd_iommu_stats_init(); 2870 amd_iommu_stats_init();
3145 2871
3146 if (amd_iommu_unmap_flush) 2872 if (amd_iommu_unmap_flush)
@@ -3149,14 +2875,6 @@ int __init amd_iommu_init_dma_ops(void)
3149 pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); 2875 pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n");
3150 2876
3151 return 0; 2877 return 0;
3152
3153free_domains:
3154
3155 for_each_iommu(iommu) {
3156 dma_ops_domain_free(iommu->default_dom);
3157 }
3158
3159 return ret;
3160} 2878}
3161 2879
3162/***************************************************************************** 2880/*****************************************************************************
@@ -3223,7 +2941,7 @@ out_err:
3223 return NULL; 2941 return NULL;
3224} 2942}
3225 2943
3226static int __init alloc_passthrough_domain(void) 2944static int alloc_passthrough_domain(void)
3227{ 2945{
3228 if (pt_domain != NULL) 2946 if (pt_domain != NULL)
3229 return 0; 2947 return 0;
@@ -3241,30 +2959,46 @@ static int __init alloc_passthrough_domain(void)
3241static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) 2959static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
3242{ 2960{
3243 struct protection_domain *pdomain; 2961 struct protection_domain *pdomain;
2962 struct dma_ops_domain *dma_domain;
3244 2963
3245 /* We only support unmanaged domains for now */ 2964 switch (type) {
3246 if (type != IOMMU_DOMAIN_UNMANAGED) 2965 case IOMMU_DOMAIN_UNMANAGED:
3247 return NULL; 2966 pdomain = protection_domain_alloc();
3248 2967 if (!pdomain)
3249 pdomain = protection_domain_alloc(); 2968 return NULL;
3250 if (!pdomain)
3251 goto out_free;
3252 2969
3253 pdomain->mode = PAGE_MODE_3_LEVEL; 2970 pdomain->mode = PAGE_MODE_3_LEVEL;
3254 pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); 2971 pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
3255 if (!pdomain->pt_root) 2972 if (!pdomain->pt_root) {
3256 goto out_free; 2973 protection_domain_free(pdomain);
2974 return NULL;
2975 }
3257 2976
3258 pdomain->domain.geometry.aperture_start = 0; 2977 pdomain->domain.geometry.aperture_start = 0;
3259 pdomain->domain.geometry.aperture_end = ~0ULL; 2978 pdomain->domain.geometry.aperture_end = ~0ULL;
3260 pdomain->domain.geometry.force_aperture = true; 2979 pdomain->domain.geometry.force_aperture = true;
3261 2980
3262 return &pdomain->domain; 2981 break;
2982 case IOMMU_DOMAIN_DMA:
2983 dma_domain = dma_ops_domain_alloc();
2984 if (!dma_domain) {
2985 pr_err("AMD-Vi: Failed to allocate\n");
2986 return NULL;
2987 }
2988 pdomain = &dma_domain->domain;
2989 break;
2990 case IOMMU_DOMAIN_IDENTITY:
2991 pdomain = protection_domain_alloc();
2992 if (!pdomain)
2993 return NULL;
3263 2994
3264out_free: 2995 pdomain->mode = PAGE_MODE_NONE;
3265 protection_domain_free(pdomain); 2996 break;
2997 default:
2998 return NULL;
2999 }
3266 3000
3267 return NULL; 3001 return &pdomain->domain;
3268} 3002}
3269 3003
3270static void amd_iommu_domain_free(struct iommu_domain *dom) 3004static void amd_iommu_domain_free(struct iommu_domain *dom)
@@ -3414,6 +3148,47 @@ static bool amd_iommu_capable(enum iommu_cap cap)
3414 return false; 3148 return false;
3415} 3149}
3416 3150
3151static void amd_iommu_get_dm_regions(struct device *dev,
3152 struct list_head *head)
3153{
3154 struct unity_map_entry *entry;
3155 u16 devid;
3156
3157 devid = get_device_id(dev);
3158
3159 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
3160 struct iommu_dm_region *region;
3161
3162 if (devid < entry->devid_start || devid > entry->devid_end)
3163 continue;
3164
3165 region = kzalloc(sizeof(*region), GFP_KERNEL);
3166 if (!region) {
3167 pr_err("Out of memory allocating dm-regions for %s\n",
3168 dev_name(dev));
3169 return;
3170 }
3171
3172 region->start = entry->address_start;
3173 region->length = entry->address_end - entry->address_start;
3174 if (entry->prot & IOMMU_PROT_IR)
3175 region->prot |= IOMMU_READ;
3176 if (entry->prot & IOMMU_PROT_IW)
3177 region->prot |= IOMMU_WRITE;
3178
3179 list_add_tail(&region->list, head);
3180 }
3181}
3182
3183static void amd_iommu_put_dm_regions(struct device *dev,
3184 struct list_head *head)
3185{
3186 struct iommu_dm_region *entry, *next;
3187
3188 list_for_each_entry_safe(entry, next, head, list)
3189 kfree(entry);
3190}
3191
3417static const struct iommu_ops amd_iommu_ops = { 3192static const struct iommu_ops amd_iommu_ops = {
3418 .capable = amd_iommu_capable, 3193 .capable = amd_iommu_capable,
3419 .domain_alloc = amd_iommu_domain_alloc, 3194 .domain_alloc = amd_iommu_domain_alloc,
@@ -3424,6 +3199,10 @@ static const struct iommu_ops amd_iommu_ops = {
3424 .unmap = amd_iommu_unmap, 3199 .unmap = amd_iommu_unmap,
3425 .map_sg = default_iommu_map_sg, 3200 .map_sg = default_iommu_map_sg,
3426 .iova_to_phys = amd_iommu_iova_to_phys, 3201 .iova_to_phys = amd_iommu_iova_to_phys,
3202 .add_device = amd_iommu_add_device,
3203 .remove_device = amd_iommu_remove_device,
3204 .get_dm_regions = amd_iommu_get_dm_regions,
3205 .put_dm_regions = amd_iommu_put_dm_regions,
3427 .pgsize_bitmap = AMD_IOMMU_PGSIZES, 3206 .pgsize_bitmap = AMD_IOMMU_PGSIZES,
3428}; 3207};
3429 3208
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index c17df04d7a7f..dbda9ae68c5d 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -226,6 +226,7 @@ static enum iommu_init_state init_state = IOMMU_START_STATE;
 
 static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
+static void init_device_table_dma(void);
 
 static inline void update_last_devid(u16 devid)
 {
@@ -1389,9 +1390,15 @@ static int __init amd_iommu_init_pci(void)
 		break;
 	}
 
-	ret = amd_iommu_init_devices();
+	init_device_table_dma();
+
+	for_each_iommu(iommu)
+		iommu_flush_all_caches(iommu);
+
+	ret = amd_iommu_init_api();
 
-	print_iommu_info();
+	if (!ret)
+		print_iommu_info();
 
 	return ret;
 }
@@ -1829,8 +1836,6 @@ static bool __init check_ioapic_information(void)
 
 static void __init free_dma_resources(void)
 {
-	amd_iommu_uninit_devices();
-
 	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
 		   get_order(MAX_DOMAIN_ID/8));
 
@@ -2023,27 +2028,10 @@ static bool detect_ivrs(void)
 
 static int amd_iommu_init_dma(void)
 {
-	struct amd_iommu *iommu;
-	int ret;
-
 	if (iommu_pass_through)
-		ret = amd_iommu_init_passthrough();
+		return amd_iommu_init_passthrough();
 	else
-		ret = amd_iommu_init_dma_ops();
-
-	if (ret)
-		return ret;
-
-	init_device_table_dma();
-
-	for_each_iommu(iommu)
-		iommu_flush_all_caches(iommu);
-
-	amd_iommu_init_api();
-
-	amd_iommu_init_notifier();
-
-	return 0;
 }
 
 /****************************************************************************
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 0a21142d3639..0bd9eb374462 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -30,7 +30,7 @@ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
 extern void amd_iommu_uninit_devices(void);
 extern void amd_iommu_init_notifier(void);
-extern void amd_iommu_init_api(void);
+extern int amd_iommu_init_api(void);
 
 /* Needed for interrupt remapping */
 extern int amd_iommu_prepare(void);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6533e874c9d7..f65908841be0 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -447,8 +447,6 @@ struct aperture_range {
  * Data container for a dma_ops specific protection domain
  */
 struct dma_ops_domain {
-	struct list_head list;
-
 	/* generic protection domain information */
 	struct protection_domain domain;
 
@@ -463,12 +461,6 @@ struct dma_ops_domain {
 
 	/* This will be set to true when TLB needs to be flushed */
 	bool need_flush;
-
-	/*
-	 * if this is a preallocated domain, keep the device for which it was
-	 * preallocated in this variable
-	 */
-	u16 target_dev;
 };
 
 /*
@@ -553,9 +545,6 @@ struct amd_iommu {
 	/* if one, we need to send a completion wait command */
 	bool need_sync;
 
-	/* default dma_ops domain for that IOMMU */
-	struct dma_ops_domain *default_dom;
-
 	/* IOMMU sysfs device */
 	struct device *iommu_dev;
 
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
new file mode 100644
index 000000000000..f14130121298
--- /dev/null
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -0,0 +1,2670 @@
1/*
2 * IOMMU API for ARM architected SMMUv3 implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 * Copyright (C) 2015 ARM Limited
17 *
18 * Author: Will Deacon <will.deacon@arm.com>
19 *
20 * This driver is powered by bad coffee and bombay mix.
21 */
22
23#include <linux/delay.h>
24#include <linux/err.h>
25#include <linux/interrupt.h>
26#include <linux/iommu.h>
27#include <linux/iopoll.h>
28#include <linux/module.h>
29#include <linux/of.h>
30#include <linux/of_address.h>
31#include <linux/pci.h>
32#include <linux/platform_device.h>
33
34#include "io-pgtable.h"
35
36/* MMIO registers */
37#define ARM_SMMU_IDR0 0x0
38#define IDR0_ST_LVL_SHIFT 27
39#define IDR0_ST_LVL_MASK 0x3
40#define IDR0_ST_LVL_2LVL (1 << IDR0_ST_LVL_SHIFT)
41#define IDR0_STALL_MODEL (3 << 24)
42#define IDR0_TTENDIAN_SHIFT 21
43#define IDR0_TTENDIAN_MASK 0x3
44#define IDR0_TTENDIAN_LE (2 << IDR0_TTENDIAN_SHIFT)
45#define IDR0_TTENDIAN_BE (3 << IDR0_TTENDIAN_SHIFT)
46#define IDR0_TTENDIAN_MIXED (0 << IDR0_TTENDIAN_SHIFT)
47#define IDR0_CD2L (1 << 19)
48#define IDR0_VMID16 (1 << 18)
49#define IDR0_PRI (1 << 16)
50#define IDR0_SEV (1 << 14)
51#define IDR0_MSI (1 << 13)
52#define IDR0_ASID16 (1 << 12)
53#define IDR0_ATS (1 << 10)
54#define IDR0_HYP (1 << 9)
55#define IDR0_COHACC (1 << 4)
56#define IDR0_TTF_SHIFT 2
57#define IDR0_TTF_MASK 0x3
58#define IDR0_TTF_AARCH64 (2 << IDR0_TTF_SHIFT)
59#define IDR0_S1P (1 << 1)
60#define IDR0_S2P (1 << 0)
61
62#define ARM_SMMU_IDR1 0x4
63#define IDR1_TABLES_PRESET (1 << 30)
64#define IDR1_QUEUES_PRESET (1 << 29)
65#define IDR1_REL (1 << 28)
66#define IDR1_CMDQ_SHIFT 21
67#define IDR1_CMDQ_MASK 0x1f
68#define IDR1_EVTQ_SHIFT 16
69#define IDR1_EVTQ_MASK 0x1f
70#define IDR1_PRIQ_SHIFT 11
71#define IDR1_PRIQ_MASK 0x1f
72#define IDR1_SSID_SHIFT 6
73#define IDR1_SSID_MASK 0x1f
74#define IDR1_SID_SHIFT 0
75#define IDR1_SID_MASK 0x3f
76
77#define ARM_SMMU_IDR5 0x14
78#define IDR5_STALL_MAX_SHIFT 16
79#define IDR5_STALL_MAX_MASK 0xffff
80#define IDR5_GRAN64K (1 << 6)
81#define IDR5_GRAN16K (1 << 5)
82#define IDR5_GRAN4K (1 << 4)
83#define IDR5_OAS_SHIFT 0
84#define IDR5_OAS_MASK 0x7
85#define IDR5_OAS_32_BIT (0 << IDR5_OAS_SHIFT)
86#define IDR5_OAS_36_BIT (1 << IDR5_OAS_SHIFT)
87#define IDR5_OAS_40_BIT (2 << IDR5_OAS_SHIFT)
88#define IDR5_OAS_42_BIT (3 << IDR5_OAS_SHIFT)
89#define IDR5_OAS_44_BIT (4 << IDR5_OAS_SHIFT)
90#define IDR5_OAS_48_BIT (5 << IDR5_OAS_SHIFT)
91
92#define ARM_SMMU_CR0 0x20
93#define CR0_CMDQEN (1 << 3)
94#define CR0_EVTQEN (1 << 2)
95#define CR0_PRIQEN (1 << 1)
96#define CR0_SMMUEN (1 << 0)
97
98#define ARM_SMMU_CR0ACK 0x24
99
100#define ARM_SMMU_CR1 0x28
101#define CR1_SH_NSH 0
102#define CR1_SH_OSH 2
103#define CR1_SH_ISH 3
104#define CR1_CACHE_NC 0
105#define CR1_CACHE_WB 1
106#define CR1_CACHE_WT 2
107#define CR1_TABLE_SH_SHIFT 10
108#define CR1_TABLE_OC_SHIFT 8
109#define CR1_TABLE_IC_SHIFT 6
110#define CR1_QUEUE_SH_SHIFT 4
111#define CR1_QUEUE_OC_SHIFT 2
112#define CR1_QUEUE_IC_SHIFT 0
113
114#define ARM_SMMU_CR2 0x2c
115#define CR2_PTM (1 << 2)
116#define CR2_RECINVSID (1 << 1)
117#define CR2_E2H (1 << 0)
118
119#define ARM_SMMU_IRQ_CTRL 0x50
120#define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
121#define IRQ_CTRL_GERROR_IRQEN (1 << 0)
122
123#define ARM_SMMU_IRQ_CTRLACK 0x54
124
125#define ARM_SMMU_GERROR 0x60
126#define GERROR_SFM_ERR (1 << 8)
127#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
128#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
129#define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
130#define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
131#define GERROR_PRIQ_ABT_ERR (1 << 3)
132#define GERROR_EVTQ_ABT_ERR (1 << 2)
133#define GERROR_CMDQ_ERR (1 << 0)
134#define GERROR_ERR_MASK 0xfd
135
136#define ARM_SMMU_GERRORN 0x64
137
138#define ARM_SMMU_GERROR_IRQ_CFG0 0x68
139#define ARM_SMMU_GERROR_IRQ_CFG1 0x70
140#define ARM_SMMU_GERROR_IRQ_CFG2 0x74
141
142#define ARM_SMMU_STRTAB_BASE 0x80
143#define STRTAB_BASE_RA (1UL << 62)
144#define STRTAB_BASE_ADDR_SHIFT 6
145#define STRTAB_BASE_ADDR_MASK 0x3ffffffffffUL
146
147#define ARM_SMMU_STRTAB_BASE_CFG 0x88
148#define STRTAB_BASE_CFG_LOG2SIZE_SHIFT 0
149#define STRTAB_BASE_CFG_LOG2SIZE_MASK 0x3f
150#define STRTAB_BASE_CFG_SPLIT_SHIFT 6
151#define STRTAB_BASE_CFG_SPLIT_MASK 0x1f
152#define STRTAB_BASE_CFG_FMT_SHIFT 16
153#define STRTAB_BASE_CFG_FMT_MASK 0x3
154#define STRTAB_BASE_CFG_FMT_LINEAR (0 << STRTAB_BASE_CFG_FMT_SHIFT)
155#define STRTAB_BASE_CFG_FMT_2LVL (1 << STRTAB_BASE_CFG_FMT_SHIFT)
156
157#define ARM_SMMU_CMDQ_BASE 0x90
158#define ARM_SMMU_CMDQ_PROD 0x98
159#define ARM_SMMU_CMDQ_CONS 0x9c
160
161#define ARM_SMMU_EVTQ_BASE 0xa0
162#define ARM_SMMU_EVTQ_PROD 0x100a8
163#define ARM_SMMU_EVTQ_CONS 0x100ac
164#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
165#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
166#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
167
168#define ARM_SMMU_PRIQ_BASE 0xc0
169#define ARM_SMMU_PRIQ_PROD 0x100c8
170#define ARM_SMMU_PRIQ_CONS 0x100cc
171#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
172#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
173#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
174
175/* Common MSI config fields */
176#define MSI_CFG0_SH_SHIFT 60
177#define MSI_CFG0_SH_NSH (0UL << MSI_CFG0_SH_SHIFT)
178#define MSI_CFG0_SH_OSH (2UL << MSI_CFG0_SH_SHIFT)
179#define MSI_CFG0_SH_ISH (3UL << MSI_CFG0_SH_SHIFT)
180#define MSI_CFG0_MEMATTR_SHIFT 56
181#define MSI_CFG0_MEMATTR_DEVICE_nGnRE (0x1 << MSI_CFG0_MEMATTR_SHIFT)
182#define MSI_CFG0_ADDR_SHIFT 2
183#define MSI_CFG0_ADDR_MASK 0x3fffffffffffUL
184
185#define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
186#define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
187#define Q_OVERFLOW_FLAG (1 << 31)
188#define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
189#define Q_ENT(q, p) ((q)->base + \
190 Q_IDX(q, p) * (q)->ent_dwords)
191
192#define Q_BASE_RWA (1UL << 62)
193#define Q_BASE_ADDR_SHIFT 5
194#define Q_BASE_ADDR_MASK 0xfffffffffffUL
195#define Q_BASE_LOG2SIZE_SHIFT 0
196#define Q_BASE_LOG2SIZE_MASK 0x1fUL
197
198/*
199 * Stream table.
200 *
201 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
202 * 2lvl: 8k L1 entries, 256 lazy entries per table (each table covers a PCI bus)
203 */
204#define STRTAB_L1_SZ_SHIFT 16
205#define STRTAB_SPLIT 8
206
207#define STRTAB_L1_DESC_DWORDS 1
208#define STRTAB_L1_DESC_SPAN_SHIFT 0
209#define STRTAB_L1_DESC_SPAN_MASK 0x1fUL
210#define STRTAB_L1_DESC_L2PTR_SHIFT 6
211#define STRTAB_L1_DESC_L2PTR_MASK 0x3ffffffffffUL
212
213#define STRTAB_STE_DWORDS 8
214#define STRTAB_STE_0_V (1UL << 0)
215#define STRTAB_STE_0_CFG_SHIFT 1
216#define STRTAB_STE_0_CFG_MASK 0x7UL
217#define STRTAB_STE_0_CFG_ABORT (0UL << STRTAB_STE_0_CFG_SHIFT)
218#define STRTAB_STE_0_CFG_BYPASS (4UL << STRTAB_STE_0_CFG_SHIFT)
219#define STRTAB_STE_0_CFG_S1_TRANS (5UL << STRTAB_STE_0_CFG_SHIFT)
220#define STRTAB_STE_0_CFG_S2_TRANS (6UL << STRTAB_STE_0_CFG_SHIFT)
221
222#define STRTAB_STE_0_S1FMT_SHIFT 4
223#define STRTAB_STE_0_S1FMT_LINEAR (0UL << STRTAB_STE_0_S1FMT_SHIFT)
224#define STRTAB_STE_0_S1CTXPTR_SHIFT 6
225#define STRTAB_STE_0_S1CTXPTR_MASK 0x3ffffffffffUL
226#define STRTAB_STE_0_S1CDMAX_SHIFT 59
227#define STRTAB_STE_0_S1CDMAX_MASK 0x1fUL
228
229#define STRTAB_STE_1_S1C_CACHE_NC 0UL
230#define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
231#define STRTAB_STE_1_S1C_CACHE_WT 2UL
232#define STRTAB_STE_1_S1C_CACHE_WB 3UL
233#define STRTAB_STE_1_S1C_SH_NSH 0UL
234#define STRTAB_STE_1_S1C_SH_OSH 2UL
235#define STRTAB_STE_1_S1C_SH_ISH 3UL
236#define STRTAB_STE_1_S1CIR_SHIFT 2
237#define STRTAB_STE_1_S1COR_SHIFT 4
238#define STRTAB_STE_1_S1CSH_SHIFT 6
239
240#define STRTAB_STE_1_S1STALLD (1UL << 27)
241
242#define STRTAB_STE_1_EATS_ABT 0UL
243#define STRTAB_STE_1_EATS_TRANS 1UL
244#define STRTAB_STE_1_EATS_S1CHK 2UL
245#define STRTAB_STE_1_EATS_SHIFT 28
246
247#define STRTAB_STE_1_STRW_NSEL1 0UL
248#define STRTAB_STE_1_STRW_EL2 2UL
249#define STRTAB_STE_1_STRW_SHIFT 30
250
251#define STRTAB_STE_2_S2VMID_SHIFT 0
252#define STRTAB_STE_2_S2VMID_MASK 0xffffUL
253#define STRTAB_STE_2_VTCR_SHIFT 32
254#define STRTAB_STE_2_VTCR_MASK 0x7ffffUL
255#define STRTAB_STE_2_S2AA64 (1UL << 51)
256#define STRTAB_STE_2_S2ENDI (1UL << 52)
257#define STRTAB_STE_2_S2PTW (1UL << 54)
258#define STRTAB_STE_2_S2R (1UL << 58)
259
260#define STRTAB_STE_3_S2TTB_SHIFT 4
261#define STRTAB_STE_3_S2TTB_MASK 0xfffffffffffUL
262
263/* Context descriptor (stage-1 only) */
264#define CTXDESC_CD_DWORDS 8
265#define CTXDESC_CD_0_TCR_T0SZ_SHIFT 0
266#define ARM64_TCR_T0SZ_SHIFT 0
267#define ARM64_TCR_T0SZ_MASK 0x1fUL
268#define CTXDESC_CD_0_TCR_TG0_SHIFT 6
269#define ARM64_TCR_TG0_SHIFT 14
270#define ARM64_TCR_TG0_MASK 0x3UL
271#define CTXDESC_CD_0_TCR_IRGN0_SHIFT 8
272#define ARM64_TCR_IRGN0_SHIFT 24
273#define ARM64_TCR_IRGN0_MASK 0x3UL
274#define CTXDESC_CD_0_TCR_ORGN0_SHIFT 10
275#define ARM64_TCR_ORGN0_SHIFT 26
276#define ARM64_TCR_ORGN0_MASK 0x3UL
277#define CTXDESC_CD_0_TCR_SH0_SHIFT 12
278#define ARM64_TCR_SH0_SHIFT 12
279#define ARM64_TCR_SH0_MASK 0x3UL
280#define CTXDESC_CD_0_TCR_EPD0_SHIFT 14
281#define ARM64_TCR_EPD0_SHIFT 7
282#define ARM64_TCR_EPD0_MASK 0x1UL
283#define CTXDESC_CD_0_TCR_EPD1_SHIFT 30
284#define ARM64_TCR_EPD1_SHIFT 23
285#define ARM64_TCR_EPD1_MASK 0x1UL
286
287#define CTXDESC_CD_0_ENDI (1UL << 15)
288#define CTXDESC_CD_0_V (1UL << 31)
289
290#define CTXDESC_CD_0_TCR_IPS_SHIFT 32
291#define ARM64_TCR_IPS_SHIFT 32
292#define ARM64_TCR_IPS_MASK 0x7UL
293#define CTXDESC_CD_0_TCR_TBI0_SHIFT 38
294#define ARM64_TCR_TBI0_SHIFT 37
295#define ARM64_TCR_TBI0_MASK 0x1UL
296
297#define CTXDESC_CD_0_AA64 (1UL << 41)
298#define CTXDESC_CD_0_R (1UL << 45)
299#define CTXDESC_CD_0_A (1UL << 46)
300#define CTXDESC_CD_0_ASET_SHIFT 47
301#define CTXDESC_CD_0_ASET_SHARED (0UL << CTXDESC_CD_0_ASET_SHIFT)
302#define CTXDESC_CD_0_ASET_PRIVATE (1UL << CTXDESC_CD_0_ASET_SHIFT)
303#define CTXDESC_CD_0_ASID_SHIFT 48
304#define CTXDESC_CD_0_ASID_MASK 0xffffUL
305
306#define CTXDESC_CD_1_TTB0_SHIFT 4
307#define CTXDESC_CD_1_TTB0_MASK 0xfffffffffffUL
308
309#define CTXDESC_CD_3_MAIR_SHIFT 0
310
311/* Convert between AArch64 (CPU) TCR format and SMMU CD format */
312#define ARM_SMMU_TCR2CD(tcr, fld) \
313 (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK) \
314 << CTXDESC_CD_0_TCR_##fld##_SHIFT)
315
316/* Command queue */
317#define CMDQ_ENT_DWORDS 2
318#define CMDQ_MAX_SZ_SHIFT 8
319
320#define CMDQ_ERR_SHIFT 24
321#define CMDQ_ERR_MASK 0x7f
322#define CMDQ_ERR_CERROR_NONE_IDX 0
323#define CMDQ_ERR_CERROR_ILL_IDX 1
324#define CMDQ_ERR_CERROR_ABT_IDX 2
325
326#define CMDQ_0_OP_SHIFT 0
327#define CMDQ_0_OP_MASK 0xffUL
328#define CMDQ_0_SSV (1UL << 11)
329
330#define CMDQ_PREFETCH_0_SID_SHIFT 32
331#define CMDQ_PREFETCH_1_SIZE_SHIFT 0
332#define CMDQ_PREFETCH_1_ADDR_MASK ~0xfffUL
333
334#define CMDQ_CFGI_0_SID_SHIFT 32
335#define CMDQ_CFGI_0_SID_MASK 0xffffffffUL
336#define CMDQ_CFGI_1_LEAF (1UL << 0)
337#define CMDQ_CFGI_1_RANGE_SHIFT 0
338#define CMDQ_CFGI_1_RANGE_MASK 0x1fUL
339
340#define CMDQ_TLBI_0_VMID_SHIFT 32
341#define CMDQ_TLBI_0_ASID_SHIFT 48
342#define CMDQ_TLBI_1_LEAF (1UL << 0)
343#define CMDQ_TLBI_1_ADDR_MASK ~0xfffUL
344
345#define CMDQ_PRI_0_SSID_SHIFT 12
346#define CMDQ_PRI_0_SSID_MASK 0xfffffUL
347#define CMDQ_PRI_0_SID_SHIFT 32
348#define CMDQ_PRI_0_SID_MASK 0xffffffffUL
349#define CMDQ_PRI_1_GRPID_SHIFT 0
350#define CMDQ_PRI_1_GRPID_MASK 0x1ffUL
351#define CMDQ_PRI_1_RESP_SHIFT 12
352#define CMDQ_PRI_1_RESP_DENY (0UL << CMDQ_PRI_1_RESP_SHIFT)
353#define CMDQ_PRI_1_RESP_FAIL (1UL << CMDQ_PRI_1_RESP_SHIFT)
354#define CMDQ_PRI_1_RESP_SUCC (2UL << CMDQ_PRI_1_RESP_SHIFT)
355
356#define CMDQ_SYNC_0_CS_SHIFT 12
357#define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT)
358#define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT)
359
360/* Event queue */
361#define EVTQ_ENT_DWORDS 4
362#define EVTQ_MAX_SZ_SHIFT 7
363
364#define EVTQ_0_ID_SHIFT 0
365#define EVTQ_0_ID_MASK 0xffUL
366
367/* PRI queue */
368#define PRIQ_ENT_DWORDS 2
369#define PRIQ_MAX_SZ_SHIFT 8
370
371#define PRIQ_0_SID_SHIFT 0
372#define PRIQ_0_SID_MASK 0xffffffffUL
373#define PRIQ_0_SSID_SHIFT 32
374#define PRIQ_0_SSID_MASK 0xfffffUL
375#define PRIQ_0_OF (1UL << 57)
376#define PRIQ_0_PERM_PRIV (1UL << 58)
377#define PRIQ_0_PERM_EXEC (1UL << 59)
378#define PRIQ_0_PERM_READ (1UL << 60)
379#define PRIQ_0_PERM_WRITE (1UL << 61)
380#define PRIQ_0_PRG_LAST (1UL << 62)
381#define PRIQ_0_SSID_V (1UL << 63)
382
383#define PRIQ_1_PRG_IDX_SHIFT 0
384#define PRIQ_1_PRG_IDX_MASK 0x1ffUL
385#define PRIQ_1_ADDR_SHIFT 12
386#define PRIQ_1_ADDR_MASK 0xfffffffffffffUL
387
388/* High-level queue structures */
389#define ARM_SMMU_POLL_TIMEOUT_US 100
390
391static bool disable_bypass;
392module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
393MODULE_PARM_DESC(disable_bypass,
394 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
395
396enum pri_resp {
397 PRI_RESP_DENY,
398 PRI_RESP_FAIL,
399 PRI_RESP_SUCC,
400};
401
402struct arm_smmu_cmdq_ent {
403 /* Common fields */
404 u8 opcode;
405 bool substream_valid;
406
407 /* Command-specific fields */
408 union {
409 #define CMDQ_OP_PREFETCH_CFG 0x1
410 struct {
411 u32 sid;
412 u8 size;
413 u64 addr;
414 } prefetch;
415
416 #define CMDQ_OP_CFGI_STE 0x3
417 #define CMDQ_OP_CFGI_ALL 0x4
418 struct {
419 u32 sid;
420 union {
421 bool leaf;
422 u8 span;
423 };
424 } cfgi;
425
426 #define CMDQ_OP_TLBI_NH_ASID 0x11
427 #define CMDQ_OP_TLBI_NH_VA 0x12
428 #define CMDQ_OP_TLBI_EL2_ALL 0x20
429 #define CMDQ_OP_TLBI_S12_VMALL 0x28
430 #define CMDQ_OP_TLBI_S2_IPA 0x2a
431 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
432 struct {
433 u16 asid;
434 u16 vmid;
435 bool leaf;
436 u64 addr;
437 } tlbi;
438
439 #define CMDQ_OP_PRI_RESP 0x41
440 struct {
441 u32 sid;
442 u32 ssid;
443 u16 grpid;
444 enum pri_resp resp;
445 } pri;
446
447 #define CMDQ_OP_CMD_SYNC 0x46
448 };
449};
450
451struct arm_smmu_queue {
452 int irq; /* Wired interrupt */
453
454 __le64 *base;
455 dma_addr_t base_dma;
456 u64 q_base;
457
458 size_t ent_dwords;
459 u32 max_n_shift;
460 u32 prod;
461 u32 cons;
462
463 u32 __iomem *prod_reg;
464 u32 __iomem *cons_reg;
465};
466
467struct arm_smmu_cmdq {
468 struct arm_smmu_queue q;
469 spinlock_t lock;
470};
471
472struct arm_smmu_evtq {
473 struct arm_smmu_queue q;
474 u32 max_stalls;
475};
476
477struct arm_smmu_priq {
478 struct arm_smmu_queue q;
479};
480
481/* High-level stream table and context descriptor structures */
482struct arm_smmu_strtab_l1_desc {
483 u8 span;
484
485 __le64 *l2ptr;
486 dma_addr_t l2ptr_dma;
487};
488
489struct arm_smmu_s1_cfg {
490 __le64 *cdptr;
491 dma_addr_t cdptr_dma;
492
493 struct arm_smmu_ctx_desc {
494 u16 asid;
495 u64 ttbr;
496 u64 tcr;
497 u64 mair;
498 } cd;
499};
500
501struct arm_smmu_s2_cfg {
502 u16 vmid;
503 u64 vttbr;
504 u64 vtcr;
505};
506
507struct arm_smmu_strtab_ent {
508 bool valid;
509
510 bool bypass; /* Overrides s1/s2 config */
511 struct arm_smmu_s1_cfg *s1_cfg;
512 struct arm_smmu_s2_cfg *s2_cfg;
513};
514
515struct arm_smmu_strtab_cfg {
516 __le64 *strtab;
517 dma_addr_t strtab_dma;
518 struct arm_smmu_strtab_l1_desc *l1_desc;
519 unsigned int num_l1_ents;
520
521 u64 strtab_base;
522 u32 strtab_base_cfg;
523};
524
525/* An SMMUv3 instance */
526struct arm_smmu_device {
527 struct device *dev;
528 void __iomem *base;
529
530#define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
531#define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
532#define ARM_SMMU_FEAT_TT_LE (1 << 2)
533#define ARM_SMMU_FEAT_TT_BE (1 << 3)
534#define ARM_SMMU_FEAT_PRI (1 << 4)
535#define ARM_SMMU_FEAT_ATS (1 << 5)
536#define ARM_SMMU_FEAT_SEV (1 << 6)
537#define ARM_SMMU_FEAT_MSI (1 << 7)
538#define ARM_SMMU_FEAT_COHERENCY (1 << 8)
539#define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
540#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
541#define ARM_SMMU_FEAT_STALLS (1 << 11)
542#define ARM_SMMU_FEAT_HYP (1 << 12)
543 u32 features;
544
545 struct arm_smmu_cmdq cmdq;
546 struct arm_smmu_evtq evtq;
547 struct arm_smmu_priq priq;
548
549 int gerr_irq;
550
551 unsigned long ias; /* IPA */
552 unsigned long oas; /* PA */
553
554#define ARM_SMMU_MAX_ASIDS (1 << 16)
555 unsigned int asid_bits;
556 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
557
558#define ARM_SMMU_MAX_VMIDS (1 << 16)
559 unsigned int vmid_bits;
560 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
561
562 unsigned int ssid_bits;
563 unsigned int sid_bits;
564
565 struct arm_smmu_strtab_cfg strtab_cfg;
566 struct list_head list;
567};
568
569/* SMMU private data for an IOMMU group */
570struct arm_smmu_group {
571 struct arm_smmu_device *smmu;
572 struct arm_smmu_domain *domain;
573 int num_sids;
574 u32 *sids;
575 struct arm_smmu_strtab_ent ste;
576};
577
578/* SMMU private data for an IOMMU domain */
579enum arm_smmu_domain_stage {
580 ARM_SMMU_DOMAIN_S1 = 0,
581 ARM_SMMU_DOMAIN_S2,
582 ARM_SMMU_DOMAIN_NESTED,
583};
584
585struct arm_smmu_domain {
586 struct arm_smmu_device *smmu;
587 struct mutex init_mutex; /* Protects smmu pointer */
588
589 struct io_pgtable_ops *pgtbl_ops;
590 spinlock_t pgtbl_lock;
591
592 enum arm_smmu_domain_stage stage;
593 union {
594 struct arm_smmu_s1_cfg s1_cfg;
595 struct arm_smmu_s2_cfg s2_cfg;
596 };
597
598 struct iommu_domain domain;
599};
600
601/* Our list of SMMU instances */
602static DEFINE_SPINLOCK(arm_smmu_devices_lock);
603static LIST_HEAD(arm_smmu_devices);
604
605static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
606{
607 return container_of(dom, struct arm_smmu_domain, domain);
608}
609
610/* Low-level queue manipulation functions */
611static bool queue_full(struct arm_smmu_queue *q)
612{
613 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
614 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
615}
616
617static bool queue_empty(struct arm_smmu_queue *q)
618{
619 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
620 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
621}
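/*
 * A note on the index encoding used by queue_full()/queue_empty(): prod
 * and cons each hold the entry index in their low bits with a wrap flag
 * immediately above it (the Q_IDX/Q_WRP macros). Equal index and equal
 * wrap flag means the queue is empty; equal index but differing wrap
 * flags means the producer has lapped the consumer, i.e. the queue is
 * full. Illustrative example for a two-entry queue: prod = 0b10 and
 * cons = 0b00 share index 0 but differ in the wrap bit, so the queue is
 * full despite the matching indices.
 */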
622
623static void queue_sync_cons(struct arm_smmu_queue *q)
624{
625 q->cons = readl_relaxed(q->cons_reg);
626}
627
628static void queue_inc_cons(struct arm_smmu_queue *q)
629{
630 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
631
632 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
633 writel(q->cons, q->cons_reg);
634}
635
636static int queue_sync_prod(struct arm_smmu_queue *q)
637{
638 int ret = 0;
639 u32 prod = readl_relaxed(q->prod_reg);
640
641 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
642 ret = -EOVERFLOW;
643
644 q->prod = prod;
645 return ret;
646}
647
648static void queue_inc_prod(struct arm_smmu_queue *q)
649{
650 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
651
652 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
653 writel(q->prod, q->prod_reg);
654}
655
656static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
657{
658 if (Q_WRP(q, q->cons) == Q_WRP(q, until))
659 return Q_IDX(q, q->cons) < Q_IDX(q, until);
660
661 return Q_IDX(q, q->cons) >= Q_IDX(q, until);
662}
663
664static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
665{
666 ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
667
668 while (queue_sync_cons(q), __queue_cons_before(q, until)) {
669 if (ktime_compare(ktime_get(), timeout) > 0)
670 return -ETIMEDOUT;
671
672 if (wfe) {
673 wfe();
674 } else {
675 cpu_relax();
676 udelay(1);
677 }
678 }
679
680 return 0;
681}
682
683static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
684{
685 int i;
686
687 for (i = 0; i < n_dwords; ++i)
688 *dst++ = cpu_to_le64(*src++);
689}
690
691static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
692{
693 if (queue_full(q))
694 return -ENOSPC;
695
696 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
697 queue_inc_prod(q);
698 return 0;
699}
700
701static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
702{
703 int i;
704
705 for (i = 0; i < n_dwords; ++i)
706 *dst++ = le64_to_cpu(*src++);
707}
708
709static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
710{
711 if (queue_empty(q))
712 return -EAGAIN;
713
714 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
715 queue_inc_cons(q);
716 return 0;
717}
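/*
 * Ownership of the two ends differs per queue (as the register accesses
 * above suggest): the CPU produces command queue entries and the SMMU
 * consumes them, while the SMMU produces event and PRI queue entries for
 * the CPU to consume. Consequently queue_insert_raw() is only used on
 * the command queue and queue_remove_raw() only on the event/PRI queues.
 */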
718
719/* High-level queue accessors */
720static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
721{
722 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
723 cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
724
725 switch (ent->opcode) {
726 case CMDQ_OP_TLBI_EL2_ALL:
727 case CMDQ_OP_TLBI_NSNH_ALL:
728 break;
729 case CMDQ_OP_PREFETCH_CFG:
730 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
731 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
732 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
733 break;
734 case CMDQ_OP_CFGI_STE:
735 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
736 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
737 break;
738 case CMDQ_OP_CFGI_ALL:
739 /* Cover the entire SID range */
740 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
741 break;
742 case CMDQ_OP_TLBI_NH_VA:
743 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
744 /* Fallthrough */
745 case CMDQ_OP_TLBI_S2_IPA:
746 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
747 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
748 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_ADDR_MASK;
749 break;
750 case CMDQ_OP_TLBI_NH_ASID:
751 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
752 /* Fallthrough */
753 case CMDQ_OP_TLBI_S12_VMALL:
754 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
755 break;
756 case CMDQ_OP_PRI_RESP:
757 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
758 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
759 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
760 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
761 switch (ent->pri.resp) {
762 case PRI_RESP_DENY:
763 cmd[1] |= CMDQ_PRI_1_RESP_DENY;
764 break;
765 case PRI_RESP_FAIL:
766 cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
767 break;
768 case PRI_RESP_SUCC:
769 cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
770 break;
771 default:
772 return -EINVAL;
773 }
774 break;
775 case CMDQ_OP_CMD_SYNC:
776 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
777 break;
778 default:
779 return -ENOENT;
780 }
781
782 return 0;
783}
784
785static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
786{
787 static const char *cerror_str[] = {
788 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
789 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
790 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
791 };
792
793 int i;
794 u64 cmd[CMDQ_ENT_DWORDS];
795 struct arm_smmu_queue *q = &smmu->cmdq.q;
796 u32 cons = readl_relaxed(q->cons_reg);
797 u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
798 struct arm_smmu_cmdq_ent cmd_sync = {
799 .opcode = CMDQ_OP_CMD_SYNC,
800 };
801
802 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
803 cerror_str[idx]);
804
805 switch (idx) {
806 case CMDQ_ERR_CERROR_ILL_IDX:
807 break;
808 case CMDQ_ERR_CERROR_ABT_IDX:
809 dev_err(smmu->dev, "retrying command fetch\n");
810 case CMDQ_ERR_CERROR_NONE_IDX:
811 return;
812 }
813
814 /*
815 * We may have concurrent producers, so we need to be careful
816 * not to touch any of the shadow cmdq state.
817 */
818 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
819 dev_err(smmu->dev, "skipping command in error state:\n");
820 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
821 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
822
823 /* Convert the erroneous command into a CMD_SYNC */
824 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
825 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
826 return;
827 }
828
829 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
830}
831
832static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
833 struct arm_smmu_cmdq_ent *ent)
834{
835 u32 until;
836 u64 cmd[CMDQ_ENT_DWORDS];
837 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
838 struct arm_smmu_queue *q = &smmu->cmdq.q;
839
840 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
841 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
842 ent->opcode);
843 return;
844 }
845
846 spin_lock(&smmu->cmdq.lock);
847 while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
848 /*
849 * Keep the queue locked, otherwise the producer could wrap
850 * twice and we could see a future consumer pointer that looks
851 * like it's behind us.
852 */
853 if (queue_poll_cons(q, until, wfe))
854 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
855 }
856
857 if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
858 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
859 spin_unlock(&smmu->cmdq.lock);
860}
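/*
 * Typical usage of the two helpers above (illustrative only; the values
 * are made up):
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode	= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi	= { .asid = 1, .addr = iova, .leaf = true },
 *	};
 *	arm_smmu_cmdq_issue_cmd(smmu, &ent);
 *
 * with a CMDQ_OP_CMD_SYNC entry issued afterwards when the caller needs
 * to wait for completion, the same pattern used by the STE and TLB
 * maintenance code below.
 */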
861
862/* Context descriptor manipulation functions */
863static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
864{
865 u64 val = 0;
866
867 /* Repack the TCR. Just care about TTBR0 for now */
868 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
869 val |= ARM_SMMU_TCR2CD(tcr, TG0);
870 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
871 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
872 val |= ARM_SMMU_TCR2CD(tcr, SH0);
873 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
874 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
875 val |= ARM_SMMU_TCR2CD(tcr, IPS);
876 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
877
878 return val;
879}
880
881static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
882 struct arm_smmu_s1_cfg *cfg)
883{
884 u64 val;
885
886 /*
887 * We don't need to issue any invalidation here, as we'll invalidate
888 * the STE when installing the new entry anyway.
889 */
890 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
891#ifdef __BIG_ENDIAN
892 CTXDESC_CD_0_ENDI |
893#endif
894 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
895 CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
896 CTXDESC_CD_0_V;
897 cfg->cdptr[0] = cpu_to_le64(val);
898
899 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
900 cfg->cdptr[1] = cpu_to_le64(val);
901
902 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
903}
904
905/* Stream table manipulation functions */
906static void
907arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
908{
909 u64 val = 0;
910
911 val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
912 << STRTAB_L1_DESC_SPAN_SHIFT;
913 val |= desc->l2ptr_dma &
914 STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
915
916 *dst = cpu_to_le64(val);
917}
918
919static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
920{
921 struct arm_smmu_cmdq_ent cmd = {
922 .opcode = CMDQ_OP_CFGI_STE,
923 .cfgi = {
924 .sid = sid,
925 .leaf = true,
926 },
927 };
928
929 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
930 cmd.opcode = CMDQ_OP_CMD_SYNC;
931 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
932}
933
934static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
935 __le64 *dst, struct arm_smmu_strtab_ent *ste)
936{
937 /*
938 * This is hideously complicated, but we only really care about
939 * three cases at the moment:
940 *
941 * 1. Invalid (all zero) -> bypass (init)
942 * 2. Bypass -> translation (attach)
943 * 3. Translation -> bypass (detach)
944 *
945 * Given that we can't update the STE atomically and the SMMU
946 * doesn't read the thing in a defined order, that leaves us
947 * with the following maintenance requirements:
948 *
949 * 1. Update Config, return (init time STEs aren't live)
950 * 2. Write everything apart from dword 0, sync, write dword 0, sync
951 * 3. Update Config, sync
952 */
953 u64 val = le64_to_cpu(dst[0]);
954 bool ste_live = false;
955 struct arm_smmu_cmdq_ent prefetch_cmd = {
956 .opcode = CMDQ_OP_PREFETCH_CFG,
957 .prefetch = {
958 .sid = sid,
959 },
960 };
961
962 if (val & STRTAB_STE_0_V) {
963 u64 cfg;
964
965 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
966 switch (cfg) {
967 case STRTAB_STE_0_CFG_BYPASS:
968 break;
969 case STRTAB_STE_0_CFG_S1_TRANS:
970 case STRTAB_STE_0_CFG_S2_TRANS:
971 ste_live = true;
972 break;
973 default:
974 BUG(); /* STE corruption */
975 }
976 }
977
978 /* Nuke the existing Config, as we're going to rewrite it */
979 val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
980
981 if (ste->valid)
982 val |= STRTAB_STE_0_V;
983 else
984 val &= ~STRTAB_STE_0_V;
985
986 if (ste->bypass) {
987 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
988 : STRTAB_STE_0_CFG_BYPASS;
989 dst[0] = cpu_to_le64(val);
990 dst[2] = 0; /* Nuke the VMID */
991 if (ste_live)
992 arm_smmu_sync_ste_for_sid(smmu, sid);
993 return;
994 }
995
996 if (ste->s1_cfg) {
997 BUG_ON(ste_live);
998 dst[1] = cpu_to_le64(
999 STRTAB_STE_1_S1C_CACHE_WBRA
1000 << STRTAB_STE_1_S1CIR_SHIFT |
1001 STRTAB_STE_1_S1C_CACHE_WBRA
1002 << STRTAB_STE_1_S1COR_SHIFT |
1003 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1004 STRTAB_STE_1_S1STALLD |
1005#ifdef CONFIG_PCI_ATS
1006 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1007#endif
1008 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1009
1010 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1011 << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1012 STRTAB_STE_0_CFG_S1_TRANS;
1013
1014 }
1015
1016 if (ste->s2_cfg) {
1017 BUG_ON(ste_live);
1018 dst[2] = cpu_to_le64(
1019 ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1020 (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1021 << STRTAB_STE_2_VTCR_SHIFT |
1022#ifdef __BIG_ENDIAN
1023 STRTAB_STE_2_S2ENDI |
1024#endif
1025 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1026 STRTAB_STE_2_S2R);
1027
1028 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1029 STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1030
1031 val |= STRTAB_STE_0_CFG_S2_TRANS;
1032 }
1033
1034 arm_smmu_sync_ste_for_sid(smmu, sid);
1035 dst[0] = cpu_to_le64(val);
1036 arm_smmu_sync_ste_for_sid(smmu, sid);
1037
1038 /* It's likely that we'll want to use the new STE soon */
1039 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1040}
1041
1042static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1043{
1044 unsigned int i;
1045 struct arm_smmu_strtab_ent ste = {
1046 .valid = true,
1047 .bypass = true,
1048 };
1049
1050 for (i = 0; i < nent; ++i) {
1051 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1052 strtab += STRTAB_STE_DWORDS;
1053 }
1054}
1055
1056static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1057{
1058 size_t size;
1059 void *strtab;
1060 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1061 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1062
1063 if (desc->l2ptr)
1064 return 0;
1065
1066 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1067 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1068
1069 desc->span = STRTAB_SPLIT + 1;
1070 desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1071 GFP_KERNEL);
1072 if (!desc->l2ptr) {
1073 dev_err(smmu->dev,
1074 "failed to allocate l2 stream table for SID %u\n",
1075 sid);
1076 return -ENOMEM;
1077 }
1078
1079 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1080 arm_smmu_write_strtab_l1_desc(strtab, desc);
1081 return 0;
1082}
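/*
 * Worked example of the sizing above, assuming STRTAB_SPLIT is 8 and
 * STRTAB_STE_DWORDS is 8: each L2 table covers 1 << 8 = 256 StreamIDs,
 * each STE is 8 dwords = 64 bytes, so the allocation is
 * 1 << (8 + 3 + 3) = 16KB, and the L1 descriptor advertises a span of
 * STRTAB_SPLIT + 1.
 */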
1083
1084/* IRQ and event handlers */
1085static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1086{
1087 int i;
1088 struct arm_smmu_device *smmu = dev;
1089 struct arm_smmu_queue *q = &smmu->evtq.q;
1090 u64 evt[EVTQ_ENT_DWORDS];
1091
1092 while (!queue_remove_raw(q, evt)) {
1093 u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1094
1095 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1096 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1097 dev_info(smmu->dev, "\t0x%016llx\n",
1098 (unsigned long long)evt[i]);
1099 }
1100
1101 /* Sync our overflow flag, as we believe we're up to speed */
1102 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1103 return IRQ_HANDLED;
1104}
1105
1106static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1107{
1108 irqreturn_t ret = IRQ_WAKE_THREAD;
1109 struct arm_smmu_device *smmu = dev;
1110 struct arm_smmu_queue *q = &smmu->evtq.q;
1111
1112 /*
1113 * Not much we can do on overflow, so scream and pretend we're
1114 * trying harder.
1115 */
1116 if (queue_sync_prod(q) == -EOVERFLOW)
1117 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1118 else if (queue_empty(q))
1119 ret = IRQ_NONE;
1120
1121 return ret;
1122}
1123
1124static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1125{
1126 struct arm_smmu_device *smmu = dev;
1127 struct arm_smmu_queue *q = &smmu->priq.q;
1128 u64 evt[PRIQ_ENT_DWORDS];
1129
1130 while (!queue_remove_raw(q, evt)) {
1131 u32 sid, ssid;
1132 u16 grpid;
1133 bool ssv, last;
1134
1135 sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1136 ssv = evt[0] & PRIQ_0_SSID_V;
1137 ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1138 last = evt[0] & PRIQ_0_PRG_LAST;
1139 grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1140
1141 dev_info(smmu->dev, "unexpected PRI request received:\n");
1142 dev_info(smmu->dev,
1143 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1144 sid, ssid, grpid, last ? "L" : "",
1145 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1146 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1147 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1148 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1149 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1150
1151 if (last) {
1152 struct arm_smmu_cmdq_ent cmd = {
1153 .opcode = CMDQ_OP_PRI_RESP,
1154 .substream_valid = ssv,
1155 .pri = {
1156 .sid = sid,
1157 .ssid = ssid,
1158 .grpid = grpid,
1159 .resp = PRI_RESP_DENY,
1160 },
1161 };
1162
1163 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1164 }
1165 }
1166
1167 /* Sync our overflow flag, as we believe we're up to speed */
1168 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1169 return IRQ_HANDLED;
1170}
1171
1172static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1173{
1174 irqreturn_t ret = IRQ_WAKE_THREAD;
1175 struct arm_smmu_device *smmu = dev;
1176 struct arm_smmu_queue *q = &smmu->priq.q;
1177
1178 /* PRIQ overflow indicates a programming error */
1179 if (queue_sync_prod(q) == -EOVERFLOW)
1180 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1181 else if (queue_empty(q))
1182 ret = IRQ_NONE;
1183
1184 return ret;
1185}
1186
1187static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1188{
1189 /* We don't actually use CMD_SYNC interrupts for anything */
1190 return IRQ_HANDLED;
1191}
1192
1193static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1194
1195static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1196{
1197 u32 gerror, gerrorn;
1198 struct arm_smmu_device *smmu = dev;
1199
1200 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1201 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1202
1203 gerror ^= gerrorn;
1204 if (!(gerror & GERROR_ERR_MASK))
1205 return IRQ_NONE; /* No errors pending */
1206
1207 dev_warn(smmu->dev,
1208 "unexpected global error reported (0x%08x), this could be serious\n",
1209 gerror);
1210
1211 if (gerror & GERROR_SFM_ERR) {
1212 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1213 arm_smmu_device_disable(smmu);
1214 }
1215
1216 if (gerror & GERROR_MSI_GERROR_ABT_ERR)
1217 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1218
1219 if (gerror & GERROR_MSI_PRIQ_ABT_ERR) {
1220 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1221 arm_smmu_priq_handler(irq, smmu);
1222 }
1223
1224 if (gerror & GERROR_MSI_EVTQ_ABT_ERR) {
1225 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1226 arm_smmu_evtq_handler(irq, smmu);
1227 }
1228
1229 if (gerror & GERROR_MSI_CMDQ_ABT_ERR) {
1230 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1231 arm_smmu_cmdq_sync_handler(irq, smmu);
1232 }
1233
1234 if (gerror & GERROR_PRIQ_ABT_ERR)
1235 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1236
1237 if (gerror & GERROR_EVTQ_ABT_ERR)
1238 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1239
1240 if (gerror & GERROR_CMDQ_ERR)
1241 arm_smmu_cmdq_skip_err(smmu);
1242
1243 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1244 return IRQ_HANDLED;
1245}
1246
1247/* IO_PGTABLE API */
1248static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1249{
1250 struct arm_smmu_cmdq_ent cmd;
1251
1252 cmd.opcode = CMDQ_OP_CMD_SYNC;
1253 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1254}
1255
1256static void arm_smmu_tlb_sync(void *cookie)
1257{
1258 struct arm_smmu_domain *smmu_domain = cookie;
1259 __arm_smmu_tlb_sync(smmu_domain->smmu);
1260}
1261
1262static void arm_smmu_tlb_inv_context(void *cookie)
1263{
1264 struct arm_smmu_domain *smmu_domain = cookie;
1265 struct arm_smmu_device *smmu = smmu_domain->smmu;
1266 struct arm_smmu_cmdq_ent cmd;
1267
1268 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1269 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1270 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1271 cmd.tlbi.vmid = 0;
1272 } else {
1273 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1274 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1275 }
1276
1277 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1278 __arm_smmu_tlb_sync(smmu);
1279}
1280
1281static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1282 bool leaf, void *cookie)
1283{
1284 struct arm_smmu_domain *smmu_domain = cookie;
1285 struct arm_smmu_device *smmu = smmu_domain->smmu;
1286 struct arm_smmu_cmdq_ent cmd = {
1287 .tlbi = {
1288 .leaf = leaf,
1289 .addr = iova,
1290 },
1291 };
1292
1293 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1294 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1295 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1296 } else {
1297 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1298 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1299 }
1300
1301 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1302}
1303
1304static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
1305{
1306 struct arm_smmu_domain *smmu_domain = cookie;
1307 struct arm_smmu_device *smmu = smmu_domain->smmu;
1308 unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
1309
1310 if (smmu->features & ARM_SMMU_FEAT_COHERENCY) {
1311 dsb(ishst);
1312 } else {
1313 dma_addr_t dma_addr;
1314 struct device *dev = smmu->dev;
1315
1316 dma_addr = dma_map_page(dev, virt_to_page(addr), offset, size,
1317 DMA_TO_DEVICE);
1318
1319 if (dma_mapping_error(dev, dma_addr))
1320 dev_err(dev, "failed to flush pgtable at %p\n", addr);
1321 else
1322 dma_unmap_page(dev, dma_addr, size, DMA_TO_DEVICE);
1323 }
1324}
1325
1326static struct iommu_gather_ops arm_smmu_gather_ops = {
1327 .tlb_flush_all = arm_smmu_tlb_inv_context,
1328 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1329 .tlb_sync = arm_smmu_tlb_sync,
1330 .flush_pgtable = arm_smmu_flush_pgtable,
1331};
1332
1333/* IOMMU API */
1334static bool arm_smmu_capable(enum iommu_cap cap)
1335{
1336 switch (cap) {
1337 case IOMMU_CAP_CACHE_COHERENCY:
1338 return true;
1339 case IOMMU_CAP_INTR_REMAP:
1340 return true; /* MSIs are just memory writes */
1341 case IOMMU_CAP_NOEXEC:
1342 return true;
1343 default:
1344 return false;
1345 }
1346}
1347
1348static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1349{
1350 struct arm_smmu_domain *smmu_domain;
1351
1352 if (type != IOMMU_DOMAIN_UNMANAGED)
1353 return NULL;
1354
1355 /*
1356 * Allocate the domain and initialise some of its data structures.
1357 * We can't really do anything meaningful until we've added a
1358 * master.
1359 */
1360 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1361 if (!smmu_domain)
1362 return NULL;
1363
1364 mutex_init(&smmu_domain->init_mutex);
1365 spin_lock_init(&smmu_domain->pgtbl_lock);
1366 return &smmu_domain->domain;
1367}
1368
1369static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1370{
1371 int idx, size = 1 << span;
1372
1373 do {
1374 idx = find_first_zero_bit(map, size);
1375 if (idx == size)
1376 return -ENOSPC;
1377 } while (test_and_set_bit(idx, map));
1378
1379 return idx;
1380}
1381
1382static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1383{
1384 clear_bit(idx, map);
1385}
1386
1387static void arm_smmu_domain_free(struct iommu_domain *domain)
1388{
1389 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1390 struct arm_smmu_device *smmu = smmu_domain->smmu;
1391
1392 if (smmu_domain->pgtbl_ops)
1393 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1394
1395 /* Free the CD and ASID, if we allocated them */
1396 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1397 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1398
1399 if (cfg->cdptr) {
1400 dma_free_coherent(smmu_domain->smmu->dev,
1401 CTXDESC_CD_DWORDS << 3,
1402 cfg->cdptr,
1403 cfg->cdptr_dma);
1404
1405 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1406 }
1407 } else {
1408 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1409 if (cfg->vmid)
1410 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1411 }
1412
1413 kfree(smmu_domain);
1414}
1415
1416static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1417 struct io_pgtable_cfg *pgtbl_cfg)
1418{
1419 int ret;
1420 int asid;
1421 struct arm_smmu_device *smmu = smmu_domain->smmu;
1422 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1423
1424 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1425 if (IS_ERR_VALUE(asid))
1426 return asid;
1427
1428 cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1429 &cfg->cdptr_dma, GFP_KERNEL);
1430 if (!cfg->cdptr) {
1431 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
ret = -ENOMEM;
1432 goto out_free_asid;
1433 }
1434
1435 cfg->cd.asid = asid;
1436 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1437 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1438 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1439 return 0;
1440
1441out_free_asid:
1442 arm_smmu_bitmap_free(smmu->asid_map, asid);
1443 return ret;
1444}
1445
1446static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1447 struct io_pgtable_cfg *pgtbl_cfg)
1448{
1449 int vmid;
1450 struct arm_smmu_device *smmu = smmu_domain->smmu;
1451 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1452
1453 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1454 if (IS_ERR_VALUE(vmid))
1455 return vmid;
1456
1457 cfg->vmid = vmid;
1458 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1459 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1460 return 0;
1461}
1462
1463static struct iommu_ops arm_smmu_ops;
1464
1465static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1466{
1467 int ret;
1468 unsigned long ias, oas;
1469 enum io_pgtable_fmt fmt;
1470 struct io_pgtable_cfg pgtbl_cfg;
1471 struct io_pgtable_ops *pgtbl_ops;
1472 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1473 struct io_pgtable_cfg *);
1474 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1475 struct arm_smmu_device *smmu = smmu_domain->smmu;
1476
1477 /* Restrict the stage to what we can actually support */
1478 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1479 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1480 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1481 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1482
1483 switch (smmu_domain->stage) {
1484 case ARM_SMMU_DOMAIN_S1:
1485 ias = VA_BITS;
1486 oas = smmu->ias;
1487 fmt = ARM_64_LPAE_S1;
1488 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1489 break;
1490 case ARM_SMMU_DOMAIN_NESTED:
1491 case ARM_SMMU_DOMAIN_S2:
1492 ias = smmu->ias;
1493 oas = smmu->oas;
1494 fmt = ARM_64_LPAE_S2;
1495 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1496 break;
1497 default:
1498 return -EINVAL;
1499 }
1500
1501 pgtbl_cfg = (struct io_pgtable_cfg) {
1502 .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap,
1503 .ias = ias,
1504 .oas = oas,
1505 .tlb = &arm_smmu_gather_ops,
1506 };
1507
1508 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1509 if (!pgtbl_ops)
1510 return -ENOMEM;
1511
1512 arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1513 smmu_domain->pgtbl_ops = pgtbl_ops;
1514
1515 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1516 if (IS_ERR_VALUE(ret))
1517 free_io_pgtable_ops(pgtbl_ops);
1518
1519 return ret;
1520}
1521
1522static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1523{
1524 struct iommu_group *group;
1525 struct arm_smmu_group *smmu_group;
1526
1527 group = iommu_group_get(dev);
1528 if (!group)
1529 return NULL;
1530
1531 smmu_group = iommu_group_get_iommudata(group);
1532 iommu_group_put(group);
1533 return smmu_group;
1534}
1535
1536static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1537{
1538 __le64 *step;
1539 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1540
1541 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1542 struct arm_smmu_strtab_l1_desc *l1_desc;
1543 int idx;
1544
1545 /* Two-level walk */
1546 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1547 l1_desc = &cfg->l1_desc[idx];
1548 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1549 step = &l1_desc->l2ptr[idx];
1550 } else {
1551 /* Simple linear lookup */
1552 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1553 }
1554
1555 return step;
1556}
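/*
 * Illustrative walk, assuming STRTAB_SPLIT is 8: for sid 0x1234 the L1
 * descriptor index is sid >> 8 = 0x12 and the STE sits at slot
 * sid & 0xff = 0x34 of that descriptor's L2 table, i.e. at dword offset
 * 0x34 * STRTAB_STE_DWORDS into l2ptr. The linear case simply indexes
 * the flat table at sid * STRTAB_STE_DWORDS.
 */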
1557
1558static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1559{
1560 int i;
1561 struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1562 struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1563 struct arm_smmu_device *smmu = smmu_group->smmu;
1564
1565 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1566 ste->s1_cfg = &smmu_domain->s1_cfg;
1567 ste->s2_cfg = NULL;
1568 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1569 } else {
1570 ste->s1_cfg = NULL;
1571 ste->s2_cfg = &smmu_domain->s2_cfg;
1572 }
1573
1574 for (i = 0; i < smmu_group->num_sids; ++i) {
1575 u32 sid = smmu_group->sids[i];
1576 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1577
1578 arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1579 }
1580
1581 return 0;
1582}
1583
1584static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1585{
1586 int ret = 0;
1587 struct arm_smmu_device *smmu;
1588 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1589 struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1590
1591 if (!smmu_group)
1592 return -ENOENT;
1593
1594 /* Already attached to a different domain? */
1595 if (smmu_group->domain && smmu_group->domain != smmu_domain)
1596 return -EEXIST;
1597
1598 smmu = smmu_group->smmu;
1599 mutex_lock(&smmu_domain->init_mutex);
1600
1601 if (!smmu_domain->smmu) {
1602 smmu_domain->smmu = smmu;
1603 ret = arm_smmu_domain_finalise(domain);
1604 if (ret) {
1605 smmu_domain->smmu = NULL;
1606 goto out_unlock;
1607 }
1608 } else if (smmu_domain->smmu != smmu) {
1609 dev_err(dev,
1610 "cannot attach to SMMU %s (upstream of %s)\n",
1611 dev_name(smmu_domain->smmu->dev),
1612 dev_name(smmu->dev));
1613 ret = -ENXIO;
1614 goto out_unlock;
1615 }
1616
1617 /* Group already attached to this domain? */
1618 if (smmu_group->domain)
1619 goto out_unlock;
1620
1621 smmu_group->domain = smmu_domain;
1622 smmu_group->ste.bypass = false;
1623
1624 ret = arm_smmu_install_ste_for_group(smmu_group);
1625 if (IS_ERR_VALUE(ret))
1626 smmu_group->domain = NULL;
1627
1628out_unlock:
1629 mutex_unlock(&smmu_domain->init_mutex);
1630 return ret;
1631}
1632
1633static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1634{
1635 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1636 struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1637
1638 BUG_ON(!smmu_domain);
1639 BUG_ON(!smmu_group);
1640
1641 mutex_lock(&smmu_domain->init_mutex);
1642 BUG_ON(smmu_group->domain != smmu_domain);
1643
1644 smmu_group->ste.bypass = true;
1645 if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
1646 dev_warn(dev, "failed to install bypass STE\n");
1647
1648 smmu_group->domain = NULL;
1649 mutex_unlock(&smmu_domain->init_mutex);
1650}
1651
1652static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1653 phys_addr_t paddr, size_t size, int prot)
1654{
1655 int ret;
1656 unsigned long flags;
1657 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1658 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1659
1660 if (!ops)
1661 return -ENODEV;
1662
1663 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1664 ret = ops->map(ops, iova, paddr, size, prot);
1665 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1666 return ret;
1667}
1668
1669static size_t
1670arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1671{
1672 size_t ret;
1673 unsigned long flags;
1674 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1675 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1676
1677 if (!ops)
1678 return 0;
1679
1680 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1681 ret = ops->unmap(ops, iova, size);
1682 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1683 return ret;
1684}
1685
1686static phys_addr_t
1687arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1688{
1689 phys_addr_t ret;
1690 unsigned long flags;
1691 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1692 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1693
1694 if (!ops)
1695 return 0;
1696
1697 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1698 ret = ops->iova_to_phys(ops, iova);
1699 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1700
1701 return ret;
1702}
1703
1704static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1705{
1706 *(u32 *)sidp = alias;
1707 return 0; /* Continue walking */
1708}
1709
1710static void __arm_smmu_release_pci_iommudata(void *data)
1711{
1712 kfree(data);
1713}
1714
1715static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1716{
1717 struct device_node *of_node;
1718 struct arm_smmu_device *curr, *smmu = NULL;
1719 struct pci_bus *bus = pdev->bus;
1720
1721 /* Walk up to the root bus */
1722 while (!pci_is_root_bus(bus))
1723 bus = bus->parent;
1724
1725 /* Follow the "iommus" phandle from the host controller */
1726 of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1727 if (!of_node)
1728 return NULL;
1729
1730 /* See if we can find an SMMU corresponding to the phandle */
1731 spin_lock(&arm_smmu_devices_lock);
1732 list_for_each_entry(curr, &arm_smmu_devices, list) {
1733 if (curr->dev->of_node == of_node) {
1734 smmu = curr;
1735 break;
1736 }
1737 }
1738 spin_unlock(&arm_smmu_devices_lock);
1739 of_node_put(of_node);
1740 return smmu;
1741}
1742
1743static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1744{
1745 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1746
1747 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1748 limit *= 1UL << STRTAB_SPLIT;
1749
1750 return sid < limit;
1751}
1752
1753static int arm_smmu_add_device(struct device *dev)
1754{
1755 int i, ret;
1756 u32 sid, *sids;
1757 struct pci_dev *pdev;
1758 struct iommu_group *group;
1759 struct arm_smmu_group *smmu_group;
1760 struct arm_smmu_device *smmu;
1761
1762 /* We only support PCI, for now */
1763 if (!dev_is_pci(dev))
1764 return -ENODEV;
1765
1766 pdev = to_pci_dev(dev);
1767 group = iommu_group_get_for_dev(dev);
1768 if (IS_ERR(group))
1769 return PTR_ERR(group);
1770
1771 smmu_group = iommu_group_get_iommudata(group);
1772 if (!smmu_group) {
1773 smmu = arm_smmu_get_for_pci_dev(pdev);
1774 if (!smmu) {
1775 ret = -ENOENT;
1776 goto out_put_group;
1777 }
1778
1779 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1780 if (!smmu_group) {
1781 ret = -ENOMEM;
1782 goto out_put_group;
1783 }
1784
1785 smmu_group->ste.valid = true;
1786 smmu_group->smmu = smmu;
1787 iommu_group_set_iommudata(group, smmu_group,
1788 __arm_smmu_release_pci_iommudata);
1789 } else {
1790 smmu = smmu_group->smmu;
1791 }
1792
1793 /* Assume SID == RID until firmware tells us otherwise */
1794 pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1795 for (i = 0; i < smmu_group->num_sids; ++i) {
1796 /* If we already know about this SID, then we're done */
1797 if (smmu_group->sids[i] == sid)
1798 return 0;
1799 }
1800
1801 /* Check the SID is in range of the SMMU and our stream table */
1802 if (!arm_smmu_sid_in_range(smmu, sid)) {
1803 ret = -ERANGE;
1804 goto out_put_group;
1805 }
1806
1807 /* Ensure l2 strtab is initialised */
1808 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1809 ret = arm_smmu_init_l2_strtab(smmu, sid);
1810 if (ret)
1811 goto out_put_group;
1812 }
1813
1814 /* Resize the SID array for the group */
1815 smmu_group->num_sids++;
1816 sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1817 GFP_KERNEL);
1818 if (!sids) {
1819 smmu_group->num_sids--;
1820 ret = -ENOMEM;
1821 goto out_put_group;
1822 }
1823
1824 /* Add the new SID */
1825 sids[smmu_group->num_sids - 1] = sid;
1826 smmu_group->sids = sids;
1827 return 0;
1828
1829out_put_group:
1830 iommu_group_put(group);
1831 return ret;
1832}
1833
1834static void arm_smmu_remove_device(struct device *dev)
1835{
1836 iommu_group_remove_device(dev);
1837}
1838
1839static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1840 enum iommu_attr attr, void *data)
1841{
1842 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1843
1844 switch (attr) {
1845 case DOMAIN_ATTR_NESTING:
1846 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1847 return 0;
1848 default:
1849 return -ENODEV;
1850 }
1851}
1852
1853static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1854 enum iommu_attr attr, void *data)
1855{
1856 int ret = 0;
1857 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1858
1859 mutex_lock(&smmu_domain->init_mutex);
1860
1861 switch (attr) {
1862 case DOMAIN_ATTR_NESTING:
1863 if (smmu_domain->smmu) {
1864 ret = -EPERM;
1865 goto out_unlock;
1866 }
1867
1868 if (*(int *)data)
1869 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1870 else
1871 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1872
1873 break;
1874 default:
1875 ret = -ENODEV;
1876 }
1877
1878out_unlock:
1879 mutex_unlock(&smmu_domain->init_mutex);
1880 return ret;
1881}
1882
1883static struct iommu_ops arm_smmu_ops = {
1884 .capable = arm_smmu_capable,
1885 .domain_alloc = arm_smmu_domain_alloc,
1886 .domain_free = arm_smmu_domain_free,
1887 .attach_dev = arm_smmu_attach_dev,
1888 .detach_dev = arm_smmu_detach_dev,
1889 .map = arm_smmu_map,
1890 .unmap = arm_smmu_unmap,
1891 .iova_to_phys = arm_smmu_iova_to_phys,
1892 .add_device = arm_smmu_add_device,
1893 .remove_device = arm_smmu_remove_device,
1894 .domain_get_attr = arm_smmu_domain_get_attr,
1895 .domain_set_attr = arm_smmu_domain_set_attr,
1896 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1897};
1898
1899/* Probing and initialisation functions */
1900static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1901 struct arm_smmu_queue *q,
1902 unsigned long prod_off,
1903 unsigned long cons_off,
1904 size_t dwords)
1905{
1906 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1907
1908 q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1909 if (!q->base) {
1910 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1911 qsz);
1912 return -ENOMEM;
1913 }
1914
1915 q->prod_reg = smmu->base + prod_off;
1916 q->cons_reg = smmu->base + cons_off;
1917 q->ent_dwords = dwords;
1918
1919 q->q_base = Q_BASE_RWA;
1920 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1921 q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1922 << Q_BASE_LOG2SIZE_SHIFT;
1923
1924 q->prod = q->cons = 0;
1925 return 0;
1926}
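/*
 * Worked example (constants assumed rather than taken from the ID
 * registers): a command queue with max_n_shift = 8 and
 * CMDQ_ENT_DWORDS = 2 needs (1 << 8) * 2 * 8 = 4KB of DMA memory, which
 * matches the "capped at 4k" sizing done in arm_smmu_device_probe().
 * Q_BASE then carries the DMA address together with log2 of the queue
 * size for the hardware registers.
 */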
1927
1928static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu,
1929 struct arm_smmu_queue *q)
1930{
1931 size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3;
1932
1933 dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma);
1934}
1935
1936static void arm_smmu_free_queues(struct arm_smmu_device *smmu)
1937{
1938 arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
1939 arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
1940
1941 if (smmu->features & ARM_SMMU_FEAT_PRI)
1942 arm_smmu_free_one_queue(smmu, &smmu->priq.q);
1943}
1944
1945static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1946{
1947 int ret;
1948
1949 /* cmdq */
1950 spin_lock_init(&smmu->cmdq.lock);
1951 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1952 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1953 if (ret)
1954 goto out;
1955
1956 /* evtq */
1957 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1958 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1959 if (ret)
1960 goto out_free_cmdq;
1961
1962 /* priq */
1963 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
1964 return 0;
1965
1966 ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
1967 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
1968 if (ret)
1969 goto out_free_evtq;
1970
1971 return 0;
1972
1973out_free_evtq:
1974 arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
1975out_free_cmdq:
1976 arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
1977out:
1978 return ret;
1979}
1980
1981static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu)
1982{
1983 int i;
1984 size_t size;
1985 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1986
1987 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1988 for (i = 0; i < cfg->num_l1_ents; ++i) {
1989 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i];
1990
1991 if (!desc->l2ptr)
1992 continue;
1993
1994 dma_free_coherent(smmu->dev, size, desc->l2ptr,
1995 desc->l2ptr_dma);
1996 }
1997}
1998
1999static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2000{
2001 unsigned int i;
2002 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2003 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2004 void *strtab = smmu->strtab_cfg.strtab;
2005
2006 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2007 if (!cfg->l1_desc) {
2008 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2009 return -ENOMEM;
2010 }
2011
2012 for (i = 0; i < cfg->num_l1_ents; ++i) {
2013 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2014 strtab += STRTAB_L1_DESC_DWORDS << 3;
2015 }
2016
2017 return 0;
2018}
2019
2020static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2021{
2022 void *strtab;
2023 u64 reg;
2024 u32 size;
2025 int ret;
2026 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2027
2028 /* Calculate the L1 size, capped to the SIDSIZE */
2029 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2030 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2031 if (size + STRTAB_SPLIT < smmu->sid_bits)
2032 dev_warn(smmu->dev,
2033 "2-level strtab only covers %u/%u bits of SID\n",
2034 size + STRTAB_SPLIT, smmu->sid_bits);
2035
2036 cfg->num_l1_ents = 1 << size;
2037 size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2038 strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2039 GFP_KERNEL);
2040 if (!strtab) {
2041 dev_err(smmu->dev,
2042 "failed to allocate l1 stream table (%u bytes)\n",
2043 size);
2044 return -ENOMEM;
2045 }
2046 cfg->strtab = strtab;
2047
2048 /* Configure strtab_base_cfg for 2 levels */
2049 reg = STRTAB_BASE_CFG_FMT_2LVL;
2050 reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2051 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2052 reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2053 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2054 cfg->strtab_base_cfg = reg;
2055
2056 ret = arm_smmu_init_l1_strtab(smmu);
2057 if (ret)
2058 dma_free_coherent(smmu->dev,
2059 cfg->num_l1_ents *
2060 (STRTAB_L1_DESC_DWORDS << 3),
2061 strtab,
2062 cfg->strtab_dma);
2063 return ret;
2064}
2065
2066static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2067{
2068 void *strtab;
2069 u64 reg;
2070 u32 size;
2071 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2072
2073 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2074 strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2075 GFP_KERNEL);
2076 if (!strtab) {
2077 dev_err(smmu->dev,
2078 "failed to allocate linear stream table (%u bytes)\n",
2079 size);
2080 return -ENOMEM;
2081 }
2082 cfg->strtab = strtab;
2083 cfg->num_l1_ents = 1 << smmu->sid_bits;
2084
2085 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2086 reg = STRTAB_BASE_CFG_FMT_LINEAR;
2087 reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2088 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2089 cfg->strtab_base_cfg = reg;
2090
2091 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2092 return 0;
2093}
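/*
 * For comparison (illustrative figures): with 16 SID bits and 64-byte
 * STEs a linear table needs (1 << 16) * 64 = 4MB of physically
 * contiguous DMA memory, which is why the 2-level format is preferred
 * whenever the hardware advertises ARM_SMMU_FEAT_2_LVL_STRTAB.
 */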
2094
2095static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2096{
2097 u64 reg;
2098 int ret;
2099
2100 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2101 ret = arm_smmu_init_strtab_2lvl(smmu);
2102 else
2103 ret = arm_smmu_init_strtab_linear(smmu);
2104
2105 if (ret)
2106 return ret;
2107
2108 /* Set the strtab base address */
2109 reg = smmu->strtab_cfg.strtab_dma &
2110 STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2111 reg |= STRTAB_BASE_RA;
2112 smmu->strtab_cfg.strtab_base = reg;
2113
2114 /* Allocate the first VMID for stage-2 bypass STEs */
2115 set_bit(0, smmu->vmid_map);
2116 return 0;
2117}
2118
2119static void arm_smmu_free_strtab(struct arm_smmu_device *smmu)
2120{
2121 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2122 u32 size = cfg->num_l1_ents;
2123
2124 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2125 arm_smmu_free_l2_strtab(smmu);
2126 size *= STRTAB_L1_DESC_DWORDS << 3;
2127 } else {
2128 size *= STRTAB_STE_DWORDS << 3;
2129 }
2130
2131 dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma);
2132}
2133
2134static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2135{
2136 int ret;
2137
2138 ret = arm_smmu_init_queues(smmu);
2139 if (ret)
2140 return ret;
2141
2142 ret = arm_smmu_init_strtab(smmu);
2143 if (ret)
2144 goto out_free_queues;
2145
2146 return 0;
2147
2148out_free_queues:
2149 arm_smmu_free_queues(smmu);
2150 return ret;
2151}
2152
2153static void arm_smmu_free_structures(struct arm_smmu_device *smmu)
2154{
2155 arm_smmu_free_strtab(smmu);
2156 arm_smmu_free_queues(smmu);
2157}
2158
2159static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2160 unsigned int reg_off, unsigned int ack_off)
2161{
2162 u32 reg;
2163
2164 writel_relaxed(val, smmu->base + reg_off);
2165 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2166 1, ARM_SMMU_POLL_TIMEOUT_US);
2167}
2168
2169static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2170{
2171 int ret, irq;
2172
2173 /* Disable IRQs first */
2174 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2175 ARM_SMMU_IRQ_CTRLACK);
2176 if (ret) {
2177 dev_err(smmu->dev, "failed to disable irqs\n");
2178 return ret;
2179 }
2180
2181 /* Clear the MSI address regs */
2182 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2183 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2184
2185 /* Request wired interrupt lines */
2186 irq = smmu->evtq.q.irq;
2187 if (irq) {
2188 ret = devm_request_threaded_irq(smmu->dev, irq,
2189 arm_smmu_evtq_handler,
2190 arm_smmu_evtq_thread,
2191 0, "arm-smmu-v3-evtq", smmu);
2192 if (IS_ERR_VALUE(ret))
2193 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2194 }
2195
2196 irq = smmu->cmdq.q.irq;
2197 if (irq) {
2198 ret = devm_request_irq(smmu->dev, irq,
2199 arm_smmu_cmdq_sync_handler, 0,
2200 "arm-smmu-v3-cmdq-sync", smmu);
2201 if (IS_ERR_VALUE(ret))
2202 dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2203 }
2204
2205 irq = smmu->gerr_irq;
2206 if (irq) {
2207 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2208 0, "arm-smmu-v3-gerror", smmu);
2209 if (IS_ERR_VALUE(ret))
2210 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2211 }
2212
2213 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2214 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2215
2216 irq = smmu->priq.q.irq;
2217 if (irq) {
2218 ret = devm_request_threaded_irq(smmu->dev, irq,
2219 arm_smmu_priq_handler,
2220 arm_smmu_priq_thread,
2221 0, "arm-smmu-v3-priq",
2222 smmu);
2223 if (IS_ERR_VALUE(ret))
2224 dev_warn(smmu->dev,
2225 "failed to enable priq irq\n");
2226 }
2227 }
2228
2229 /* Enable interrupt generation on the SMMU */
2230 ret = arm_smmu_write_reg_sync(smmu,
2231 IRQ_CTRL_EVTQ_IRQEN |
2232 IRQ_CTRL_GERROR_IRQEN,
2233 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2234 if (ret)
2235 dev_warn(smmu->dev, "failed to enable irqs\n");
2236
2237 return 0;
2238}
2239
2240static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2241{
2242 int ret;
2243
2244 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2245 if (ret)
2246 dev_err(smmu->dev, "failed to clear cr0\n");
2247
2248 return ret;
2249}
2250
2251static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2252{
2253 int ret;
2254 u32 reg, enables;
2255 struct arm_smmu_cmdq_ent cmd;
2256
2257 /* Clear CR0 and sync (disables SMMU and queue processing) */
2258 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2259 if (reg & CR0_SMMUEN)
2260 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2261
2262 ret = arm_smmu_device_disable(smmu);
2263 if (ret)
2264 return ret;
2265
2266 /* CR1 (table and queue memory attributes) */
2267 reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2268 (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2269 (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2270 (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2271 (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2272 (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2273 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2274
2275 /* CR2 (PTM, record invalid SIDs, E2H) */
2276 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2277 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2278
2279 /* Stream table */
2280 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2281 smmu->base + ARM_SMMU_STRTAB_BASE);
2282 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2283 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2284
2285 /* Command queue */
2286 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2287 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2288 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2289
2290 enables = CR0_CMDQEN;
2291 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2292 ARM_SMMU_CR0ACK);
2293 if (ret) {
2294 dev_err(smmu->dev, "failed to enable command queue\n");
2295 return ret;
2296 }
2297
2298 /* Invalidate any cached configuration */
2299 cmd.opcode = CMDQ_OP_CFGI_ALL;
2300 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2301 cmd.opcode = CMDQ_OP_CMD_SYNC;
2302 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2303
2304 /* Invalidate any stale TLB entries */
2305 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2306 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2307 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2308 }
2309
2310 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2311 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2312 cmd.opcode = CMDQ_OP_CMD_SYNC;
2313 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2314
2315 /* Event queue */
2316 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2317 writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2318 writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2319
2320 enables |= CR0_EVTQEN;
2321 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2322 ARM_SMMU_CR0ACK);
2323 if (ret) {
2324 dev_err(smmu->dev, "failed to enable event queue\n");
2325 return ret;
2326 }
2327
2328 /* PRI queue */
2329 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2330 writeq_relaxed(smmu->priq.q.q_base,
2331 smmu->base + ARM_SMMU_PRIQ_BASE);
2332 writel_relaxed(smmu->priq.q.prod,
2333 smmu->base + ARM_SMMU_PRIQ_PROD);
2334 writel_relaxed(smmu->priq.q.cons,
2335 smmu->base + ARM_SMMU_PRIQ_CONS);
2336
2337 enables |= CR0_PRIQEN;
2338 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2339 ARM_SMMU_CR0ACK);
2340 if (ret) {
2341 dev_err(smmu->dev, "failed to enable PRI queue\n");
2342 return ret;
2343 }
2344 }
2345
2346 ret = arm_smmu_setup_irqs(smmu);
2347 if (ret) {
2348 dev_err(smmu->dev, "failed to setup irqs\n");
2349 return ret;
2350 }
2351
2352 /* Enable the SMMU interface */
2353 enables |= CR0_SMMUEN;
2354 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2355 ARM_SMMU_CR0ACK);
2356 if (ret) {
2357 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2358 return ret;
2359 }
2360
2361 return 0;
2362}
2363
2364static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2365{
2366 u32 reg;
2367 bool coherent;
2368 unsigned long pgsize_bitmap = 0;
2369
2370 /* IDR0 */
2371 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2372
2373 /* 2-level structures */
2374 if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2375 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2376
2377 if (reg & IDR0_CD2L)
2378 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2379
2380 /*
2381 * Translation table endianness.
2382 * We currently require the same endianness as the CPU, but this
2383 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2384 */
2385 switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2386 case IDR0_TTENDIAN_MIXED:
2387 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2388 break;
2389#ifdef __BIG_ENDIAN
2390 case IDR0_TTENDIAN_BE:
2391 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2392 break;
2393#else
2394 case IDR0_TTENDIAN_LE:
2395 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2396 break;
2397#endif
2398 default:
2399 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2400 return -ENXIO;
2401 }
2402
2403 /* Boolean feature flags */
2404 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2405 smmu->features |= ARM_SMMU_FEAT_PRI;
2406
2407 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2408 smmu->features |= ARM_SMMU_FEAT_ATS;
2409
2410 if (reg & IDR0_SEV)
2411 smmu->features |= ARM_SMMU_FEAT_SEV;
2412
2413 if (reg & IDR0_MSI)
2414 smmu->features |= ARM_SMMU_FEAT_MSI;
2415
2416 if (reg & IDR0_HYP)
2417 smmu->features |= ARM_SMMU_FEAT_HYP;
2418
2419 /*
2420 * The dma-coherent property is used in preference to the ID
2421 * register, but warn on mismatch.
2422 */
2423 coherent = of_dma_is_coherent(smmu->dev->of_node);
2424 if (coherent)
2425 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2426
2427 if (!!(reg & IDR0_COHACC) != coherent)
2428 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2429 coherent ? "true" : "false");
2430
2431 if (reg & IDR0_STALL_MODEL)
2432 smmu->features |= ARM_SMMU_FEAT_STALLS;
2433
2434 if (reg & IDR0_S1P)
2435 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2436
2437 if (reg & IDR0_S2P)
2438 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2439
2440 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2441 dev_err(smmu->dev, "no translation support!\n");
2442 return -ENXIO;
2443 }
2444
2445 /* We only support the AArch64 table format at present */
2446 if ((reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) < IDR0_TTF_AARCH64) {
2447 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2448 return -ENXIO;
2449 }
2450
2451 /* ASID/VMID sizes */
2452 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2453 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2454
2455 /* IDR1 */
2456 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2457 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2458 dev_err(smmu->dev, "embedded implementation not supported\n");
2459 return -ENXIO;
2460 }
2461
2462 /* Queue sizes, capped at 4k */
2463 smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2464 reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2465 if (!smmu->cmdq.q.max_n_shift) {
2466 /* Odd alignment restrictions on the base, so ignore for now */
2467 dev_err(smmu->dev, "unit-length command queue not supported\n");
2468 return -ENXIO;
2469 }
2470
2471 smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2472 reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2473 smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2474 reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2475
2476 /* SID/SSID sizes */
2477 smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2478 smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2479
2480 /* IDR5 */
2481 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2482
2483 /* Maximum number of outstanding stalls */
2484 smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2485 & IDR5_STALL_MAX_MASK;
2486
2487 /* Page sizes */
2488 if (reg & IDR5_GRAN64K)
2489 pgsize_bitmap |= SZ_64K | SZ_512M;
2490 if (reg & IDR5_GRAN16K)
2491 pgsize_bitmap |= SZ_16K | SZ_32M;
2492 if (reg & IDR5_GRAN4K)
2493 pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2494
2495 arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2496
2497 /* Output address size */
2498 switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2499 case IDR5_OAS_32_BIT:
2500 smmu->oas = 32;
2501 break;
2502 case IDR5_OAS_36_BIT:
2503 smmu->oas = 36;
2504 break;
2505 case IDR5_OAS_40_BIT:
2506 smmu->oas = 40;
2507 break;
2508 case IDR5_OAS_42_BIT:
2509 smmu->oas = 42;
2510 break;
2511 case IDR5_OAS_44_BIT:
2512 smmu->oas = 44;
2513 break;
2514 case IDR5_OAS_48_BIT:
2515 smmu->oas = 48;
2516 break;
2517 default:
2518 dev_err(smmu->dev, "unknown output address size!\n");
2519 return -ENXIO;
2520 }
2521
2522 /* Set the DMA mask for our table walker */
2523 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2524 dev_warn(smmu->dev,
2525 "failed to set DMA mask for table walker\n");
2526
2527 if (!smmu->ias)
2528 smmu->ias = smmu->oas;
2529
2530 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2531 smmu->ias, smmu->oas, smmu->features);
2532 return 0;
2533}
2534
2535static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2536{
2537 int irq, ret;
2538 struct resource *res;
2539 struct arm_smmu_device *smmu;
2540 struct device *dev = &pdev->dev;
2541
2542 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2543 if (!smmu) {
2544 dev_err(dev, "failed to allocate arm_smmu_device\n");
2545 return -ENOMEM;
2546 }
2547 smmu->dev = dev;
2548
2549 /* Base address */
2550 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2551 if (resource_size(res) + 1 < SZ_128K) {
2552 dev_err(dev, "MMIO region too small (%pr)\n", res);
2553 return -EINVAL;
2554 }
2555
2556 smmu->base = devm_ioremap_resource(dev, res);
2557 if (IS_ERR(smmu->base))
2558 return PTR_ERR(smmu->base);
2559
2560 /* Interrupt lines */
2561 irq = platform_get_irq_byname(pdev, "eventq");
2562 if (irq > 0)
2563 smmu->evtq.q.irq = irq;
2564
2565 irq = platform_get_irq_byname(pdev, "priq");
2566 if (irq > 0)
2567 smmu->priq.q.irq = irq;
2568
2569 irq = platform_get_irq_byname(pdev, "cmdq-sync");
2570 if (irq > 0)
2571 smmu->cmdq.q.irq = irq;
2572
2573 irq = platform_get_irq_byname(pdev, "gerror");
2574 if (irq > 0)
2575 smmu->gerr_irq = irq;
2576
2577 /* Probe the h/w */
2578 ret = arm_smmu_device_probe(smmu);
2579 if (ret)
2580 return ret;
2581
2582 /* Initialise in-memory data structures */
2583 ret = arm_smmu_init_structures(smmu);
2584 if (ret)
2585 return ret;
2586
2587 /* Reset the device */
2588 ret = arm_smmu_device_reset(smmu);
2589 if (ret)
2590 goto out_free_structures;
2591
2592 /* Record our private device structure */
2593 INIT_LIST_HEAD(&smmu->list);
2594 spin_lock(&arm_smmu_devices_lock);
2595 list_add(&smmu->list, &arm_smmu_devices);
2596 spin_unlock(&arm_smmu_devices_lock);
2597 return 0;
2598
2599out_free_structures:
2600 arm_smmu_free_structures(smmu);
2601 return ret;
2602}
2603
2604static int arm_smmu_device_remove(struct platform_device *pdev)
2605{
2606 struct arm_smmu_device *curr, *smmu = NULL;
2607 struct device *dev = &pdev->dev;
2608
2609 spin_lock(&arm_smmu_devices_lock);
2610 list_for_each_entry(curr, &arm_smmu_devices, list) {
2611 if (curr->dev == dev) {
2612 smmu = curr;
2613 list_del(&smmu->list);
2614 break;
2615 }
2616 }
2617 spin_unlock(&arm_smmu_devices_lock);
2618
2619 if (!smmu)
2620 return -ENODEV;
2621
2622 arm_smmu_device_disable(smmu);
2623 arm_smmu_free_structures(smmu);
2624 return 0;
2625}
2626
2627static struct of_device_id arm_smmu_of_match[] = {
2628 { .compatible = "arm,smmu-v3", },
2629 { },
2630};
2631MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2632
2633static struct platform_driver arm_smmu_driver = {
2634 .driver = {
2635 .name = "arm-smmu-v3",
2636 .of_match_table = of_match_ptr(arm_smmu_of_match),
2637 },
2638 .probe = arm_smmu_device_dt_probe,
2639 .remove = arm_smmu_device_remove,
2640};
2641
2642static int __init arm_smmu_init(void)
2643{
2644 struct device_node *np;
2645 int ret;
2646
2647 np = of_find_matching_node(NULL, arm_smmu_of_match);
2648 if (!np)
2649 return 0;
2650
2651 of_node_put(np);
2652
2653 ret = platform_driver_register(&arm_smmu_driver);
2654 if (ret)
2655 return ret;
2656
2657 return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2658}
2659
2660static void __exit arm_smmu_exit(void)
2661{
2662 return platform_driver_unregister(&arm_smmu_driver);
2663}
2664
2665subsys_initcall(arm_smmu_init);
2666module_exit(arm_smmu_exit);
2667
2668MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2669MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2670MODULE_LICENSE("GPL v2");
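
The probe path above derives every driver limit from the SMMU ID registers: fields are pulled out with mask/shift pairs, queue depths are capped against the driver's own maxima with min(), and the decoded output address size feeds dma_set_mask_and_coherent(). Below is a minimal, userspace-only sketch of that decode pattern; the register value and the field layout are invented for the example and do not match the real IDR0/IDR1/IDR5 encodings defined earlier in this file.

	/*
	 * Illustrative only: the "read ID register, mask/shift the field,
	 * derive a driver limit" pattern used by arm_smmu_device_probe(),
	 * modelled with made-up field positions.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define EX_IDR_QS_SHIFT      0    /* log2 of supported queue entries */
	#define EX_IDR_QS_MASK       0x1f
	#define EX_IDR_OAS_SHIFT     8    /* output address size selector */
	#define EX_IDR_OAS_MASK      0x7
	#define EX_CMDQ_MAX_SZ_SHIFT 8    /* driver caps queues at 256 entries here */

	static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

	int main(void)
	{
		uint32_t reg = 0x20c;     /* pretend value read from MMIO */
		uint32_t qshift, oas_field, oas;
		uint64_t dma_mask;

		/* Cap the advertised queue size at the driver's own maximum */
		qshift = min_u32(EX_CMDQ_MAX_SZ_SHIFT,
				 reg >> EX_IDR_QS_SHIFT & EX_IDR_QS_MASK);

		/* Translate the encoded output address size into a bit count */
		oas_field = reg >> EX_IDR_OAS_SHIFT & EX_IDR_OAS_MASK;
		oas = (const uint32_t[]){32, 36, 40, 42, 44, 48, 48, 48}[oas_field];

		/* Same shape as DMA_BIT_MASK(oas) in the kernel */
		dma_mask = (oas == 64) ? ~0ULL : (1ULL << oas) - 1;

		printf("queue shift %u (%u entries), oas %u-bit, dma mask %#llx\n",
		       (unsigned int)qshift, 1u << qshift, (unsigned int)oas,
		       (unsigned long long)dma_mask);
		return 0;
	}
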
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 66a803b9dd3a..dce041b1c139 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -202,8 +202,7 @@
202#define ARM_SMMU_CB_S1_TLBIVAL 0x620 202#define ARM_SMMU_CB_S1_TLBIVAL 0x620
203#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 203#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
204#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 204#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
205#define ARM_SMMU_CB_ATS1PR_LO 0x800 205#define ARM_SMMU_CB_ATS1PR 0x800
206#define ARM_SMMU_CB_ATS1PR_HI 0x804
207#define ARM_SMMU_CB_ATSR 0x8f0 206#define ARM_SMMU_CB_ATSR 0x8f0
208 207
209#define SCTLR_S1_ASIDPNE (1 << 12) 208#define SCTLR_S1_ASIDPNE (1 << 12)
@@ -247,7 +246,7 @@
247#define FSYNR0_WNR (1 << 4) 246#define FSYNR0_WNR (1 << 4)
248 247
249static int force_stage; 248static int force_stage;
250module_param_named(force_stage, force_stage, int, S_IRUGO | S_IWUSR); 249module_param_named(force_stage, force_stage, int, S_IRUGO);
251MODULE_PARM_DESC(force_stage, 250MODULE_PARM_DESC(force_stage,
252 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); 251 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
253 252
@@ -1229,18 +1228,18 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1229 void __iomem *cb_base; 1228 void __iomem *cb_base;
1230 u32 tmp; 1229 u32 tmp;
1231 u64 phys; 1230 u64 phys;
1231 unsigned long va;
1232 1232
1233 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); 1233 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
1234 1234
1235 if (smmu->version == 1) { 1235 /* ATS1 registers can only be written atomically */
1236 u32 reg = iova & ~0xfff; 1236 va = iova & ~0xfffUL;
1237 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); 1237#ifdef CONFIG_64BIT
1238 } else { 1238 if (smmu->version == ARM_SMMU_V2)
1239 u32 reg = iova & ~0xfff; 1239 writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1240 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); 1240 else
1241 reg = ((u64)iova & ~0xfff) >> 32; 1241#endif
1242 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI); 1242 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1243 }
1244 1243
1245 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, 1244 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1246 !(tmp & ATSR_ACTIVE), 5, 50)) { 1245 !(tmp & ATSR_ACTIVE), 5, 50)) {
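
The hunk above replaces the pair of 32-bit writes to ARM_SMMU_CB_ATS1PR_LO/HI with a single writeq_relaxed() when the kernel is 64-bit and the SMMU is v2, keeping a plain writel_relaxed() otherwise (writeq_relaxed() is not available on 32-bit builds, hence the #ifdef). The atomicity comment can be seen in this small userspace model, which is only an illustration and not kernel code: if the 64-bit VA is updated as two halves, an observer sampling between the two stores sees a mixed old/new value.

	/*
	 * Illustrative only: models why updating a 64-bit register as two
	 * 32-bit halves is not atomic.  "Hardware" samples the register after
	 * each store and can observe a mixed old/new VA.
	 */
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t reg_lo, reg_hi;          /* stand-in for the LO/HI halves */

	static void hw_sample(const char *when)
	{
		uint64_t seen = (uint64_t)reg_hi << 32 | reg_lo;

		printf("%-22s hardware sees VA %#018llx\n", when,
		       (unsigned long long)seen);
	}

	int main(void)
	{
		uint64_t old_va = 0x0000000100000000ULL;
		uint64_t new_va = 0x0000000200003000ULL & ~0xfffULL;

		reg_lo = (uint32_t)old_va;
		reg_hi = (uint32_t)(old_va >> 32);

		/* Split update, as the old ATS1PR_LO/HI code path did */
		reg_lo = (uint32_t)new_va;
		hw_sample("after LO write only:");   /* new low word, old high word */
		reg_hi = (uint32_t)(new_va >> 32);
		hw_sample("after HI write:");
		return 0;
	}
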
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 536f2d8ea41a..c9db04d4ef39 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -26,7 +26,7 @@
26 * These routines are used by both DMA-remapping and Interrupt-remapping 26 * These routines are used by both DMA-remapping and Interrupt-remapping
27 */ 27 */
28 28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */ 29#define pr_fmt(fmt) "DMAR: " fmt
30 30
31#include <linux/pci.h> 31#include <linux/pci.h>
32#include <linux/dmar.h> 32#include <linux/dmar.h>
@@ -555,7 +555,7 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
555 break; 555 break;
556 } else if (next > end) { 556 } else if (next > end) {
557 /* Avoid passing table end */ 557 /* Avoid passing table end */
558 pr_warn(FW_BUG "record passes table end\n"); 558 pr_warn(FW_BUG "Record passes table end\n");
559 ret = -EINVAL; 559 ret = -EINVAL;
560 break; 560 break;
561 } 561 }
@@ -802,7 +802,7 @@ int __init dmar_table_init(void)
802 ret = parse_dmar_table(); 802 ret = parse_dmar_table();
803 if (ret < 0) { 803 if (ret < 0) {
804 if (ret != -ENODEV) 804 if (ret != -ENODEV)
805 pr_info("parse DMAR table failure.\n"); 805 pr_info("Parse DMAR table failure.\n");
806 } else if (list_empty(&dmar_drhd_units)) { 806 } else if (list_empty(&dmar_drhd_units)) {
807 pr_info("No DMAR devices found\n"); 807 pr_info("No DMAR devices found\n");
808 ret = -ENODEV; 808 ret = -ENODEV;
@@ -847,7 +847,7 @@ dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
847 else 847 else
848 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE); 848 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
849 if (!addr) { 849 if (!addr) {
850 pr_warn("IOMMU: can't validate: %llx\n", drhd->address); 850 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
851 return -EINVAL; 851 return -EINVAL;
852 } 852 }
853 853
@@ -921,14 +921,14 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
921 iommu->reg_size = VTD_PAGE_SIZE; 921 iommu->reg_size = VTD_PAGE_SIZE;
922 922
923 if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { 923 if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
924 pr_err("IOMMU: can't reserve memory\n"); 924 pr_err("Can't reserve memory\n");
925 err = -EBUSY; 925 err = -EBUSY;
926 goto out; 926 goto out;
927 } 927 }
928 928
929 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); 929 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
930 if (!iommu->reg) { 930 if (!iommu->reg) {
931 pr_err("IOMMU: can't map the region\n"); 931 pr_err("Can't map the region\n");
932 err = -ENOMEM; 932 err = -ENOMEM;
933 goto release; 933 goto release;
934 } 934 }
@@ -952,13 +952,13 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
952 iommu->reg_size = map_size; 952 iommu->reg_size = map_size;
953 if (!request_mem_region(iommu->reg_phys, iommu->reg_size, 953 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
954 iommu->name)) { 954 iommu->name)) {
955 pr_err("IOMMU: can't reserve memory\n"); 955 pr_err("Can't reserve memory\n");
956 err = -EBUSY; 956 err = -EBUSY;
957 goto out; 957 goto out;
958 } 958 }
959 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); 959 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
960 if (!iommu->reg) { 960 if (!iommu->reg) {
961 pr_err("IOMMU: can't map the region\n"); 961 pr_err("Can't map the region\n");
962 err = -ENOMEM; 962 err = -ENOMEM;
963 goto release; 963 goto release;
964 } 964 }
@@ -1014,14 +1014,14 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
1014 return -ENOMEM; 1014 return -ENOMEM;
1015 1015
1016 if (dmar_alloc_seq_id(iommu) < 0) { 1016 if (dmar_alloc_seq_id(iommu) < 0) {
1017 pr_err("IOMMU: failed to allocate seq_id\n"); 1017 pr_err("Failed to allocate seq_id\n");
1018 err = -ENOSPC; 1018 err = -ENOSPC;
1019 goto error; 1019 goto error;
1020 } 1020 }
1021 1021
1022 err = map_iommu(iommu, drhd->reg_base_addr); 1022 err = map_iommu(iommu, drhd->reg_base_addr);
1023 if (err) { 1023 if (err) {
1024 pr_err("IOMMU: failed to map %s\n", iommu->name); 1024 pr_err("Failed to map %s\n", iommu->name);
1025 goto error_free_seq_id; 1025 goto error_free_seq_id;
1026 } 1026 }
1027 1027
@@ -1045,8 +1045,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
1045 iommu->node = -1; 1045 iommu->node = -1;
1046 1046
1047 ver = readl(iommu->reg + DMAR_VER_REG); 1047 ver = readl(iommu->reg + DMAR_VER_REG);
1048 pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", 1048 pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1049 iommu->seq_id, 1049 iommu->name,
1050 (unsigned long long)drhd->reg_base_addr, 1050 (unsigned long long)drhd->reg_base_addr,
1051 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), 1051 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1052 (unsigned long long)iommu->cap, 1052 (unsigned long long)iommu->cap,
@@ -1646,13 +1646,13 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
1646 if (irq > 0) { 1646 if (irq > 0) {
1647 iommu->irq = irq; 1647 iommu->irq = irq;
1648 } else { 1648 } else {
1649 pr_err("IOMMU: no free vectors\n"); 1649 pr_err("No free IRQ vectors\n");
1650 return -EINVAL; 1650 return -EINVAL;
1651 } 1651 }
1652 1652
1653 ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); 1653 ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1654 if (ret) 1654 if (ret)
1655 pr_err("IOMMU: can't request irq\n"); 1655 pr_err("Can't request irq\n");
1656 return ret; 1656 return ret;
1657} 1657}
1658 1658
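
The message changes above hang off the new pr_fmt() definition at the top of dmar.c: the pr_*() macros paste the format prefix at compile time, so every log line in the file gains the "DMAR: " prefix without editing each call site, and the repeated "IOMMU: " strings can be dropped. A minimal userspace sketch of the mechanism, with printf()/fprintf() standing in for printk():

	/*
	 * Illustrative only: how a pr_fmt()-style macro prefixes every message
	 * at compile time.  The real macros live in include/linux/printk.h.
	 */
	#include <stdio.h>

	#define pr_fmt(fmt) "DMAR: " fmt
	#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)
	#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

	int main(void)
	{
		pr_info("No DMAR devices found\n");
		pr_err("Can't validate DRHD address: %llx\n", 0xfed90000ULL);
		return 0;
	}
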
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 3e898504a7c4..97c41b8ab5d9 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -12,21 +12,22 @@
12#define DEBUG 12#define DEBUG
13#endif 13#endif
14 14
15#include <linux/io.h>
16#include <linux/interrupt.h>
17#include <linux/platform_device.h>
18#include <linux/slab.h>
19#include <linux/pm_runtime.h>
20#include <linux/clk.h> 15#include <linux/clk.h>
16#include <linux/dma-mapping.h>
21#include <linux/err.h> 17#include <linux/err.h>
22#include <linux/mm.h> 18#include <linux/io.h>
23#include <linux/iommu.h> 19#include <linux/iommu.h>
24#include <linux/errno.h> 20#include <linux/interrupt.h>
25#include <linux/list.h> 21#include <linux/list.h>
26#include <linux/memblock.h> 22#include <linux/of.h>
27#include <linux/export.h> 23#include <linux/of_iommu.h>
24#include <linux/of_platform.h>
25#include <linux/platform_device.h>
26#include <linux/pm_runtime.h>
27#include <linux/slab.h>
28 28
29#include <asm/cacheflush.h> 29#include <asm/cacheflush.h>
30#include <asm/dma-iommu.h>
30#include <asm/pgtable.h> 31#include <asm/pgtable.h>
31 32
32typedef u32 sysmmu_iova_t; 33typedef u32 sysmmu_iova_t;
@@ -184,35 +185,50 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
184 "UNKNOWN FAULT" 185 "UNKNOWN FAULT"
185}; 186};
186 187
187/* attached to dev.archdata.iommu of the master device */ 188/*
189 * This structure is attached to dev.archdata.iommu of the master device
190 * on device add. It contains a list of SYSMMU controllers defined by device
191 * tree which are bound to the given master device. It is usually referenced
192 * by the 'owner' pointer.
193*/
188struct exynos_iommu_owner { 194struct exynos_iommu_owner {
189 struct list_head client; /* entry of exynos_iommu_domain.clients */ 195 struct list_head controllers; /* list of sysmmu_drvdata.owner_node */
190 struct device *dev;
191 struct device *sysmmu;
192 struct iommu_domain *domain;
193 void *vmm_data; /* IO virtual memory manager's data */
194 spinlock_t lock; /* Lock to preserve consistency of System MMU */
195}; 196};
196 197
198/*
199 * This structure is an exynos specific generalization of struct iommu_domain.
200 * It contains a list of SYSMMU controllers from all master devices which have
201 * been attached to this domain, and the page tables of the IO address space
202 * defined by it. It is usually referenced by the 'domain' pointer.
203 */
197struct exynos_iommu_domain { 204struct exynos_iommu_domain {
198 struct list_head clients; /* list of sysmmu_drvdata.node */ 205 struct list_head clients; /* list of sysmmu_drvdata.domain_node */
199 sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */ 206 sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */
200 short *lv2entcnt; /* free lv2 entry counter for each section */ 207 short *lv2entcnt; /* free lv2 entry counter for each section */
201 spinlock_t lock; /* lock for this structure */ 208 spinlock_t lock; /* lock for modifying the list of clients */
202 spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */ 209 spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */
203 struct iommu_domain domain; /* generic domain data structure */ 210 struct iommu_domain domain; /* generic domain data structure */
204}; 211};
205 212
213/*
214 * This structure holds all data of a single SYSMMU controller. This includes
215 * hw resources like registers and clocks, pointers and list nodes to connect
216 * it to all other structures, internal state and parameters read from the
217 * device tree. It is usually referenced by the 'data' pointer.
218 */
206struct sysmmu_drvdata { 219struct sysmmu_drvdata {
207 struct device *sysmmu; /* System MMU's device descriptor */ 220 struct device *sysmmu; /* SYSMMU controller device */
208 struct device *master; /* Owner of system MMU */ 221 struct device *master; /* master device (owner) */
209 void __iomem *sfrbase; 222 void __iomem *sfrbase; /* our registers */
210 struct clk *clk; 223 struct clk *clk; /* SYSMMU's clock */
211 struct clk *clk_master; 224 struct clk *clk_master; /* master's device clock */
212 int activations; 225 int activations; /* number of calls to sysmmu_enable */
213 spinlock_t lock; 226 spinlock_t lock; /* lock for modifying state */
214 struct iommu_domain *domain; 227 struct exynos_iommu_domain *domain; /* domain we belong to */
215 phys_addr_t pgtable; 228 struct list_head domain_node; /* node for domain clients list */
229 struct list_head owner_node; /* node for owner controllers list */
230 phys_addr_t pgtable; /* assigned page table structure */
231 unsigned int version; /* our version */
216}; 232};
217 233
218static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) 234static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom)
@@ -244,11 +260,6 @@ static void sysmmu_unblock(void __iomem *sfrbase)
244 __raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL); 260 __raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL);
245} 261}
246 262
247static unsigned int __raw_sysmmu_version(struct sysmmu_drvdata *data)
248{
249 return MMU_RAW_VER(__raw_readl(data->sfrbase + REG_MMU_VERSION));
250}
251
252static bool sysmmu_block(void __iomem *sfrbase) 263static bool sysmmu_block(void __iomem *sfrbase)
253{ 264{
254 int i = 120; 265 int i = 120;
@@ -345,7 +356,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
345 show_fault_information(dev_name(data->sysmmu), 356 show_fault_information(dev_name(data->sysmmu),
346 itype, base, addr); 357 itype, base, addr);
347 if (data->domain) 358 if (data->domain)
348 ret = report_iommu_fault(data->domain, 359 ret = report_iommu_fault(&data->domain->domain,
349 data->master, addr, itype); 360 data->master, addr, itype);
350 } 361 }
351 362
@@ -408,7 +419,7 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data)
408 unsigned int cfg = CFG_LRU | CFG_QOS(15); 419 unsigned int cfg = CFG_LRU | CFG_QOS(15);
409 unsigned int ver; 420 unsigned int ver;
410 421
411 ver = __raw_sysmmu_version(data); 422 ver = MMU_RAW_VER(__raw_readl(data->sfrbase + REG_MMU_VERSION));
412 if (MMU_MAJ_VER(ver) == 3) { 423 if (MMU_MAJ_VER(ver) == 3) {
413 if (MMU_MIN_VER(ver) >= 2) { 424 if (MMU_MIN_VER(ver) >= 2) {
414 cfg |= CFG_FLPDCACHE; 425 cfg |= CFG_FLPDCACHE;
@@ -422,6 +433,7 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data)
422 } 433 }
423 434
424 __raw_writel(cfg, data->sfrbase + REG_MMU_CFG); 435 __raw_writel(cfg, data->sfrbase + REG_MMU_CFG);
436 data->version = ver;
425} 437}
426 438
427static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) 439static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
@@ -442,8 +454,8 @@ static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
442 clk_disable(data->clk_master); 454 clk_disable(data->clk_master);
443} 455}
444 456
445static int __sysmmu_enable(struct sysmmu_drvdata *data, 457static int __sysmmu_enable(struct sysmmu_drvdata *data, phys_addr_t pgtable,
446 phys_addr_t pgtable, struct iommu_domain *domain) 458 struct exynos_iommu_domain *domain)
447{ 459{
448 int ret = 0; 460 int ret = 0;
449 unsigned long flags; 461 unsigned long flags;
@@ -470,77 +482,17 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data,
470 return ret; 482 return ret;
471} 483}
472 484
473/* __exynos_sysmmu_enable: Enables System MMU
474 *
475 * returns -error if an error occurred and System MMU is not enabled,
476 * 0 if the System MMU has been just enabled and 1 if System MMU was already
477 * enabled before.
478 */
479static int __exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable,
480 struct iommu_domain *domain)
481{
482 int ret = 0;
483 unsigned long flags;
484 struct exynos_iommu_owner *owner = dev->archdata.iommu;
485 struct sysmmu_drvdata *data;
486
487 BUG_ON(!has_sysmmu(dev));
488
489 spin_lock_irqsave(&owner->lock, flags);
490
491 data = dev_get_drvdata(owner->sysmmu);
492
493 ret = __sysmmu_enable(data, pgtable, domain);
494 if (ret >= 0)
495 data->master = dev;
496
497 spin_unlock_irqrestore(&owner->lock, flags);
498
499 return ret;
500}
501
502int exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable)
503{
504 BUG_ON(!memblock_is_memory(pgtable));
505
506 return __exynos_sysmmu_enable(dev, pgtable, NULL);
507}
508
509static bool exynos_sysmmu_disable(struct device *dev)
510{
511 unsigned long flags;
512 bool disabled = true;
513 struct exynos_iommu_owner *owner = dev->archdata.iommu;
514 struct sysmmu_drvdata *data;
515
516 BUG_ON(!has_sysmmu(dev));
517
518 spin_lock_irqsave(&owner->lock, flags);
519
520 data = dev_get_drvdata(owner->sysmmu);
521
522 disabled = __sysmmu_disable(data);
523 if (disabled)
524 data->master = NULL;
525
526 spin_unlock_irqrestore(&owner->lock, flags);
527
528 return disabled;
529}
530
531static void __sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, 485static void __sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
532 sysmmu_iova_t iova) 486 sysmmu_iova_t iova)
533{ 487{
534 if (__raw_sysmmu_version(data) == MAKE_MMU_VER(3, 3)) 488 if (data->version == MAKE_MMU_VER(3, 3))
535 __raw_writel(iova | 0x1, data->sfrbase + REG_MMU_FLUSH_ENTRY); 489 __raw_writel(iova | 0x1, data->sfrbase + REG_MMU_FLUSH_ENTRY);
536} 490}
537 491
538static void sysmmu_tlb_invalidate_flpdcache(struct device *dev, 492static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
539 sysmmu_iova_t iova) 493 sysmmu_iova_t iova)
540{ 494{
541 unsigned long flags; 495 unsigned long flags;
542 struct exynos_iommu_owner *owner = dev->archdata.iommu;
543 struct sysmmu_drvdata *data = dev_get_drvdata(owner->sysmmu);
544 496
545 if (!IS_ERR(data->clk_master)) 497 if (!IS_ERR(data->clk_master))
546 clk_enable(data->clk_master); 498 clk_enable(data->clk_master);
@@ -554,14 +506,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct device *dev,
554 clk_disable(data->clk_master); 506 clk_disable(data->clk_master);
555} 507}
556 508
557static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova, 509static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
558 size_t size) 510 sysmmu_iova_t iova, size_t size)
559{ 511{
560 struct exynos_iommu_owner *owner = dev->archdata.iommu;
561 unsigned long flags; 512 unsigned long flags;
562 struct sysmmu_drvdata *data;
563
564 data = dev_get_drvdata(owner->sysmmu);
565 513
566 spin_lock_irqsave(&data->lock, flags); 514 spin_lock_irqsave(&data->lock, flags);
567 if (is_sysmmu_active(data)) { 515 if (is_sysmmu_active(data)) {
@@ -580,7 +528,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova,
580 * 1MB page can be cached in one of all sets. 528 * 1MB page can be cached in one of all sets.
581 * 64KB page can be one of 16 consecutive sets. 529 * 64KB page can be one of 16 consecutive sets.
582 */ 530 */
583 if (MMU_MAJ_VER(__raw_sysmmu_version(data)) == 2) 531 if (MMU_MAJ_VER(data->version) == 2)
584 num_inv = min_t(unsigned int, size / PAGE_SIZE, 64); 532 num_inv = min_t(unsigned int, size / PAGE_SIZE, 64);
585 533
586 if (sysmmu_block(data->sfrbase)) { 534 if (sysmmu_block(data->sfrbase)) {
@@ -591,32 +539,8 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova,
591 if (!IS_ERR(data->clk_master)) 539 if (!IS_ERR(data->clk_master))
592 clk_disable(data->clk_master); 540 clk_disable(data->clk_master);
593 } else { 541 } else {
594 dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#x\n", 542 dev_dbg(data->master,
595 iova); 543 "disabled. Skipping TLB invalidation @ %#x\n", iova);
596 }
597 spin_unlock_irqrestore(&data->lock, flags);
598}
599
600void exynos_sysmmu_tlb_invalidate(struct device *dev)
601{
602 struct exynos_iommu_owner *owner = dev->archdata.iommu;
603 unsigned long flags;
604 struct sysmmu_drvdata *data;
605
606 data = dev_get_drvdata(owner->sysmmu);
607
608 spin_lock_irqsave(&data->lock, flags);
609 if (is_sysmmu_active(data)) {
610 if (!IS_ERR(data->clk_master))
611 clk_enable(data->clk_master);
612 if (sysmmu_block(data->sfrbase)) {
613 __sysmmu_tlb_invalidate(data->sfrbase);
614 sysmmu_unblock(data->sfrbase);
615 }
616 if (!IS_ERR(data->clk_master))
617 clk_disable(data->clk_master);
618 } else {
619 dev_dbg(dev, "disabled. Skipping TLB invalidation\n");
620 } 544 }
621 spin_unlock_irqrestore(&data->lock, flags); 545 spin_unlock_irqrestore(&data->lock, flags);
622} 546}
@@ -682,6 +606,36 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
682 return 0; 606 return 0;
683} 607}
684 608
609#ifdef CONFIG_PM_SLEEP
610static int exynos_sysmmu_suspend(struct device *dev)
611{
612 struct sysmmu_drvdata *data = dev_get_drvdata(dev);
613
614 dev_dbg(dev, "suspend\n");
615 if (is_sysmmu_active(data)) {
616 __sysmmu_disable_nocount(data);
617 pm_runtime_put(dev);
618 }
619 return 0;
620}
621
622static int exynos_sysmmu_resume(struct device *dev)
623{
624 struct sysmmu_drvdata *data = dev_get_drvdata(dev);
625
626 dev_dbg(dev, "resume\n");
627 if (is_sysmmu_active(data)) {
628 pm_runtime_get_sync(dev);
629 __sysmmu_enable_nocount(data);
630 }
631 return 0;
632}
633#endif
634
635static const struct dev_pm_ops sysmmu_pm_ops = {
636 SET_LATE_SYSTEM_SLEEP_PM_OPS(exynos_sysmmu_suspend, exynos_sysmmu_resume)
637};
638
685static const struct of_device_id sysmmu_of_match[] __initconst = { 639static const struct of_device_id sysmmu_of_match[] __initconst = {
686 { .compatible = "samsung,exynos-sysmmu", }, 640 { .compatible = "samsung,exynos-sysmmu", },
687 { }, 641 { },
@@ -692,6 +646,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = {
692 .driver = { 646 .driver = {
693 .name = "exynos-sysmmu", 647 .name = "exynos-sysmmu",
694 .of_match_table = sysmmu_of_match, 648 .of_match_table = sysmmu_of_match,
649 .pm = &sysmmu_pm_ops,
695 } 650 }
696}; 651};
697 652
@@ -704,104 +659,108 @@ static inline void pgtable_flush(void *vastart, void *vaend)
704 659
705static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) 660static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
706{ 661{
707 struct exynos_iommu_domain *exynos_domain; 662 struct exynos_iommu_domain *domain;
708 int i; 663 int i;
709 664
710 if (type != IOMMU_DOMAIN_UNMANAGED) 665 if (type != IOMMU_DOMAIN_UNMANAGED)
711 return NULL; 666 return NULL;
712 667
713 exynos_domain = kzalloc(sizeof(*exynos_domain), GFP_KERNEL); 668 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
714 if (!exynos_domain) 669 if (!domain)
715 return NULL; 670 return NULL;
716 671
717 exynos_domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2); 672 domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2);
718 if (!exynos_domain->pgtable) 673 if (!domain->pgtable)
719 goto err_pgtable; 674 goto err_pgtable;
720 675
721 exynos_domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); 676 domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
722 if (!exynos_domain->lv2entcnt) 677 if (!domain->lv2entcnt)
723 goto err_counter; 678 goto err_counter;
724 679
725 /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */ 680 /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */
726 for (i = 0; i < NUM_LV1ENTRIES; i += 8) { 681 for (i = 0; i < NUM_LV1ENTRIES; i += 8) {
727 exynos_domain->pgtable[i + 0] = ZERO_LV2LINK; 682 domain->pgtable[i + 0] = ZERO_LV2LINK;
728 exynos_domain->pgtable[i + 1] = ZERO_LV2LINK; 683 domain->pgtable[i + 1] = ZERO_LV2LINK;
729 exynos_domain->pgtable[i + 2] = ZERO_LV2LINK; 684 domain->pgtable[i + 2] = ZERO_LV2LINK;
730 exynos_domain->pgtable[i + 3] = ZERO_LV2LINK; 685 domain->pgtable[i + 3] = ZERO_LV2LINK;
731 exynos_domain->pgtable[i + 4] = ZERO_LV2LINK; 686 domain->pgtable[i + 4] = ZERO_LV2LINK;
732 exynos_domain->pgtable[i + 5] = ZERO_LV2LINK; 687 domain->pgtable[i + 5] = ZERO_LV2LINK;
733 exynos_domain->pgtable[i + 6] = ZERO_LV2LINK; 688 domain->pgtable[i + 6] = ZERO_LV2LINK;
734 exynos_domain->pgtable[i + 7] = ZERO_LV2LINK; 689 domain->pgtable[i + 7] = ZERO_LV2LINK;
735 } 690 }
736 691
737 pgtable_flush(exynos_domain->pgtable, exynos_domain->pgtable + NUM_LV1ENTRIES); 692 pgtable_flush(domain->pgtable, domain->pgtable + NUM_LV1ENTRIES);
738 693
739 spin_lock_init(&exynos_domain->lock); 694 spin_lock_init(&domain->lock);
740 spin_lock_init(&exynos_domain->pgtablelock); 695 spin_lock_init(&domain->pgtablelock);
741 INIT_LIST_HEAD(&exynos_domain->clients); 696 INIT_LIST_HEAD(&domain->clients);
742 697
743 exynos_domain->domain.geometry.aperture_start = 0; 698 domain->domain.geometry.aperture_start = 0;
744 exynos_domain->domain.geometry.aperture_end = ~0UL; 699 domain->domain.geometry.aperture_end = ~0UL;
745 exynos_domain->domain.geometry.force_aperture = true; 700 domain->domain.geometry.force_aperture = true;
746 701
747 return &exynos_domain->domain; 702 return &domain->domain;
748 703
749err_counter: 704err_counter:
750 free_pages((unsigned long)exynos_domain->pgtable, 2); 705 free_pages((unsigned long)domain->pgtable, 2);
751err_pgtable: 706err_pgtable:
752 kfree(exynos_domain); 707 kfree(domain);
753 return NULL; 708 return NULL;
754} 709}
755 710
756static void exynos_iommu_domain_free(struct iommu_domain *domain) 711static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
757{ 712{
758 struct exynos_iommu_domain *priv = to_exynos_domain(domain); 713 struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
759 struct exynos_iommu_owner *owner; 714 struct sysmmu_drvdata *data, *next;
760 unsigned long flags; 715 unsigned long flags;
761 int i; 716 int i;
762 717
763 WARN_ON(!list_empty(&priv->clients)); 718 WARN_ON(!list_empty(&domain->clients));
764 719
765 spin_lock_irqsave(&priv->lock, flags); 720 spin_lock_irqsave(&domain->lock, flags);
766 721
767 list_for_each_entry(owner, &priv->clients, client) { 722 list_for_each_entry_safe(data, next, &domain->clients, domain_node) {
768 while (!exynos_sysmmu_disable(owner->dev)) 723 if (__sysmmu_disable(data))
769 ; /* until System MMU is actually disabled */ 724 data->master = NULL;
725 list_del_init(&data->domain_node);
770 } 726 }
771 727
772 while (!list_empty(&priv->clients)) 728 spin_unlock_irqrestore(&domain->lock, flags);
773 list_del_init(priv->clients.next);
774
775 spin_unlock_irqrestore(&priv->lock, flags);
776 729
777 for (i = 0; i < NUM_LV1ENTRIES; i++) 730 for (i = 0; i < NUM_LV1ENTRIES; i++)
778 if (lv1ent_page(priv->pgtable + i)) 731 if (lv1ent_page(domain->pgtable + i))
779 kmem_cache_free(lv2table_kmem_cache, 732 kmem_cache_free(lv2table_kmem_cache,
780 phys_to_virt(lv2table_base(priv->pgtable + i))); 733 phys_to_virt(lv2table_base(domain->pgtable + i)));
781 734
782 free_pages((unsigned long)priv->pgtable, 2); 735 free_pages((unsigned long)domain->pgtable, 2);
783 free_pages((unsigned long)priv->lv2entcnt, 1); 736 free_pages((unsigned long)domain->lv2entcnt, 1);
784 kfree(priv); 737 kfree(domain);
785} 738}
786 739
787static int exynos_iommu_attach_device(struct iommu_domain *domain, 740static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
788 struct device *dev) 741 struct device *dev)
789{ 742{
790 struct exynos_iommu_owner *owner = dev->archdata.iommu; 743 struct exynos_iommu_owner *owner = dev->archdata.iommu;
791 struct exynos_iommu_domain *priv = to_exynos_domain(domain); 744 struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
792 phys_addr_t pagetable = virt_to_phys(priv->pgtable); 745 struct sysmmu_drvdata *data;
746 phys_addr_t pagetable = virt_to_phys(domain->pgtable);
793 unsigned long flags; 747 unsigned long flags;
794 int ret; 748 int ret = -ENODEV;
795 749
796 spin_lock_irqsave(&priv->lock, flags); 750 if (!has_sysmmu(dev))
751 return -ENODEV;
797 752
798 ret = __exynos_sysmmu_enable(dev, pagetable, domain); 753 list_for_each_entry(data, &owner->controllers, owner_node) {
799 if (ret == 0) { 754 pm_runtime_get_sync(data->sysmmu);
800 list_add_tail(&owner->client, &priv->clients); 755 ret = __sysmmu_enable(data, pagetable, domain);
801 owner->domain = domain; 756 if (ret >= 0) {
802 } 757 data->master = dev;
803 758
804 spin_unlock_irqrestore(&priv->lock, flags); 759 spin_lock_irqsave(&domain->lock, flags);
760 list_add_tail(&data->domain_node, &domain->clients);
761 spin_unlock_irqrestore(&domain->lock, flags);
762 }
763 }
805 764
806 if (ret < 0) { 765 if (ret < 0) {
807 dev_err(dev, "%s: Failed to attach IOMMU with pgtable %pa\n", 766 dev_err(dev, "%s: Failed to attach IOMMU with pgtable %pa\n",
@@ -815,36 +774,39 @@ static int exynos_iommu_attach_device(struct iommu_domain *domain,
815 return ret; 774 return ret;
816} 775}
817 776
818static void exynos_iommu_detach_device(struct iommu_domain *domain, 777static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
819 struct device *dev) 778 struct device *dev)
820{ 779{
821 struct exynos_iommu_owner *owner; 780 struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
822 struct exynos_iommu_domain *priv = to_exynos_domain(domain); 781 phys_addr_t pagetable = virt_to_phys(domain->pgtable);
823 phys_addr_t pagetable = virt_to_phys(priv->pgtable); 782 struct sysmmu_drvdata *data, *next;
824 unsigned long flags; 783 unsigned long flags;
784 bool found = false;
825 785
826 spin_lock_irqsave(&priv->lock, flags); 786 if (!has_sysmmu(dev))
787 return;
827 788
828 list_for_each_entry(owner, &priv->clients, client) { 789 spin_lock_irqsave(&domain->lock, flags);
829 if (owner == dev->archdata.iommu) { 790 list_for_each_entry_safe(data, next, &domain->clients, domain_node) {
830 if (exynos_sysmmu_disable(dev)) { 791 if (data->master == dev) {
831 list_del_init(&owner->client); 792 if (__sysmmu_disable(data)) {
832 owner->domain = NULL; 793 data->master = NULL;
794 list_del_init(&data->domain_node);
833 } 795 }
834 break; 796 pm_runtime_put(data->sysmmu);
797 found = true;
835 } 798 }
836 } 799 }
800 spin_unlock_irqrestore(&domain->lock, flags);
837 801
838 spin_unlock_irqrestore(&priv->lock, flags); 802 if (found)
839
840 if (owner == dev->archdata.iommu)
841 dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", 803 dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n",
842 __func__, &pagetable); 804 __func__, &pagetable);
843 else 805 else
844 dev_err(dev, "%s: No IOMMU is attached\n", __func__); 806 dev_err(dev, "%s: No IOMMU is attached\n", __func__);
845} 807}
846 808
847static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv, 809static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *domain,
848 sysmmu_pte_t *sent, sysmmu_iova_t iova, short *pgcounter) 810 sysmmu_pte_t *sent, sysmmu_iova_t iova, short *pgcounter)
849{ 811{
850 if (lv1ent_section(sent)) { 812 if (lv1ent_section(sent)) {
@@ -862,6 +824,7 @@ static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv,
862 return ERR_PTR(-ENOMEM); 824 return ERR_PTR(-ENOMEM);
863 825
864 *sent = mk_lv1ent_page(virt_to_phys(pent)); 826 *sent = mk_lv1ent_page(virt_to_phys(pent));
827 kmemleak_ignore(pent);
865 *pgcounter = NUM_LV2ENTRIES; 828 *pgcounter = NUM_LV2ENTRIES;
866 pgtable_flush(pent, pent + NUM_LV2ENTRIES); 829 pgtable_flush(pent, pent + NUM_LV2ENTRIES);
867 pgtable_flush(sent, sent + 1); 830 pgtable_flush(sent, sent + 1);
@@ -884,20 +847,19 @@ static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv,
884 * not currently mapped. 847 * not currently mapped.
885 */ 848 */
886 if (need_flush_flpd_cache) { 849 if (need_flush_flpd_cache) {
887 struct exynos_iommu_owner *owner; 850 struct sysmmu_drvdata *data;
888 851
889 spin_lock(&priv->lock); 852 spin_lock(&domain->lock);
890 list_for_each_entry(owner, &priv->clients, client) 853 list_for_each_entry(data, &domain->clients, domain_node)
891 sysmmu_tlb_invalidate_flpdcache( 854 sysmmu_tlb_invalidate_flpdcache(data, iova);
892 owner->dev, iova); 855 spin_unlock(&domain->lock);
893 spin_unlock(&priv->lock);
894 } 856 }
895 } 857 }
896 858
897 return page_entry(sent, iova); 859 return page_entry(sent, iova);
898} 860}
899 861
900static int lv1set_section(struct exynos_iommu_domain *priv, 862static int lv1set_section(struct exynos_iommu_domain *domain,
901 sysmmu_pte_t *sent, sysmmu_iova_t iova, 863 sysmmu_pte_t *sent, sysmmu_iova_t iova,
902 phys_addr_t paddr, short *pgcnt) 864 phys_addr_t paddr, short *pgcnt)
903{ 865{
@@ -922,17 +884,17 @@ static int lv1set_section(struct exynos_iommu_domain *priv,
922 884
923 pgtable_flush(sent, sent + 1); 885 pgtable_flush(sent, sent + 1);
924 886
925 spin_lock(&priv->lock); 887 spin_lock(&domain->lock);
926 if (lv1ent_page_zero(sent)) { 888 if (lv1ent_page_zero(sent)) {
927 struct exynos_iommu_owner *owner; 889 struct sysmmu_drvdata *data;
928 /* 890 /*
929 * Flushing FLPD cache in System MMU v3.3 that may cache a FLPD 891 * Flushing FLPD cache in System MMU v3.3 that may cache a FLPD
930 * entry by speculative prefetch of SLPD which has no mapping. 892 * entry by speculative prefetch of SLPD which has no mapping.
931 */ 893 */
932 list_for_each_entry(owner, &priv->clients, client) 894 list_for_each_entry(data, &domain->clients, domain_node)
933 sysmmu_tlb_invalidate_flpdcache(owner->dev, iova); 895 sysmmu_tlb_invalidate_flpdcache(data, iova);
934 } 896 }
935 spin_unlock(&priv->lock); 897 spin_unlock(&domain->lock);
936 898
937 return 0; 899 return 0;
938} 900}
@@ -992,74 +954,75 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,
992 * than or equal to 128KiB. 954 * than or equal to 128KiB.
993 * - Start address of an I/O virtual region must be aligned by 128KiB. 955 * - Start address of an I/O virtual region must be aligned by 128KiB.
994 */ 956 */
995static int exynos_iommu_map(struct iommu_domain *domain, unsigned long l_iova, 957static int exynos_iommu_map(struct iommu_domain *iommu_domain,
996 phys_addr_t paddr, size_t size, int prot) 958 unsigned long l_iova, phys_addr_t paddr, size_t size,
959 int prot)
997{ 960{
998 struct exynos_iommu_domain *priv = to_exynos_domain(domain); 961 struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
999 sysmmu_pte_t *entry; 962 sysmmu_pte_t *entry;
1000 sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; 963 sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
1001 unsigned long flags; 964 unsigned long flags;
1002 int ret = -ENOMEM; 965 int ret = -ENOMEM;
1003 966
1004 BUG_ON(priv->pgtable == NULL); 967 BUG_ON(domain->pgtable == NULL);
1005 968
1006 spin_lock_irqsave(&priv->pgtablelock, flags); 969 spin_lock_irqsave(&domain->pgtablelock, flags);
1007 970
1008 entry = section_entry(priv->pgtable, iova); 971 entry = section_entry(domain->pgtable, iova);
1009 972
1010 if (size == SECT_SIZE) { 973 if (size == SECT_SIZE) {
1011 ret = lv1set_section(priv, entry, iova, paddr, 974 ret = lv1set_section(domain, entry, iova, paddr,
1012 &priv->lv2entcnt[lv1ent_offset(iova)]); 975 &domain->lv2entcnt[lv1ent_offset(iova)]);
1013 } else { 976 } else {
1014 sysmmu_pte_t *pent; 977 sysmmu_pte_t *pent;
1015 978
1016 pent = alloc_lv2entry(priv, entry, iova, 979 pent = alloc_lv2entry(domain, entry, iova,
1017 &priv->lv2entcnt[lv1ent_offset(iova)]); 980 &domain->lv2entcnt[lv1ent_offset(iova)]);
1018 981
1019 if (IS_ERR(pent)) 982 if (IS_ERR(pent))
1020 ret = PTR_ERR(pent); 983 ret = PTR_ERR(pent);
1021 else 984 else
1022 ret = lv2set_page(pent, paddr, size, 985 ret = lv2set_page(pent, paddr, size,
1023 &priv->lv2entcnt[lv1ent_offset(iova)]); 986 &domain->lv2entcnt[lv1ent_offset(iova)]);
1024 } 987 }
1025 988
1026 if (ret) 989 if (ret)
1027 pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n", 990 pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n",
1028 __func__, ret, size, iova); 991 __func__, ret, size, iova);
1029 992
1030 spin_unlock_irqrestore(&priv->pgtablelock, flags); 993 spin_unlock_irqrestore(&domain->pgtablelock, flags);
1031 994
1032 return ret; 995 return ret;
1033} 996}
1034 997
1035static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *priv, 998static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain,
1036 sysmmu_iova_t iova, size_t size) 999 sysmmu_iova_t iova, size_t size)
1037{ 1000{
1038 struct exynos_iommu_owner *owner; 1001 struct sysmmu_drvdata *data;
1039 unsigned long flags; 1002 unsigned long flags;
1040 1003
1041 spin_lock_irqsave(&priv->lock, flags); 1004 spin_lock_irqsave(&domain->lock, flags);
1042 1005
1043 list_for_each_entry(owner, &priv->clients, client) 1006 list_for_each_entry(data, &domain->clients, domain_node)
1044 sysmmu_tlb_invalidate_entry(owner->dev, iova, size); 1007 sysmmu_tlb_invalidate_entry(data, iova, size);
1045 1008
1046 spin_unlock_irqrestore(&priv->lock, flags); 1009 spin_unlock_irqrestore(&domain->lock, flags);
1047} 1010}
1048 1011
1049static size_t exynos_iommu_unmap(struct iommu_domain *domain, 1012static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
1050 unsigned long l_iova, size_t size) 1013 unsigned long l_iova, size_t size)
1051{ 1014{
1052 struct exynos_iommu_domain *priv = to_exynos_domain(domain); 1015 struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
1053 sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; 1016 sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
1054 sysmmu_pte_t *ent; 1017 sysmmu_pte_t *ent;
1055 size_t err_pgsize; 1018 size_t err_pgsize;
1056 unsigned long flags; 1019 unsigned long flags;
1057 1020
1058 BUG_ON(priv->pgtable == NULL); 1021 BUG_ON(domain->pgtable == NULL);
1059 1022
1060 spin_lock_irqsave(&priv->pgtablelock, flags); 1023 spin_lock_irqsave(&domain->pgtablelock, flags);
1061 1024
1062 ent = section_entry(priv->pgtable, iova); 1025 ent = section_entry(domain->pgtable, iova);
1063 1026
1064 if (lv1ent_section(ent)) { 1027 if (lv1ent_section(ent)) {
1065 if (WARN_ON(size < SECT_SIZE)) { 1028 if (WARN_ON(size < SECT_SIZE)) {
@@ -1093,7 +1056,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
1093 *ent = 0; 1056 *ent = 0;
1094 size = SPAGE_SIZE; 1057 size = SPAGE_SIZE;
1095 pgtable_flush(ent, ent + 1); 1058 pgtable_flush(ent, ent + 1);
1096 priv->lv2entcnt[lv1ent_offset(iova)] += 1; 1059 domain->lv2entcnt[lv1ent_offset(iova)] += 1;
1097 goto done; 1060 goto done;
1098 } 1061 }
1099 1062
@@ -1107,15 +1070,15 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
1107 pgtable_flush(ent, ent + SPAGES_PER_LPAGE); 1070 pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
1108 1071
1109 size = LPAGE_SIZE; 1072 size = LPAGE_SIZE;
1110 priv->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE; 1073 domain->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE;
1111done: 1074done:
1112 spin_unlock_irqrestore(&priv->pgtablelock, flags); 1075 spin_unlock_irqrestore(&domain->pgtablelock, flags);
1113 1076
1114 exynos_iommu_tlb_invalidate_entry(priv, iova, size); 1077 exynos_iommu_tlb_invalidate_entry(domain, iova, size);
1115 1078
1116 return size; 1079 return size;
1117err: 1080err:
1118 spin_unlock_irqrestore(&priv->pgtablelock, flags); 1081 spin_unlock_irqrestore(&domain->pgtablelock, flags);
1119 1082
1120 pr_err("%s: Failed: size(%#zx) @ %#x is smaller than page size %#zx\n", 1083 pr_err("%s: Failed: size(%#zx) @ %#x is smaller than page size %#zx\n",
1121 __func__, size, iova, err_pgsize); 1084 __func__, size, iova, err_pgsize);
@@ -1123,17 +1086,17 @@ err:
1123 return 0; 1086 return 0;
1124} 1087}
1125 1088
1126static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, 1089static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
1127 dma_addr_t iova) 1090 dma_addr_t iova)
1128{ 1091{
1129 struct exynos_iommu_domain *priv = to_exynos_domain(domain); 1092 struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
1130 sysmmu_pte_t *entry; 1093 sysmmu_pte_t *entry;
1131 unsigned long flags; 1094 unsigned long flags;
1132 phys_addr_t phys = 0; 1095 phys_addr_t phys = 0;
1133 1096
1134 spin_lock_irqsave(&priv->pgtablelock, flags); 1097 spin_lock_irqsave(&domain->pgtablelock, flags);
1135 1098
1136 entry = section_entry(priv->pgtable, iova); 1099 entry = section_entry(domain->pgtable, iova);
1137 1100
1138 if (lv1ent_section(entry)) { 1101 if (lv1ent_section(entry)) {
1139 phys = section_phys(entry) + section_offs(iova); 1102 phys = section_phys(entry) + section_offs(iova);
@@ -1146,7 +1109,7 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,
1146 phys = spage_phys(entry) + spage_offs(iova); 1109 phys = spage_phys(entry) + spage_offs(iova);
1147 } 1110 }
1148 1111
1149 spin_unlock_irqrestore(&priv->pgtablelock, flags); 1112 spin_unlock_irqrestore(&domain->pgtablelock, flags);
1150 1113
1151 return phys; 1114 return phys;
1152} 1115}
@@ -1156,6 +1119,9 @@ static int exynos_iommu_add_device(struct device *dev)
1156 struct iommu_group *group; 1119 struct iommu_group *group;
1157 int ret; 1120 int ret;
1158 1121
1122 if (!has_sysmmu(dev))
1123 return -ENODEV;
1124
1159 group = iommu_group_get(dev); 1125 group = iommu_group_get(dev);
1160 1126
1161 if (!group) { 1127 if (!group) {
@@ -1174,10 +1140,40 @@ static int exynos_iommu_add_device(struct device *dev)
1174 1140
1175static void exynos_iommu_remove_device(struct device *dev) 1141static void exynos_iommu_remove_device(struct device *dev)
1176{ 1142{
1143 if (!has_sysmmu(dev))
1144 return;
1145
1177 iommu_group_remove_device(dev); 1146 iommu_group_remove_device(dev);
1178} 1147}
1179 1148
1180static const struct iommu_ops exynos_iommu_ops = { 1149static int exynos_iommu_of_xlate(struct device *dev,
1150 struct of_phandle_args *spec)
1151{
1152 struct exynos_iommu_owner *owner = dev->archdata.iommu;
1153 struct platform_device *sysmmu = of_find_device_by_node(spec->np);
1154 struct sysmmu_drvdata *data;
1155
1156 if (!sysmmu)
1157 return -ENODEV;
1158
1159 data = platform_get_drvdata(sysmmu);
1160 if (!data)
1161 return -ENODEV;
1162
1163 if (!owner) {
1164 owner = kzalloc(sizeof(*owner), GFP_KERNEL);
1165 if (!owner)
1166 return -ENOMEM;
1167
1168 INIT_LIST_HEAD(&owner->controllers);
1169 dev->archdata.iommu = owner;
1170 }
1171
1172 list_add_tail(&data->owner_node, &owner->controllers);
1173 return 0;
1174}
1175
1176static struct iommu_ops exynos_iommu_ops = {
1181 .domain_alloc = exynos_iommu_domain_alloc, 1177 .domain_alloc = exynos_iommu_domain_alloc,
1182 .domain_free = exynos_iommu_domain_free, 1178 .domain_free = exynos_iommu_domain_free,
1183 .attach_dev = exynos_iommu_attach_device, 1179 .attach_dev = exynos_iommu_attach_device,
@@ -1189,19 +1185,15 @@ static const struct iommu_ops exynos_iommu_ops = {
1189 .add_device = exynos_iommu_add_device, 1185 .add_device = exynos_iommu_add_device,
1190 .remove_device = exynos_iommu_remove_device, 1186 .remove_device = exynos_iommu_remove_device,
1191 .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, 1187 .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
1188 .of_xlate = exynos_iommu_of_xlate,
1192}; 1189};
1193 1190
1191static bool init_done;
1192
1194static int __init exynos_iommu_init(void) 1193static int __init exynos_iommu_init(void)
1195{ 1194{
1196 struct device_node *np;
1197 int ret; 1195 int ret;
1198 1196
1199 np = of_find_matching_node(NULL, sysmmu_of_match);
1200 if (!np)
1201 return 0;
1202
1203 of_node_put(np);
1204
1205 lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", 1197 lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table",
1206 LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); 1198 LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL);
1207 if (!lv2table_kmem_cache) { 1199 if (!lv2table_kmem_cache) {
@@ -1230,6 +1222,8 @@ static int __init exynos_iommu_init(void)
1230 goto err_set_iommu; 1222 goto err_set_iommu;
1231 } 1223 }
1232 1224
1225 init_done = true;
1226
1233 return 0; 1227 return 0;
1234err_set_iommu: 1228err_set_iommu:
1235 kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); 1229 kmem_cache_free(lv2table_kmem_cache, zero_lv2_table);
@@ -1239,4 +1233,21 @@ err_reg_driver:
1239 kmem_cache_destroy(lv2table_kmem_cache); 1233 kmem_cache_destroy(lv2table_kmem_cache);
1240 return ret; 1234 return ret;
1241} 1235}
1242subsys_initcall(exynos_iommu_init); 1236
1237static int __init exynos_iommu_of_setup(struct device_node *np)
1238{
1239 struct platform_device *pdev;
1240
1241 if (!init_done)
1242 exynos_iommu_init();
1243
1244 pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);
1245 if (IS_ERR(pdev))
1246 return PTR_ERR(pdev);
1247
1248 of_iommu_set_ops(np, &exynos_iommu_ops);
1249 return 0;
1250}
1251
1252IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu",
1253 exynos_iommu_of_setup);
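
With the of_xlate() callback and IOMMU_OF_DECLARE() registration above, exynos_iommu_of_xlate() is called once per IOMMU specifier of a master device and links that SYSMMU controller into the per-master owner->controllers list; attach, detach and TLB maintenance now walk this list instead of assuming a single SYSMMU per master. The sketch below is a standalone model of that list handling only; the structure and function names are simplified stand-ins, not the driver's real types.

	/*
	 * Illustrative only: one "owner" (master device) referencing several
	 * SYSMMU controllers; attach walks the list and enables each of them,
	 * which is what exynos_iommu_attach_device() does with
	 * list_for_each_entry() in the real driver.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct sysmmu {                    /* stands in for struct sysmmu_drvdata */
		char name[16];
		struct sysmmu *next;       /* stands in for the owner_node link */
	};

	struct owner {                     /* stands in for struct exynos_iommu_owner */
		struct sysmmu *controllers;
	};

	/* Called once per IOMMU specifier of the master, like of_xlate() */
	static void xlate(struct owner *owner, const char *name)
	{
		struct sysmmu *data = calloc(1, sizeof(*data));

		if (!data)
			return;
		snprintf(data->name, sizeof(data->name), "%s", name);
		data->next = owner->controllers;
		owner->controllers = data;
	}

	int main(void)
	{
		struct owner owner = { NULL };
		struct sysmmu *data, *next;

		xlate(&owner, "sysmmu_fimd");
		xlate(&owner, "sysmmu_gsc0");

		/* attach: enable every controller bound to this master */
		for (data = owner.controllers; data; data = data->next)
			printf("enabling %s with the domain's page table\n", data->name);

		for (data = owner.controllers; data; data = next) {
			next = data->next;
			free(data);
		}
		return 0;
	}
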
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5ecfaf29933a..a98a7b27aca1 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -15,8 +15,11 @@
15 * Shaohua Li <shaohua.li@intel.com>, 15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, 16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com> 17 * Fenghua Yu <fenghua.yu@intel.com>
18 * Joerg Roedel <jroedel@suse.de>
18 */ 19 */
19 20
21#define pr_fmt(fmt) "DMAR: " fmt
22
20#include <linux/init.h> 23#include <linux/init.h>
21#include <linux/bitmap.h> 24#include <linux/bitmap.h>
22#include <linux/debugfs.h> 25#include <linux/debugfs.h>
@@ -40,6 +43,7 @@
40#include <linux/pci-ats.h> 43#include <linux/pci-ats.h>
41#include <linux/memblock.h> 44#include <linux/memblock.h>
42#include <linux/dma-contiguous.h> 45#include <linux/dma-contiguous.h>
46#include <linux/crash_dump.h>
43#include <asm/irq_remapping.h> 47#include <asm/irq_remapping.h>
44#include <asm/cacheflush.h> 48#include <asm/cacheflush.h>
45#include <asm/iommu.h> 49#include <asm/iommu.h>
@@ -190,7 +194,29 @@ struct root_entry {
190}; 194};
191#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 195#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
192 196
197/*
198 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_lctp(struct root_entry *re)
202{
203 if (!(re->lo & 1))
204 return 0;
205
206 return re->lo & VTD_PAGE_MASK;
207}
208
209/*
210 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211 * if marked present.
212 */
213static phys_addr_t root_entry_uctp(struct root_entry *re)
214{
215 if (!(re->hi & 1))
216 return 0;
193 217
218 return re->hi & VTD_PAGE_MASK;
219}
194/* 220/*
195 * low 64 bits: 221 * low 64 bits:
196 * 0: present 222 * 0: present
@@ -207,10 +233,38 @@ struct context_entry {
207 u64 hi; 233 u64 hi;
208}; 234};
209 235
210static inline bool context_present(struct context_entry *context) 236static inline void context_clear_pasid_enable(struct context_entry *context)
237{
238 context->lo &= ~(1ULL << 11);
239}
240
241static inline bool context_pasid_enabled(struct context_entry *context)
242{
243 return !!(context->lo & (1ULL << 11));
244}
245
246static inline void context_set_copied(struct context_entry *context)
247{
248 context->hi |= (1ull << 3);
249}
250
251static inline bool context_copied(struct context_entry *context)
252{
253 return !!(context->hi & (1ULL << 3));
254}
255
256static inline bool __context_present(struct context_entry *context)
211{ 257{
212 return (context->lo & 1); 258 return (context->lo & 1);
213} 259}
260
261static inline bool context_present(struct context_entry *context)
262{
263 return context_pasid_enabled(context) ?
264 __context_present(context) :
265 __context_present(context) && !context_copied(context);
266}
267
214static inline void context_set_present(struct context_entry *context) 268static inline void context_set_present(struct context_entry *context)
215{ 269{
216 context->lo |= 1; 270 context->lo |= 1;
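
The helpers above reserve bit 11 of the low qword as the PASID-enable flag and bit 3 of the high qword as a software "copied" marker for context entries taken over from the previous kernel in kdump mode; context_present() then reports a copied non-PASID entry as not present, so the new kernel installs a fresh entry instead of silently reusing the inherited one. A small standalone model of just that predicate, with the rest of the VT-d context-entry format omitted:

	/*
	 * Illustrative only: the copied/present predicate combination from the
	 * hunk above, reproduced on a plain struct so it can be printed.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct context_entry { uint64_t lo, hi; };

	static bool pasid_enabled(struct context_entry *c) { return c->lo & (1ULL << 11); }
	static bool copied(struct context_entry *c)        { return c->hi & (1ULL << 3); }
	static bool present_bit(struct context_entry *c)   { return c->lo & 1; }

	/* Same shape as the new context_present() */
	static bool present(struct context_entry *c)
	{
		return pasid_enabled(c) ? present_bit(c)
					: present_bit(c) && !copied(c);
	}

	int main(void)
	{
		struct context_entry cases[] = {
			{ .lo = 0x1, .hi = 0x0 },               /* present, not copied */
			{ .lo = 0x1, .hi = 0x8 },               /* copied from the old kernel */
			{ .lo = 0x1 | 1ULL << 11, .hi = 0x8 },  /* copied, but PASID enabled */
		};

		for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
			printf("lo=%#llx hi=%#llx -> treated as present: %d\n",
			       (unsigned long long)cases[i].lo,
			       (unsigned long long)cases[i].hi, present(&cases[i]));
		return 0;
	}
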
@@ -247,6 +301,11 @@ static inline void context_set_domain_id(struct context_entry *context,
247 context->hi |= (value & ((1 << 16) - 1)) << 8; 301 context->hi |= (value & ((1 << 16) - 1)) << 8;
248} 302}
249 303
304static inline int context_domain_id(struct context_entry *c)
305{
306 return((c->hi >> 8) & 0xffff);
307}
308
250static inline void context_clear_entry(struct context_entry *context) 309static inline void context_clear_entry(struct context_entry *context)
251{ 310{
252 context->lo = 0; 311 context->lo = 0;
@@ -440,6 +499,25 @@ static LIST_HEAD(device_domain_list);
440 499
441static const struct iommu_ops intel_iommu_ops; 500static const struct iommu_ops intel_iommu_ops;
442 501
502static bool translation_pre_enabled(struct intel_iommu *iommu)
503{
504 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
505}
506
507static void clear_translation_pre_enabled(struct intel_iommu *iommu)
508{
509 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
510}
511
512static void init_translation_status(struct intel_iommu *iommu)
513{
514 u32 gsts;
515
516 gsts = readl(iommu->reg + DMAR_GSTS_REG);
517 if (gsts & DMA_GSTS_TES)
518 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
519}
520
443/* Convert generic 'struct iommu_domain to private struct dmar_domain */ 521/* Convert generic 'struct iommu_domain to private struct dmar_domain */
444static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) 522static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
445{ 523{
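
init_translation_status() above samples the global status register once and latches DMA_GSTS_TES into the per-IOMMU flags, so later code can ask translation_pre_enabled() whether the previous (for example crashed) kernel left DMA remapping switched on. Below is a userspace model of that check only; the register value is faked and take_over_old_tables() is a made-up placeholder for the table takeover implemented elsewhere in this series, not a function from this file.

	/*
	 * Illustrative only: a userspace model of the pre-enabled-translation
	 * check.  A fake global-status register stands in for DMAR_GSTS_REG.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define DMA_GSTS_TES               (1u << 31)   /* translation enable status */
	#define VTD_FLAG_TRANS_PRE_ENABLED (1u << 0)

	struct fake_iommu {
		uint32_t gsts;
		unsigned int flags;
	};

	static void init_translation_status(struct fake_iommu *iommu)
	{
		if (iommu->gsts & DMA_GSTS_TES)
			iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
	}

	static bool translation_pre_enabled(struct fake_iommu *iommu)
	{
		return iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED;
	}

	static void take_over_old_tables(struct fake_iommu *iommu)
	{
		(void)iommu;
		printf("keeping the old kernel's mappings in place until drivers reattach\n");
	}

	int main(void)
	{
		/* Pretend the previous kernel left translation enabled */
		struct fake_iommu iommu = { .gsts = DMA_GSTS_TES };
		bool in_kdump_kernel = true;    /* stands in for is_kdump_kernel() */

		init_translation_status(&iommu);
		if (translation_pre_enabled(&iommu) && in_kdump_kernel)
			take_over_old_tables(&iommu);
		else
			printf("starting with empty translation tables\n");
		return 0;
	}
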
@@ -453,25 +531,21 @@ static int __init intel_iommu_setup(char *str)
453 while (*str) { 531 while (*str) {
454 if (!strncmp(str, "on", 2)) { 532 if (!strncmp(str, "on", 2)) {
455 dmar_disabled = 0; 533 dmar_disabled = 0;
456 printk(KERN_INFO "Intel-IOMMU: enabled\n"); 534 pr_info("IOMMU enabled\n");
457 } else if (!strncmp(str, "off", 3)) { 535 } else if (!strncmp(str, "off", 3)) {
458 dmar_disabled = 1; 536 dmar_disabled = 1;
459 printk(KERN_INFO "Intel-IOMMU: disabled\n"); 537 pr_info("IOMMU disabled\n");
460 } else if (!strncmp(str, "igfx_off", 8)) { 538 } else if (!strncmp(str, "igfx_off", 8)) {
461 dmar_map_gfx = 0; 539 dmar_map_gfx = 0;
462 printk(KERN_INFO 540 pr_info("Disable GFX device mapping\n");
463 "Intel-IOMMU: disable GFX device mapping\n");
464 } else if (!strncmp(str, "forcedac", 8)) { 541 } else if (!strncmp(str, "forcedac", 8)) {
465 printk(KERN_INFO 542 pr_info("Forcing DAC for PCI devices\n");
466 "Intel-IOMMU: Forcing DAC for PCI devices\n");
467 dmar_forcedac = 1; 543 dmar_forcedac = 1;
468 } else if (!strncmp(str, "strict", 6)) { 544 } else if (!strncmp(str, "strict", 6)) {
469 printk(KERN_INFO 545 pr_info("Disable batched IOTLB flush\n");
470 "Intel-IOMMU: disable batched IOTLB flush\n");
471 intel_iommu_strict = 1; 546 intel_iommu_strict = 1;
472 } else if (!strncmp(str, "sp_off", 6)) { 547 } else if (!strncmp(str, "sp_off", 6)) {
473 printk(KERN_INFO 548 pr_info("Disable supported super page\n");
474 "Intel-IOMMU: disable supported super page\n");
475 intel_iommu_superpage = 0; 549 intel_iommu_superpage = 0;
476 } else if (!strncmp(str, "ecs_off", 7)) { 550 } else if (!strncmp(str, "ecs_off", 7)) {
477 printk(KERN_INFO 551 printk(KERN_INFO
@@ -1132,7 +1206,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1132 1206
1133 root = (struct root_entry *)alloc_pgtable_page(iommu->node); 1207 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1134 if (!root) { 1208 if (!root) {
1135 pr_err("IOMMU: allocating root entry for %s failed\n", 1209 pr_err("Allocating root entry for %s failed\n",
1136 iommu->name); 1210 iommu->name);
1137 return -ENOMEM; 1211 return -ENOMEM;
1138 } 1212 }
@@ -1270,9 +1344,9 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1270 1344
1271 /* check IOTLB invalidation granularity */ 1345 /* check IOTLB invalidation granularity */
1272 if (DMA_TLB_IAIG(val) == 0) 1346 if (DMA_TLB_IAIG(val) == 0)
1273 printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); 1347 pr_err("Flush IOTLB failed\n");
1274 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 1348 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1275 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 1349 pr_debug("TLB flush request %Lx, actual %Lx\n",
1276 (unsigned long long)DMA_TLB_IIRG(type), 1350 (unsigned long long)DMA_TLB_IIRG(type),
1277 (unsigned long long)DMA_TLB_IAIG(val)); 1351 (unsigned long long)DMA_TLB_IAIG(val));
1278} 1352}
@@ -1443,8 +1517,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
1443 unsigned long nlongs; 1517 unsigned long nlongs;
1444 1518
1445 ndomains = cap_ndoms(iommu->cap); 1519 ndomains = cap_ndoms(iommu->cap);
1446 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", 1520 pr_debug("%s: Number of Domains supported <%ld>\n",
1447 iommu->seq_id, ndomains); 1521 iommu->name, ndomains);
1448 nlongs = BITS_TO_LONGS(ndomains); 1522 nlongs = BITS_TO_LONGS(ndomains);
1449 1523
1450 spin_lock_init(&iommu->lock); 1524 spin_lock_init(&iommu->lock);
@@ -1454,15 +1528,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
1454 */ 1528 */
1455 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); 1529 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1456 if (!iommu->domain_ids) { 1530 if (!iommu->domain_ids) {
1457 pr_err("IOMMU%d: allocating domain id array failed\n", 1531 pr_err("%s: Allocating domain id array failed\n",
1458 iommu->seq_id); 1532 iommu->name);
1459 return -ENOMEM; 1533 return -ENOMEM;
1460 } 1534 }
1461 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), 1535 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1462 GFP_KERNEL); 1536 GFP_KERNEL);
1463 if (!iommu->domains) { 1537 if (!iommu->domains) {
1464 pr_err("IOMMU%d: allocating domain array failed\n", 1538 pr_err("%s: Allocating domain array failed\n",
1465 iommu->seq_id); 1539 iommu->name);
1466 kfree(iommu->domain_ids); 1540 kfree(iommu->domain_ids);
1467 iommu->domain_ids = NULL; 1541 iommu->domain_ids = NULL;
1468 return -ENOMEM; 1542 return -ENOMEM;
@@ -1567,7 +1641,7 @@ static int iommu_attach_domain(struct dmar_domain *domain,
1567 num = __iommu_attach_domain(domain, iommu); 1641 num = __iommu_attach_domain(domain, iommu);
1568 spin_unlock_irqrestore(&iommu->lock, flags); 1642 spin_unlock_irqrestore(&iommu->lock, flags);
1569 if (num < 0) 1643 if (num < 0)
1570 pr_err("IOMMU: no free domain ids\n"); 1644 pr_err("%s: No free domain ids\n", iommu->name);
1571 1645
1572 return num; 1646 return num;
1573} 1647}
@@ -1659,7 +1733,7 @@ static int dmar_init_reserved_ranges(void)
1659 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), 1733 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1660 IOVA_PFN(IOAPIC_RANGE_END)); 1734 IOVA_PFN(IOAPIC_RANGE_END));
1661 if (!iova) { 1735 if (!iova) {
1662 printk(KERN_ERR "Reserve IOAPIC range failed\n"); 1736 pr_err("Reserve IOAPIC range failed\n");
1663 return -ENODEV; 1737 return -ENODEV;
1664 } 1738 }
1665 1739
@@ -1675,7 +1749,7 @@ static int dmar_init_reserved_ranges(void)
1675 IOVA_PFN(r->start), 1749 IOVA_PFN(r->start),
1676 IOVA_PFN(r->end)); 1750 IOVA_PFN(r->end));
1677 if (!iova) { 1751 if (!iova) {
1678 printk(KERN_ERR "Reserve iova failed\n"); 1752 pr_err("Reserve iova failed\n");
1679 return -ENODEV; 1753 return -ENODEV;
1680 } 1754 }
1681 } 1755 }
@@ -1722,7 +1796,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1722 sagaw = cap_sagaw(iommu->cap); 1796 sagaw = cap_sagaw(iommu->cap);
1723 if (!test_bit(agaw, &sagaw)) { 1797 if (!test_bit(agaw, &sagaw)) {
1724 /* hardware doesn't support it, choose a bigger one */ 1798 /* hardware doesn't support it, choose a bigger one */
1725 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); 1799 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1726 agaw = find_next_bit(&sagaw, 5, agaw); 1800 agaw = find_next_bit(&sagaw, 5, agaw);
1727 if (agaw >= 5) 1801 if (agaw >= 5)
1728 return -ENODEV; 1802 return -ENODEV;
@@ -1815,6 +1889,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1815 return 0; 1889 return 0;
1816 } 1890 }
1817 1891
1892 context_clear_entry(context);
1893
1818 id = domain->id; 1894 id = domain->id;
1819 pgd = domain->pgd; 1895 pgd = domain->pgd;
1820 1896
@@ -1823,7 +1899,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1823 id = iommu_attach_vm_domain(domain, iommu); 1899 id = iommu_attach_vm_domain(domain, iommu);
1824 if (id < 0) { 1900 if (id < 0) {
1825 spin_unlock_irqrestore(&iommu->lock, flags); 1901 spin_unlock_irqrestore(&iommu->lock, flags);
1826 pr_err("IOMMU: no free domain ids\n"); 1902 pr_err("%s: No free domain ids\n", iommu->name);
1827 return -EFAULT; 1903 return -EFAULT;
1828 } 1904 }
1829 } 1905 }
@@ -2050,8 +2126,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2050 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); 2126 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2051 if (tmp) { 2127 if (tmp) {
2052 static int dumps = 5; 2128 static int dumps = 5;
2053 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", 2129 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2054 iov_pfn, tmp, (unsigned long long)pteval); 2130 iov_pfn, tmp, (unsigned long long)pteval);
2055 if (dumps) { 2131 if (dumps) {
2056 dumps--; 2132 dumps--;
2057 debug_dma_dump_mappings(NULL); 2133 debug_dma_dump_mappings(NULL);
@@ -2323,7 +2399,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
2323 2399
2324 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), 2400 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2325 dma_to_mm_pfn(last_vpfn))) { 2401 dma_to_mm_pfn(last_vpfn))) {
2326 printk(KERN_ERR "IOMMU: reserve iova failed\n"); 2402 pr_err("Reserving iova failed\n");
2327 return -ENOMEM; 2403 return -ENOMEM;
2328 } 2404 }
2329 2405
@@ -2356,15 +2432,14 @@ static int iommu_prepare_identity_map(struct device *dev,
2356 range which is reserved in E820, so which didn't get set 2432 range which is reserved in E820, so which didn't get set
2357 up to start with in si_domain */ 2433 up to start with in si_domain */
2358 if (domain == si_domain && hw_pass_through) { 2434 if (domain == si_domain && hw_pass_through) {
2359 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", 2435 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2360 dev_name(dev), start, end); 2436 dev_name(dev), start, end);
2361 return 0; 2437 return 0;
2362 } 2438 }
2363 2439
2364 printk(KERN_INFO 2440 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2365 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 2441 dev_name(dev), start, end);
2366 dev_name(dev), start, end); 2442
2367
2368 if (end < start) { 2443 if (end < start) {
2369 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" 2444 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2370 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 2445 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
@@ -2421,12 +2496,11 @@ static inline void iommu_prepare_isa(void)
2421 if (!pdev) 2496 if (!pdev)
2422 return; 2497 return;
2423 2498
2424 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); 2499 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2425 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); 2500 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2426 2501
2427 if (ret) 2502 if (ret)
2428 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " 2503 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2429 "floppy might not work\n");
2430 2504
2431 pci_dev_put(pdev); 2505 pci_dev_put(pdev);
2432} 2506}
@@ -2470,7 +2544,7 @@ static int __init si_domain_init(int hw)
2470 return -EFAULT; 2544 return -EFAULT;
2471 } 2545 }
2472 2546
2473 pr_debug("IOMMU: identity mapping domain is domain %d\n", 2547 pr_debug("Identity mapping domain is domain %d\n",
2474 si_domain->id); 2548 si_domain->id);
2475 2549
2476 if (hw) 2550 if (hw)
@@ -2670,8 +2744,8 @@ static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw
2670 hw ? CONTEXT_TT_PASS_THROUGH : 2744 hw ? CONTEXT_TT_PASS_THROUGH :
2671 CONTEXT_TT_MULTI_LEVEL); 2745 CONTEXT_TT_MULTI_LEVEL);
2672 if (!ret) 2746 if (!ret)
2673 pr_info("IOMMU: %s identity mapping for device %s\n", 2747 pr_info("%s identity mapping for device %s\n",
2674 hw ? "hardware" : "software", dev_name(dev)); 2748 hw ? "Hardware" : "Software", dev_name(dev));
2675 else if (ret == -ENODEV) 2749 else if (ret == -ENODEV)
2676 /* device not associated with an iommu */ 2750 /* device not associated with an iommu */
2677 ret = 0; 2751 ret = 0;
@@ -2689,10 +2763,6 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
2689 int i; 2763 int i;
2690 int ret = 0; 2764 int ret = 0;
2691 2765
2692 ret = si_domain_init(hw);
2693 if (ret)
2694 return -EFAULT;
2695
2696 for_each_pci_dev(pdev) { 2766 for_each_pci_dev(pdev) {
2697 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); 2767 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2698 if (ret) 2768 if (ret)
@@ -2706,7 +2776,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
2706 2776
2707 if (dev->bus != &acpi_bus_type) 2777 if (dev->bus != &acpi_bus_type)
2708 continue; 2778 continue;
2709 2779
2710 adev = to_acpi_device(dev); 2780 adev = to_acpi_device(dev);
2711 mutex_lock(&adev->physical_node_lock); 2781 mutex_lock(&adev->physical_node_lock);
2712 list_for_each_entry(pn, &adev->physical_node_list, node) { 2782 list_for_each_entry(pn, &adev->physical_node_list, node) {
@@ -2748,19 +2818,200 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu)
2748 */ 2818 */
2749 iommu->flush.flush_context = __iommu_flush_context; 2819 iommu->flush.flush_context = __iommu_flush_context;
2750 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 2820 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2751 pr_info("IOMMU: %s using Register based invalidation\n", 2821 pr_info("%s: Using Register based invalidation\n",
2752 iommu->name); 2822 iommu->name);
2753 } else { 2823 } else {
2754 iommu->flush.flush_context = qi_flush_context; 2824 iommu->flush.flush_context = qi_flush_context;
2755 iommu->flush.flush_iotlb = qi_flush_iotlb; 2825 iommu->flush.flush_iotlb = qi_flush_iotlb;
2756 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name); 2826 pr_info("%s: Using Queued invalidation\n", iommu->name);
2827 }
2828}
2829
2830static int copy_context_table(struct intel_iommu *iommu,
2831 struct root_entry *old_re,
2832 struct context_entry **tbl,
2833 int bus, bool ext)
2834{
2835 struct context_entry *old_ce = NULL, *new_ce = NULL, ce;
2836 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2837 phys_addr_t old_ce_phys;
2838
2839 tbl_idx = ext ? bus * 2 : bus;
2840
2841 for (devfn = 0; devfn < 256; devfn++) {
2842 /* First calculate the correct index */
2843 idx = (ext ? devfn * 2 : devfn) % 256;
2844
2845 if (idx == 0) {
2846 /* First save what we may have and clean up */
2847 if (new_ce) {
2848 tbl[tbl_idx] = new_ce;
2849 __iommu_flush_cache(iommu, new_ce,
2850 VTD_PAGE_SIZE);
2851 pos = 1;
2852 }
2853
2854 if (old_ce)
2855 iounmap(old_ce);
2856
2857 ret = 0;
2858 if (devfn < 0x80)
2859 old_ce_phys = root_entry_lctp(old_re);
2860 else
2861 old_ce_phys = root_entry_uctp(old_re);
2862
2863 if (!old_ce_phys) {
2864 if (ext && devfn == 0) {
2865 /* No LCTP, try UCTP */
2866 devfn = 0x7f;
2867 continue;
2868 } else {
2869 goto out;
2870 }
2871 }
2872
2873 ret = -ENOMEM;
2874 old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2875 if (!old_ce)
2876 goto out;
2877
2878 new_ce = alloc_pgtable_page(iommu->node);
2879 if (!new_ce)
2880 goto out_unmap;
2881
2882 ret = 0;
2883 }
2884
2885 /* Now copy the context entry */
2886 ce = old_ce[idx];
2887
2888 if (!__context_present(&ce))
2889 continue;
2890
2891 did = context_domain_id(&ce);
2892 if (did >= 0 && did < cap_ndoms(iommu->cap))
2893 set_bit(did, iommu->domain_ids);
2894
2895 /*
2896 * We need a marker for copied context entries. This
2897 * marker needs to work for the old format as well as
2898 * for extended context entries.
2899 *
2900 * Bit 67 of the context entry is used. In the old
2901 * format this bit is available to software, in the
2902 * extended format it is the PGE bit, but PGE is ignored
2903 * by HW if PASIDs are disabled (and thus still
2904 * available).
2905 *
2906 * So disable PASIDs first and then mark the entry
2907 * copied. This means that we don't copy PASID
2908 * translations from the old kernel, but this is fine as
2909 * faults there are not fatal.
2910 */
2911 context_clear_pasid_enable(&ce);
2912 context_set_copied(&ce);
2913
2914 new_ce[idx] = ce;
2915 }
2916
2917 tbl[tbl_idx + pos] = new_ce;
2918
2919 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2920
2921out_unmap:
2922 iounmap(old_ce);
2923
2924out:
2925 return ret;
2926}
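
A note on the index arithmetic in copy_context_table() above: with the extended (ECS) context format each context entry is twice as wide, so a bus needs two 4 KiB context tables instead of one. tbl_idx therefore doubles the bus number, idx wraps at 256, and devfn 0x80 and up land in a second table reached through the root entry's upper pointer (hence the "No LCTP, try UCTP" jump to devfn 0x7f when the lower table pointer is absent). The standalone sketch below is not part of the patch; it only mirrors that arithmetic for a few device functions.

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the table/slot selection done by copy_context_table() above
 * (illustration only - not kernel code). */
static void locate(bool ext, int bus, int devfn)
{
	int tbl_idx = ext ? bus * 2 : bus;		/* index into ctxt_tbls[]     */
	int idx = (ext ? devfn * 2 : devfn) % 256;	/* slot within a 4 KiB table  */
	int upper = (ext && devfn >= 0x80) ? 1 : 0;	/* second table, via UCTP     */

	printf("bus %02x devfn %02x -> ctxt_tbls[%d], entry %d\n",
	       bus, devfn, tbl_idx + upper, idx);
}

int main(void)
{
	locate(true,  0x00, 0x00);	/* first entry of the lower (LCTP) table      */
	locate(true,  0x00, 0x7f);	/* last devfn covered by the lower table      */
	locate(true,  0x00, 0x80);	/* wraps to entry 0 of the upper (UCTP) table */
	locate(false, 0x00, 0x80);	/* legacy format: one 256-entry table per bus */
	return 0;
}
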
2927
2928static int copy_translation_tables(struct intel_iommu *iommu)
2929{
2930 struct context_entry **ctxt_tbls;
2931 struct root_entry *old_rt;
2932 phys_addr_t old_rt_phys;
2933 int ctxt_table_entries;
2934 unsigned long flags;
2935 u64 rtaddr_reg;
2936 int bus, ret;
2937 bool new_ext, ext;
2938
2939 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2940 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
2941 new_ext = !!ecap_ecs(iommu->ecap);
2942
2943 /*
2944 * The RTT bit can only be changed when translation is disabled,
2945 * but disabling translation means to open a window for data
2946 * corruption. So bail out and don't copy anything if we would
2947 * have to change the bit.
2948 */
2949 if (new_ext != ext)
2950 return -EINVAL;
2951
2952 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2953 if (!old_rt_phys)
2954 return -EINVAL;
2955
2956 old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
2957 if (!old_rt)
2958 return -ENOMEM;
2959
2960 /* This is too big for the stack - allocate it from slab */
2961 ctxt_table_entries = ext ? 512 : 256;
2962 ret = -ENOMEM;
2963 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
2964 if (!ctxt_tbls)
2965 goto out_unmap;
2966
2967 for (bus = 0; bus < 256; bus++) {
2968 ret = copy_context_table(iommu, &old_rt[bus],
2969 ctxt_tbls, bus, ext);
2970 if (ret) {
2971 pr_err("%s: Failed to copy context table for bus %d\n",
2972 iommu->name, bus);
2973 continue;
2974 }
2757 } 2975 }
2976
2977 spin_lock_irqsave(&iommu->lock, flags);
2978
2979 /* Context tables are copied, now write them to the root_entry table */
2980 for (bus = 0; bus < 256; bus++) {
2981 int idx = ext ? bus * 2 : bus;
2982 u64 val;
2983
2984 if (ctxt_tbls[idx]) {
2985 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2986 iommu->root_entry[bus].lo = val;
2987 }
2988
2989 if (!ext || !ctxt_tbls[idx + 1])
2990 continue;
2991
2992 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2993 iommu->root_entry[bus].hi = val;
2994 }
2995
2996 spin_unlock_irqrestore(&iommu->lock, flags);
2997
2998 kfree(ctxt_tbls);
2999
3000 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3001
3002 ret = 0;
3003
3004out_unmap:
3005 iounmap(old_rt);
3006
3007 return ret;
2758} 3008}
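
copy_translation_tables() relies on root_entry_lctp()/root_entry_uctp(), which are defined elsewhere in intel-iommu.c and not visible in this hunk. The minimal userspace sketch below models what those accessors and the later "virt_to_phys(...) | 1" writes assume about a root entry: bit 0 of each 64-bit half is a present bit, and the page-aligned remainder is the physical address of a context table (lo for devfn 0x00-0x7f, hi for devfn 0x80-0xff when the extended format is in use). The field layout and addresses here are simplified and hypothetical.

#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_MASK	(~0xfffULL)	/* context tables are 4 KiB aligned */

/* Simplified model of a VT-d root entry as used by the hunk above. */
struct root_entry {
	uint64_t lo;	/* present bit + lower context-table pointer (LCTP) */
	uint64_t hi;	/* present bit + upper context-table pointer (UCTP) */
};

static uint64_t ctp(uint64_t half)
{
	return (half & 1) ? (half & VTD_PAGE_MASK) : 0;	/* 0 if not present */
}

int main(void)
{
	/* Hypothetical table address, for illustration only. */
	struct root_entry re = {
		.lo = 0x12345000ULL | 1,	/* lower table present */
		.hi = 0,			/* no upper table      */
	};

	printf("LCTP = %#llx\n", (unsigned long long)ctp(re.lo));
	printf("UCTP = %#llx\n", (unsigned long long)ctp(re.hi));
	return 0;
}
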
2759 3009
2760static int __init init_dmars(void) 3010static int __init init_dmars(void)
2761{ 3011{
2762 struct dmar_drhd_unit *drhd; 3012 struct dmar_drhd_unit *drhd;
2763 struct dmar_rmrr_unit *rmrr; 3013 struct dmar_rmrr_unit *rmrr;
3014 bool copied_tables = false;
2764 struct device *dev; 3015 struct device *dev;
2765 struct intel_iommu *iommu; 3016 struct intel_iommu *iommu;
2766 int i, ret; 3017 int i, ret;
@@ -2781,8 +3032,7 @@ static int __init init_dmars(void)
2781 g_num_of_iommus++; 3032 g_num_of_iommus++;
2782 continue; 3033 continue;
2783 } 3034 }
2784 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n", 3035 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
2785 DMAR_UNITS_SUPPORTED);
2786 } 3036 }
2787 3037
2788 /* Preallocate enough resources for IOMMU hot-addition */ 3038 /* Preallocate enough resources for IOMMU hot-addition */
@@ -2792,7 +3042,7 @@ static int __init init_dmars(void)
2792 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 3042 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2793 GFP_KERNEL); 3043 GFP_KERNEL);
2794 if (!g_iommus) { 3044 if (!g_iommus) {
2795 printk(KERN_ERR "Allocating global iommu array failed\n"); 3045 pr_err("Allocating global iommu array failed\n");
2796 ret = -ENOMEM; 3046 ret = -ENOMEM;
2797 goto error; 3047 goto error;
2798 } 3048 }
@@ -2807,10 +3057,21 @@ static int __init init_dmars(void)
2807 for_each_active_iommu(iommu, drhd) { 3057 for_each_active_iommu(iommu, drhd) {
2808 g_iommus[iommu->seq_id] = iommu; 3058 g_iommus[iommu->seq_id] = iommu;
2809 3059
3060 intel_iommu_init_qi(iommu);
3061
2810 ret = iommu_init_domains(iommu); 3062 ret = iommu_init_domains(iommu);
2811 if (ret) 3063 if (ret)
2812 goto free_iommu; 3064 goto free_iommu;
2813 3065
3066 init_translation_status(iommu);
3067
3068 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3069 iommu_disable_translation(iommu);
3070 clear_translation_pre_enabled(iommu);
3071 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3072 iommu->name);
3073 }
3074
2814 /* 3075 /*
2815 * TBD: 3076 * TBD:
2816 * we could share the same root & context tables 3077 * we could share the same root & context tables
@@ -2819,13 +3080,41 @@ static int __init init_dmars(void)
2819 ret = iommu_alloc_root_entry(iommu); 3080 ret = iommu_alloc_root_entry(iommu);
2820 if (ret) 3081 if (ret)
2821 goto free_iommu; 3082 goto free_iommu;
3083
3084 if (translation_pre_enabled(iommu)) {
3085 pr_info("Translation already enabled - trying to copy translation structures\n");
3086
3087 ret = copy_translation_tables(iommu);
3088 if (ret) {
3089 /*
3090 * We found the IOMMU with translation
3091 * enabled - but failed to copy over the
3092 * old root-entry table. Try to proceed
3093 * by disabling translation now and
3094 * allocating a clean root-entry table.
3095 * This might cause DMAR faults, but
3096 * probably the dump will still succeed.
3097 */
3098 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3099 iommu->name);
3100 iommu_disable_translation(iommu);
3101 clear_translation_pre_enabled(iommu);
3102 } else {
3103 pr_info("Copied translation tables from previous kernel for %s\n",
3104 iommu->name);
3105 copied_tables = true;
3106 }
3107 }
3108
3109 iommu_flush_write_buffer(iommu);
3110 iommu_set_root_entry(iommu);
3111 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3112 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3113
2822 if (!ecap_pass_through(iommu->ecap)) 3114 if (!ecap_pass_through(iommu->ecap))
2823 hw_pass_through = 0; 3115 hw_pass_through = 0;
2824 } 3116 }
2825 3117
2826 for_each_active_iommu(iommu, drhd)
2827 intel_iommu_init_qi(iommu);
2828
2829 if (iommu_pass_through) 3118 if (iommu_pass_through)
2830 iommu_identity_mapping |= IDENTMAP_ALL; 3119 iommu_identity_mapping |= IDENTMAP_ALL;
2831 3120
@@ -2833,9 +3122,24 @@ static int __init init_dmars(void)
2833 iommu_identity_mapping |= IDENTMAP_GFX; 3122 iommu_identity_mapping |= IDENTMAP_GFX;
2834#endif 3123#endif
2835 3124
3125 if (iommu_identity_mapping) {
3126 ret = si_domain_init(hw_pass_through);
3127 if (ret)
3128 goto free_iommu;
3129 }
3130
2836 check_tylersburg_isoch(); 3131 check_tylersburg_isoch();
2837 3132
2838 /* 3133 /*
3134 * If we copied translations from a previous kernel in the kdump
3135 * case, we can not assign the devices to domains now, as that
3136 * would eliminate the old mappings. So skip this part and defer
3137 * the assignment to device driver initialization time.
3138 */
3139 if (copied_tables)
3140 goto domains_done;
3141
3142 /*
2839 * If pass through is not set or not enabled, setup context entries for 3143 * If pass through is not set or not enabled, setup context entries for
2840 * identity mappings for rmrr, gfx, and isa and may fall back to static 3144 * identity mappings for rmrr, gfx, and isa and may fall back to static
2841 * identity mapping if iommu_identity_mapping is set. 3145 * identity mapping if iommu_identity_mapping is set.
@@ -2843,7 +3147,7 @@ static int __init init_dmars(void)
2843 if (iommu_identity_mapping) { 3147 if (iommu_identity_mapping) {
2844 ret = iommu_prepare_static_identity_mapping(hw_pass_through); 3148 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2845 if (ret) { 3149 if (ret) {
2846 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); 3150 pr_crit("Failed to setup IOMMU pass-through\n");
2847 goto free_iommu; 3151 goto free_iommu;
2848 } 3152 }
2849 } 3153 }
@@ -2861,20 +3165,21 @@ static int __init init_dmars(void)
2861 * endfor 3165 * endfor
2862 * endfor 3166 * endfor
2863 */ 3167 */
2864 printk(KERN_INFO "IOMMU: Setting RMRR:\n"); 3168 pr_info("Setting RMRR:\n");
2865 for_each_rmrr_units(rmrr) { 3169 for_each_rmrr_units(rmrr) {
2866 /* some BIOS lists non-exist devices in DMAR table. */ 3170 /* some BIOS lists non-exist devices in DMAR table. */
2867 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3171 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2868 i, dev) { 3172 i, dev) {
2869 ret = iommu_prepare_rmrr_dev(rmrr, dev); 3173 ret = iommu_prepare_rmrr_dev(rmrr, dev);
2870 if (ret) 3174 if (ret)
2871 printk(KERN_ERR 3175 pr_err("Mapping reserved region failed\n");
2872 "IOMMU: mapping reserved region failed\n");
2873 } 3176 }
2874 } 3177 }
2875 3178
2876 iommu_prepare_isa(); 3179 iommu_prepare_isa();
2877 3180
3181domains_done:
3182
2878 /* 3183 /*
2879 * for each drhd 3184 * for each drhd
2880 * enable fault log 3185 * enable fault log
@@ -2899,11 +3204,9 @@ static int __init init_dmars(void)
2899 if (ret) 3204 if (ret)
2900 goto free_iommu; 3205 goto free_iommu;
2901 3206
2902 iommu_set_root_entry(iommu); 3207 if (!translation_pre_enabled(iommu))
3208 iommu_enable_translation(iommu);
2903 3209
2904 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2905 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2906 iommu_enable_translation(iommu);
2907 iommu_disable_protect_mem_regions(iommu); 3210 iommu_disable_protect_mem_regions(iommu);
2908 } 3211 }
2909 3212
@@ -2944,7 +3247,7 @@ static struct iova *intel_alloc_iova(struct device *dev,
2944 } 3247 }
2945 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); 3248 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2946 if (unlikely(!iova)) { 3249 if (unlikely(!iova)) {
2947 printk(KERN_ERR "Allocating %ld-page iova for %s failed", 3250 pr_err("Allocating %ld-page iova for %s failed",
2948 nrpages, dev_name(dev)); 3251 nrpages, dev_name(dev));
2949 return NULL; 3252 return NULL;
2950 } 3253 }
@@ -2959,7 +3262,7 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2959 3262
2960 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 3263 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2961 if (!domain) { 3264 if (!domain) {
2962 printk(KERN_ERR "Allocating domain for %s failed", 3265 pr_err("Allocating domain for %s failed\n",
2963 dev_name(dev)); 3266 dev_name(dev));
2964 return NULL; 3267 return NULL;
2965 } 3268 }
@@ -2968,7 +3271,7 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2968 if (unlikely(!domain_context_mapped(dev))) { 3271 if (unlikely(!domain_context_mapped(dev))) {
2969 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); 3272 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2970 if (ret) { 3273 if (ret) {
2971 printk(KERN_ERR "Domain context map for %s failed", 3274 pr_err("Domain context map for %s failed\n",
2972 dev_name(dev)); 3275 dev_name(dev));
2973 return NULL; 3276 return NULL;
2974 } 3277 }
@@ -3010,8 +3313,8 @@ static int iommu_no_mapping(struct device *dev)
3010 * to non-identity mapping. 3313 * to non-identity mapping.
3011 */ 3314 */
3012 domain_remove_one_dev_info(si_domain, dev); 3315 domain_remove_one_dev_info(si_domain, dev);
3013 printk(KERN_INFO "32bit %s uses non-identity mapping\n", 3316 pr_info("32bit %s uses non-identity mapping\n",
3014 dev_name(dev)); 3317 dev_name(dev));
3015 return 0; 3318 return 0;
3016 } 3319 }
3017 } else { 3320 } else {
@@ -3026,8 +3329,8 @@ static int iommu_no_mapping(struct device *dev)
3026 CONTEXT_TT_PASS_THROUGH : 3329 CONTEXT_TT_PASS_THROUGH :
3027 CONTEXT_TT_MULTI_LEVEL); 3330 CONTEXT_TT_MULTI_LEVEL);
3028 if (!ret) { 3331 if (!ret) {
3029 printk(KERN_INFO "64bit %s uses identity mapping\n", 3332 pr_info("64bit %s uses identity mapping\n",
3030 dev_name(dev)); 3333 dev_name(dev));
3031 return 1; 3334 return 1;
3032 } 3335 }
3033 } 3336 }
@@ -3096,7 +3399,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3096error: 3399error:
3097 if (iova) 3400 if (iova)
3098 __free_iova(&domain->iovad, iova); 3401 __free_iova(&domain->iovad, iova);
3099 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", 3402 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3100 dev_name(dev), size, (unsigned long long)paddr, dir); 3403 dev_name(dev), size, (unsigned long long)paddr, dir);
3101 return 0; 3404 return 0;
3102} 3405}
@@ -3411,7 +3714,7 @@ static inline int iommu_domain_cache_init(void)
3411 3714
3412 NULL); 3715 NULL);
3413 if (!iommu_domain_cache) { 3716 if (!iommu_domain_cache) {
3414 printk(KERN_ERR "Couldn't create iommu_domain cache\n"); 3717 pr_err("Couldn't create iommu_domain cache\n");
3415 ret = -ENOMEM; 3718 ret = -ENOMEM;
3416 } 3719 }
3417 3720
@@ -3428,7 +3731,7 @@ static inline int iommu_devinfo_cache_init(void)
3428 SLAB_HWCACHE_ALIGN, 3731 SLAB_HWCACHE_ALIGN,
3429 NULL); 3732 NULL);
3430 if (!iommu_devinfo_cache) { 3733 if (!iommu_devinfo_cache) {
3431 printk(KERN_ERR "Couldn't create devinfo cache\n"); 3734 pr_err("Couldn't create devinfo cache\n");
3432 ret = -ENOMEM; 3735 ret = -ENOMEM;
3433 } 3736 }
3434 3737
@@ -3805,19 +4108,19 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3805 return 0; 4108 return 0;
3806 4109
3807 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { 4110 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3808 pr_warn("IOMMU: %s doesn't support hardware pass through.\n", 4111 pr_warn("%s: Doesn't support hardware pass through.\n",
3809 iommu->name); 4112 iommu->name);
3810 return -ENXIO; 4113 return -ENXIO;
3811 } 4114 }
3812 if (!ecap_sc_support(iommu->ecap) && 4115 if (!ecap_sc_support(iommu->ecap) &&
3813 domain_update_iommu_snooping(iommu)) { 4116 domain_update_iommu_snooping(iommu)) {
3814 pr_warn("IOMMU: %s doesn't support snooping.\n", 4117 pr_warn("%s: Doesn't support snooping.\n",
3815 iommu->name); 4118 iommu->name);
3816 return -ENXIO; 4119 return -ENXIO;
3817 } 4120 }
3818 sp = domain_update_iommu_superpage(iommu) - 1; 4121 sp = domain_update_iommu_superpage(iommu) - 1;
3819 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { 4122 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3820 pr_warn("IOMMU: %s doesn't support large page.\n", 4123 pr_warn("%s: Doesn't support large page.\n",
3821 iommu->name); 4124 iommu->name);
3822 return -ENXIO; 4125 return -ENXIO;
3823 } 4126 }
@@ -4048,7 +4351,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
4048 start = mhp->start_pfn << PAGE_SHIFT; 4351 start = mhp->start_pfn << PAGE_SHIFT;
4049 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; 4352 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4050 if (iommu_domain_identity_map(si_domain, start, end)) { 4353 if (iommu_domain_identity_map(si_domain, start, end)) {
4051 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", 4354 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4052 start, end); 4355 start, end);
4053 return NOTIFY_BAD; 4356 return NOTIFY_BAD;
4054 } 4357 }
@@ -4066,7 +4369,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
4066 4369
4067 iova = find_iova(&si_domain->iovad, start_vpfn); 4370 iova = find_iova(&si_domain->iovad, start_vpfn);
4068 if (iova == NULL) { 4371 if (iova == NULL) {
4069 pr_debug("dmar: failed get IOVA for PFN %lx\n", 4372 pr_debug("Failed get IOVA for PFN %lx\n",
4070 start_vpfn); 4373 start_vpfn);
4071 break; 4374 break;
4072 } 4375 }
@@ -4074,7 +4377,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
4074 iova = split_and_remove_iova(&si_domain->iovad, iova, 4377 iova = split_and_remove_iova(&si_domain->iovad, iova,
4075 start_vpfn, last_vpfn); 4378 start_vpfn, last_vpfn);
4076 if (iova == NULL) { 4379 if (iova == NULL) {
4077 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", 4380 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4078 start_vpfn, last_vpfn); 4381 start_vpfn, last_vpfn);
4079 return NOTIFY_BAD; 4382 return NOTIFY_BAD;
4080 } 4383 }
@@ -4183,13 +4486,6 @@ int __init intel_iommu_init(void)
4183 goto out_free_dmar; 4486 goto out_free_dmar;
4184 } 4487 }
4185 4488
4186 /*
4187 * Disable translation if already enabled prior to OS handover.
4188 */
4189 for_each_active_iommu(iommu, drhd)
4190 if (iommu->gcmd & DMA_GCMD_TE)
4191 iommu_disable_translation(iommu);
4192
4193 if (dmar_dev_scope_init() < 0) { 4489 if (dmar_dev_scope_init() < 0) {
4194 if (force_on) 4490 if (force_on)
4195 panic("tboot: Failed to initialize DMAR device scope\n"); 4491 panic("tboot: Failed to initialize DMAR device scope\n");
@@ -4200,10 +4496,10 @@ int __init intel_iommu_init(void)
4200 goto out_free_dmar; 4496 goto out_free_dmar;
4201 4497
4202 if (list_empty(&dmar_rmrr_units)) 4498 if (list_empty(&dmar_rmrr_units))
4203 printk(KERN_INFO "DMAR: No RMRR found\n"); 4499 pr_info("No RMRR found\n");
4204 4500
4205 if (list_empty(&dmar_atsr_units)) 4501 if (list_empty(&dmar_atsr_units))
4206 printk(KERN_INFO "DMAR: No ATSR found\n"); 4502 pr_info("No ATSR found\n");
4207 4503
4208 if (dmar_init_reserved_ranges()) { 4504 if (dmar_init_reserved_ranges()) {
4209 if (force_on) 4505 if (force_on)
@@ -4217,12 +4513,11 @@ int __init intel_iommu_init(void)
4217 if (ret) { 4513 if (ret) {
4218 if (force_on) 4514 if (force_on)
4219 panic("tboot: Failed to initialize DMARs\n"); 4515 panic("tboot: Failed to initialize DMARs\n");
4220 printk(KERN_ERR "IOMMU: dmar init failed\n"); 4516 pr_err("Initialization failed\n");
4221 goto out_free_reserved_range; 4517 goto out_free_reserved_range;
4222 } 4518 }
4223 up_write(&dmar_global_lock); 4519 up_write(&dmar_global_lock);
4224 printk(KERN_INFO 4520 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4225 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4226 4521
4227 init_timer(&unmap_timer); 4522 init_timer(&unmap_timer);
4228#ifdef CONFIG_SWIOTLB 4523#ifdef CONFIG_SWIOTLB
@@ -4364,13 +4659,11 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4364 4659
4365 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); 4660 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4366 if (!dmar_domain) { 4661 if (!dmar_domain) {
4367 printk(KERN_ERR 4662 pr_err("Can't allocate dmar_domain\n");
4368 "intel_iommu_domain_init: dmar_domain == NULL\n");
4369 return NULL; 4663 return NULL;
4370 } 4664 }
4371 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 4665 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4372 printk(KERN_ERR 4666 pr_err("Domain initialization failed\n");
4373 "intel_iommu_domain_init() failed\n");
4374 domain_exit(dmar_domain); 4667 domain_exit(dmar_domain);
4375 return NULL; 4668 return NULL;
4376 } 4669 }
@@ -4429,7 +4722,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
4429 addr_width = cap_mgaw(iommu->cap); 4722 addr_width = cap_mgaw(iommu->cap);
4430 4723
4431 if (dmar_domain->max_addr > (1LL << addr_width)) { 4724 if (dmar_domain->max_addr > (1LL << addr_width)) {
4432 printk(KERN_ERR "%s: iommu width (%d) is not " 4725 pr_err("%s: iommu width (%d) is not "
4433 "sufficient for the mapped address (%llx)\n", 4726 "sufficient for the mapped address (%llx)\n",
4434 __func__, addr_width, dmar_domain->max_addr); 4727 __func__, addr_width, dmar_domain->max_addr);
4435 return -EFAULT; 4728 return -EFAULT;
@@ -4483,7 +4776,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
4483 /* check if minimum agaw is sufficient for mapped address */ 4776 /* check if minimum agaw is sufficient for mapped address */
4484 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1; 4777 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4485 if (end < max_addr) { 4778 if (end < max_addr) {
4486 printk(KERN_ERR "%s: iommu width (%d) is not " 4779 pr_err("%s: iommu width (%d) is not "
4487 "sufficient for the mapped address (%llx)\n", 4780 "sufficient for the mapped address (%llx)\n",
4488 __func__, dmar_domain->gaw, max_addr); 4781 __func__, dmar_domain->gaw, max_addr);
4489 return -EFAULT; 4782 return -EFAULT;
@@ -4624,7 +4917,7 @@ static const struct iommu_ops intel_iommu_ops = {
4624static void quirk_iommu_g4x_gfx(struct pci_dev *dev) 4917static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4625{ 4918{
4626 /* G4x/GM45 integrated gfx dmar support is totally busted. */ 4919 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4627 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n"); 4920 pr_info("Disabling IOMMU for graphics on this chipset\n");
4628 dmar_map_gfx = 0; 4921 dmar_map_gfx = 0;
4629} 4922}
4630 4923
@@ -4642,7 +4935,7 @@ static void quirk_iommu_rwbf(struct pci_dev *dev)
4642 * Mobile 4 Series Chipset neglects to set RWBF capability, 4935 * Mobile 4 Series Chipset neglects to set RWBF capability,
4643 * but needs it. Same seems to hold for the desktop versions. 4936 * but needs it. Same seems to hold for the desktop versions.
4644 */ 4937 */
4645 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); 4938 pr_info("Forcing write-buffer flush capability\n");
4646 rwbf_quirk = 1; 4939 rwbf_quirk = 1;
4647} 4940}
4648 4941
@@ -4672,11 +4965,11 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4672 return; 4965 return;
4673 4966
4674 if (!(ggc & GGC_MEMORY_VT_ENABLED)) { 4967 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4675 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); 4968 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4676 dmar_map_gfx = 0; 4969 dmar_map_gfx = 0;
4677 } else if (dmar_map_gfx) { 4970 } else if (dmar_map_gfx) {
4678 /* we have to ensure the gfx device is idle before we flush */ 4971 /* we have to ensure the gfx device is idle before we flush */
4679 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n"); 4972 pr_info("Disabling batched IOTLB flush on Ironlake\n");
4680 intel_iommu_strict = 1; 4973 intel_iommu_strict = 1;
4681 } 4974 }
4682} 4975}
@@ -4738,7 +5031,7 @@ static void __init check_tylersburg_isoch(void)
4738 iommu_identity_mapping |= IDENTMAP_AZALIA; 5031 iommu_identity_mapping |= IDENTMAP_AZALIA;
4739 return; 5032 return;
4740 } 5033 }
4741 5034
4742 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", 5035 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4743 vtisochctrl); 5036 vtisochctrl);
4744} 5037}
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 80f1d1486247..f15692a410c7 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1,3 +1,6 @@
1
2#define pr_fmt(fmt) "DMAR-IR: " fmt
3
1#include <linux/interrupt.h> 4#include <linux/interrupt.h>
2#include <linux/dmar.h> 5#include <linux/dmar.h>
3#include <linux/spinlock.h> 6#include <linux/spinlock.h>
@@ -9,6 +12,7 @@
9#include <linux/intel-iommu.h> 12#include <linux/intel-iommu.h>
10#include <linux/acpi.h> 13#include <linux/acpi.h>
11#include <linux/irqdomain.h> 14#include <linux/irqdomain.h>
15#include <linux/crash_dump.h>
12#include <asm/io_apic.h> 16#include <asm/io_apic.h>
13#include <asm/smp.h> 17#include <asm/smp.h>
14#include <asm/cpu.h> 18#include <asm/cpu.h>
@@ -74,8 +78,28 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
74static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); 78static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
75static struct irq_domain_ops intel_ir_domain_ops; 79static struct irq_domain_ops intel_ir_domain_ops;
76 80
81static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
77static int __init parse_ioapics_under_ir(void); 82static int __init parse_ioapics_under_ir(void);
78 83
84static bool ir_pre_enabled(struct intel_iommu *iommu)
85{
86 return (iommu->flags & VTD_FLAG_IRQ_REMAP_PRE_ENABLED);
87}
88
89static void clear_ir_pre_enabled(struct intel_iommu *iommu)
90{
91 iommu->flags &= ~VTD_FLAG_IRQ_REMAP_PRE_ENABLED;
92}
93
94static void init_ir_status(struct intel_iommu *iommu)
95{
96 u32 gsts;
97
98 gsts = readl(iommu->reg + DMAR_GSTS_REG);
99 if (gsts & DMA_GSTS_IRES)
100 iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED;
101}
102
79static int alloc_irte(struct intel_iommu *iommu, int irq, 103static int alloc_irte(struct intel_iommu *iommu, int irq,
80 struct irq_2_iommu *irq_iommu, u16 count) 104 struct irq_2_iommu *irq_iommu, u16 count)
81{ 105{
@@ -93,8 +117,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq,
93 } 117 }
94 118
95 if (mask > ecap_max_handle_mask(iommu->ecap)) { 119 if (mask > ecap_max_handle_mask(iommu->ecap)) {
96 printk(KERN_ERR 120 pr_err("Requested mask %x exceeds the max invalidation handle"
97 "Requested mask %x exceeds the max invalidation handle"
98 " mask value %Lx\n", mask, 121 " mask value %Lx\n", mask,
99 ecap_max_handle_mask(iommu->ecap)); 122 ecap_max_handle_mask(iommu->ecap));
100 return -1; 123 return -1;
@@ -268,7 +291,7 @@ static int set_ioapic_sid(struct irte *irte, int apic)
268 up_read(&dmar_global_lock); 291 up_read(&dmar_global_lock);
269 292
270 if (sid == 0) { 293 if (sid == 0) {
271 pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic); 294 pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic);
272 return -1; 295 return -1;
273 } 296 }
274 297
@@ -295,7 +318,7 @@ static int set_hpet_sid(struct irte *irte, u8 id)
295 up_read(&dmar_global_lock); 318 up_read(&dmar_global_lock);
296 319
297 if (sid == 0) { 320 if (sid == 0) {
298 pr_warning("Failed to set source-id of HPET block (%d)\n", id); 321 pr_warn("Failed to set source-id of HPET block (%d)\n", id);
299 return -1; 322 return -1;
300 } 323 }
301 324
@@ -359,11 +382,59 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
359 return 0; 382 return 0;
360} 383}
361 384
385static int iommu_load_old_irte(struct intel_iommu *iommu)
386{
387 struct irte *old_ir_table;
388 phys_addr_t irt_phys;
389 unsigned int i;
390 size_t size;
391 u64 irta;
392
393 if (!is_kdump_kernel()) {
394 pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n",
395 iommu->name);
396 clear_ir_pre_enabled(iommu);
397 iommu_disable_irq_remapping(iommu);
398 return -EINVAL;
399 }
400
401 /* Check whether the old ir-table has the same size as ours */
402 irta = dmar_readq(iommu->reg + DMAR_IRTA_REG);
403 if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK)
404 != INTR_REMAP_TABLE_REG_SIZE)
405 return -EINVAL;
406
407 irt_phys = irta & VTD_PAGE_MASK;
408 size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte);
409
410 /* Map the old IR table */
411 old_ir_table = ioremap_cache(irt_phys, size);
412 if (!old_ir_table)
413 return -ENOMEM;
414
415 /* Copy data over */
416 memcpy(iommu->ir_table->base, old_ir_table, size);
417
418 __iommu_flush_cache(iommu, iommu->ir_table->base, size);
419
420 /*
421 * Now check the table for used entries and mark those as
422 * allocated in the bitmap
423 */
424 for (i = 0; i < INTR_REMAP_TABLE_ENTRIES; i++) {
425 if (iommu->ir_table->base[i].present)
426 bitmap_set(iommu->ir_table->bitmap, i, 1);
427 }
428
429 return 0;
430}
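
For reference on the size check in iommu_load_old_irte() above: per the VT-d spec, the low four bits of DMAR_IRTA_REG (masked with INTR_REMAP_TABLE_REG_SIZE_MASK) encode a size field S, and the interrupt remapping table then holds 2^(S+1) entries. The old table is only reused when the previous kernel programmed the same S as this driver (0xf, i.e. INTR_REMAP_TABLE_ENTRIES = 65536). A small standalone illustration of that decoding, using a made-up register value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t irta = 0x7ffe000fULL;		/* hypothetical DMAR_IRTA_REG value */
	unsigned int s = irta & 0xf;		/* INTR_REMAP_TABLE_REG_SIZE_MASK   */
	unsigned long entries = 1UL << (s + 1);	/* table holds 2^(S+1) IRTEs        */

	printf("S = %u -> %lu IRTEs (%lu KiB at 16 bytes each)\n",
	       s, entries, entries * 16UL / 1024);
	return 0;
}
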
431
432
362static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) 433static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
363{ 434{
435 unsigned long flags;
364 u64 addr; 436 u64 addr;
365 u32 sts; 437 u32 sts;
366 unsigned long flags;
367 438
368 addr = virt_to_phys((void *)iommu->ir_table->base); 439 addr = virt_to_phys((void *)iommu->ir_table->base);
369 440
@@ -380,10 +451,16 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
380 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 451 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
381 452
382 /* 453 /*
383 * global invalidation of interrupt entry cache before enabling 454 * Global invalidation of interrupt entry cache to make sure the
384 * interrupt-remapping. 455 * hardware uses the new irq remapping table.
385 */ 456 */
386 qi_global_iec(iommu); 457 qi_global_iec(iommu);
458}
459
460static void iommu_enable_irq_remapping(struct intel_iommu *iommu)
461{
462 unsigned long flags;
463 u32 sts;
387 464
388 raw_spin_lock_irqsave(&iommu->register_lock, flags); 465 raw_spin_lock_irqsave(&iommu->register_lock, flags);
389 466
@@ -449,6 +526,37 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
449 ir_table->base = page_address(pages); 526 ir_table->base = page_address(pages);
450 ir_table->bitmap = bitmap; 527 ir_table->bitmap = bitmap;
451 iommu->ir_table = ir_table; 528 iommu->ir_table = ir_table;
529
530 /*
531 * If the queued invalidation is already initialized,
532 * shouldn't disable it.
533 */
534 if (!iommu->qi) {
535 /*
536 * Clear previous faults.
537 */
538 dmar_fault(-1, iommu);
539 dmar_disable_qi(iommu);
540
541 if (dmar_enable_qi(iommu)) {
542 pr_err("Failed to enable queued invalidation\n");
543 goto out_free_bitmap;
544 }
545 }
546
547 init_ir_status(iommu);
548
549 if (ir_pre_enabled(iommu)) {
550 if (iommu_load_old_irte(iommu))
551 pr_err("Failed to copy IR table for %s from previous kernel\n",
552 iommu->name);
553 else
554 pr_info("Copied IR table for %s from previous kernel\n",
555 iommu->name);
556 }
557
558 iommu_set_irq_remapping(iommu, eim_mode);
559
452 return 0; 560 return 0;
453 561
454out_free_bitmap: 562out_free_bitmap:
@@ -457,6 +565,9 @@ out_free_pages:
457 __free_pages(pages, INTR_REMAP_PAGE_ORDER); 565 __free_pages(pages, INTR_REMAP_PAGE_ORDER);
458out_free_table: 566out_free_table:
459 kfree(ir_table); 567 kfree(ir_table);
568
569 iommu->ir_table = NULL;
570
460 return -ENOMEM; 571 return -ENOMEM;
461} 572}
462 573
@@ -534,17 +645,17 @@ static void __init intel_cleanup_irq_remapping(void)
534 } 645 }
535 646
536 if (x2apic_supported()) 647 if (x2apic_supported())
537 pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); 648 pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
538} 649}
539 650
540static int __init intel_prepare_irq_remapping(void) 651static int __init intel_prepare_irq_remapping(void)
541{ 652{
542 struct dmar_drhd_unit *drhd; 653 struct dmar_drhd_unit *drhd;
543 struct intel_iommu *iommu; 654 struct intel_iommu *iommu;
655 int eim = 0;
544 656
545 if (irq_remap_broken) { 657 if (irq_remap_broken) {
546 printk(KERN_WARNING 658 pr_warn("This system BIOS has enabled interrupt remapping\n"
547 "This system BIOS has enabled interrupt remapping\n"
548 "on a chipset that contains an erratum making that\n" 659 "on a chipset that contains an erratum making that\n"
549 "feature unstable. To maintain system stability\n" 660 "feature unstable. To maintain system stability\n"
550 "interrupt remapping is being disabled. Please\n" 661 "interrupt remapping is being disabled. Please\n"
@@ -560,7 +671,7 @@ static int __init intel_prepare_irq_remapping(void)
560 return -ENODEV; 671 return -ENODEV;
561 672
562 if (parse_ioapics_under_ir() != 1) { 673 if (parse_ioapics_under_ir() != 1) {
563 printk(KERN_INFO "Not enabling interrupt remapping\n"); 674 pr_info("Not enabling interrupt remapping\n");
564 goto error; 675 goto error;
565 } 676 }
566 677
@@ -569,10 +680,34 @@ static int __init intel_prepare_irq_remapping(void)
569 if (!ecap_ir_support(iommu->ecap)) 680 if (!ecap_ir_support(iommu->ecap))
570 goto error; 681 goto error;
571 682
572 /* Do the allocations early */ 683 /* Detect remapping mode: lapic or x2apic */
573 for_each_iommu(iommu, drhd) 684 if (x2apic_supported()) {
574 if (intel_setup_irq_remapping(iommu)) 685 eim = !dmar_x2apic_optout();
686 if (!eim) {
687 pr_info("x2apic is disabled because BIOS sets x2apic opt out bit.\n");
688 pr_info("Use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
689 }
690 }
691
692 for_each_iommu(iommu, drhd) {
693 if (eim && !ecap_eim_support(iommu->ecap)) {
694 pr_info("%s does not support EIM\n", iommu->name);
695 eim = 0;
696 }
697 }
698
699 eim_mode = eim;
700 if (eim)
701 pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
702
703 /* Do the initializations early */
704 for_each_iommu(iommu, drhd) {
705 if (intel_setup_irq_remapping(iommu)) {
706 pr_err("Failed to setup irq remapping for %s\n",
707 iommu->name);
575 goto error; 708 goto error;
709 }
710 }
576 711
577 return 0; 712 return 0;
578 713
@@ -606,68 +741,13 @@ static int __init intel_enable_irq_remapping(void)
606 struct dmar_drhd_unit *drhd; 741 struct dmar_drhd_unit *drhd;
607 struct intel_iommu *iommu; 742 struct intel_iommu *iommu;
608 bool setup = false; 743 bool setup = false;
609 int eim = 0;
610
611 if (x2apic_supported()) {
612 eim = !dmar_x2apic_optout();
613 if (!eim)
614 pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
615 }
616
617 for_each_iommu(iommu, drhd) {
618 /*
619 * If the queued invalidation is already initialized,
620 * shouldn't disable it.
621 */
622 if (iommu->qi)
623 continue;
624
625 /*
626 * Clear previous faults.
627 */
628 dmar_fault(-1, iommu);
629
630 /*
631 * Disable intr remapping and queued invalidation, if already
632 * enabled prior to OS handover.
633 */
634 iommu_disable_irq_remapping(iommu);
635
636 dmar_disable_qi(iommu);
637 }
638
639 /*
640 * check for the Interrupt-remapping support
641 */
642 for_each_iommu(iommu, drhd)
643 if (eim && !ecap_eim_support(iommu->ecap)) {
644 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
645 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
646 eim = 0;
647 }
648 eim_mode = eim;
649 if (eim)
650 pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
651
652 /*
653 * Enable queued invalidation for all the DRHD's.
654 */
655 for_each_iommu(iommu, drhd) {
656 int ret = dmar_enable_qi(iommu);
657
658 if (ret) {
659 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
660 " invalidation, ecap %Lx, ret %d\n",
661 drhd->reg_base_addr, iommu->ecap, ret);
662 goto error;
663 }
664 }
665 744
666 /* 745 /*
667 * Setup Interrupt-remapping for all the DRHD's now. 746 * Setup Interrupt-remapping for all the DRHD's now.
668 */ 747 */
669 for_each_iommu(iommu, drhd) { 748 for_each_iommu(iommu, drhd) {
670 iommu_set_irq_remapping(iommu, eim); 749 if (!ir_pre_enabled(iommu))
750 iommu_enable_irq_remapping(iommu);
671 setup = true; 751 setup = true;
672 } 752 }
673 753
@@ -678,9 +758,9 @@ static int __init intel_enable_irq_remapping(void)
678 758
679 set_irq_posting_cap(); 759 set_irq_posting_cap();
680 760
681 pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); 761 pr_info("Enabled IRQ remapping in %s mode\n", eim_mode ? "x2apic" : "xapic");
682 762
683 return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; 763 return eim_mode ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
684 764
685error: 765error:
686 intel_cleanup_irq_remapping(); 766 intel_cleanup_irq_remapping();
@@ -905,6 +985,7 @@ static int reenable_irq_remapping(int eim)
905 985
906 /* Set up interrupt remapping for iommu.*/ 986 /* Set up interrupt remapping for iommu.*/
907 iommu_set_irq_remapping(iommu, eim); 987 iommu_set_irq_remapping(iommu, eim);
988 iommu_enable_irq_remapping(iommu);
908 setup = true; 989 setup = true;
909 } 990 }
910 991
@@ -1169,7 +1250,6 @@ static void intel_free_irq_resources(struct irq_domain *domain,
1169 struct irq_2_iommu *irq_iommu; 1250 struct irq_2_iommu *irq_iommu;
1170 unsigned long flags; 1251 unsigned long flags;
1171 int i; 1252 int i;
1172
1173 for (i = 0; i < nr_irqs; i++) { 1253 for (i = 0; i < nr_irqs; i++) {
1174 irq_data = irq_domain_get_irq_data(domain, virq + i); 1254 irq_data = irq_domain_get_irq_data(domain, virq + i);
1175 if (irq_data && irq_data->chip_data) { 1255 if (irq_data && irq_data->chip_data) {
@@ -1317,28 +1397,12 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
1317 /* Setup Interrupt-remapping now. */ 1397 /* Setup Interrupt-remapping now. */
1318 ret = intel_setup_irq_remapping(iommu); 1398 ret = intel_setup_irq_remapping(iommu);
1319 if (ret) { 1399 if (ret) {
1320 pr_err("DRHD %Lx: failed to allocate resource\n", 1400 pr_err("Failed to setup irq remapping for %s\n",
1321 iommu->reg_phys); 1401 iommu->name);
1322 ir_remove_ioapic_hpet_scope(iommu);
1323 return ret;
1324 }
1325
1326 if (!iommu->qi) {
1327 /* Clear previous faults. */
1328 dmar_fault(-1, iommu);
1329 iommu_disable_irq_remapping(iommu);
1330 dmar_disable_qi(iommu);
1331 }
1332
1333 /* Enable queued invalidation */
1334 ret = dmar_enable_qi(iommu);
1335 if (!ret) {
1336 iommu_set_irq_remapping(iommu, eim);
1337 } else {
1338 pr_err("DRHD %Lx: failed to enable queued invalidation, ecap %Lx, ret %d\n",
1339 iommu->reg_phys, iommu->ecap, ret);
1340 intel_teardown_irq_remapping(iommu); 1402 intel_teardown_irq_remapping(iommu);
1341 ir_remove_ioapic_hpet_scope(iommu); 1403 ir_remove_ioapic_hpet_scope(iommu);
1404 } else {
1405 iommu_enable_irq_remapping(iommu);
1342 } 1406 }
1343 1407
1344 return ret; 1408 return ret;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d4f527e56679..49e7542510d1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -16,7 +16,7 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19#define pr_fmt(fmt) "%s: " fmt, __func__ 19#define pr_fmt(fmt) "iommu: " fmt
20 20
21#include <linux/device.h> 21#include <linux/device.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
@@ -51,6 +51,8 @@ struct iommu_group {
51 void (*iommu_data_release)(void *iommu_data); 51 void (*iommu_data_release)(void *iommu_data);
52 char *name; 52 char *name;
53 int id; 53 int id;
54 struct iommu_domain *default_domain;
55 struct iommu_domain *domain;
54}; 56};
55 57
56struct iommu_device { 58struct iommu_device {
@@ -75,6 +77,15 @@ struct iommu_group_attribute iommu_group_attr_##_name = \
75#define to_iommu_group(_kobj) \ 77#define to_iommu_group(_kobj) \
76 container_of(_kobj, struct iommu_group, kobj) 78 container_of(_kobj, struct iommu_group, kobj)
77 79
80static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
81 unsigned type);
82static int __iommu_attach_device(struct iommu_domain *domain,
83 struct device *dev);
84static int __iommu_attach_group(struct iommu_domain *domain,
85 struct iommu_group *group);
86static void __iommu_detach_group(struct iommu_domain *domain,
87 struct iommu_group *group);
88
78static ssize_t iommu_group_attr_show(struct kobject *kobj, 89static ssize_t iommu_group_attr_show(struct kobject *kobj,
79 struct attribute *__attr, char *buf) 90 struct attribute *__attr, char *buf)
80{ 91{
@@ -128,6 +139,8 @@ static void iommu_group_release(struct kobject *kobj)
128{ 139{
129 struct iommu_group *group = to_iommu_group(kobj); 140 struct iommu_group *group = to_iommu_group(kobj);
130 141
142 pr_debug("Releasing group %d\n", group->id);
143
131 if (group->iommu_data_release) 144 if (group->iommu_data_release)
132 group->iommu_data_release(group->iommu_data); 145 group->iommu_data_release(group->iommu_data);
133 146
@@ -135,6 +148,9 @@ static void iommu_group_release(struct kobject *kobj)
135 ida_remove(&iommu_group_ida, group->id); 148 ida_remove(&iommu_group_ida, group->id);
136 mutex_unlock(&iommu_group_mutex); 149 mutex_unlock(&iommu_group_mutex);
137 150
151 if (group->default_domain)
152 iommu_domain_free(group->default_domain);
153
138 kfree(group->name); 154 kfree(group->name);
139 kfree(group); 155 kfree(group);
140} 156}
@@ -207,6 +223,8 @@ again:
207 */ 223 */
208 kobject_put(&group->kobj); 224 kobject_put(&group->kobj);
209 225
226 pr_debug("Allocated group %d\n", group->id);
227
210 return group; 228 return group;
211} 229}
212EXPORT_SYMBOL_GPL(iommu_group_alloc); 230EXPORT_SYMBOL_GPL(iommu_group_alloc);
@@ -307,6 +325,52 @@ int iommu_group_set_name(struct iommu_group *group, const char *name)
307} 325}
308EXPORT_SYMBOL_GPL(iommu_group_set_name); 326EXPORT_SYMBOL_GPL(iommu_group_set_name);
309 327
328static int iommu_group_create_direct_mappings(struct iommu_group *group,
329 struct device *dev)
330{
331 struct iommu_domain *domain = group->default_domain;
332 struct iommu_dm_region *entry;
333 struct list_head mappings;
334 unsigned long pg_size;
335 int ret = 0;
336
337 if (!domain || domain->type != IOMMU_DOMAIN_DMA)
338 return 0;
339
340 BUG_ON(!domain->ops->pgsize_bitmap);
341
342 pg_size = 1UL << __ffs(domain->ops->pgsize_bitmap);
343 INIT_LIST_HEAD(&mappings);
344
345 iommu_get_dm_regions(dev, &mappings);
346
347 /* We need to consider overlapping regions for different devices */
348 list_for_each_entry(entry, &mappings, list) {
349 dma_addr_t start, end, addr;
350
351 start = ALIGN(entry->start, pg_size);
352 end = ALIGN(entry->start + entry->length, pg_size);
353
354 for (addr = start; addr < end; addr += pg_size) {
355 phys_addr_t phys_addr;
356
357 phys_addr = iommu_iova_to_phys(domain, addr);
358 if (phys_addr)
359 continue;
360
361 ret = iommu_map(domain, addr, addr, pg_size, entry->prot);
362 if (ret)
363 goto out;
364 }
365
366 }
367
368out:
369 iommu_put_dm_regions(dev, &mappings);
370
371 return ret;
372}
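
iommu_group_create_direct_mappings() above walks whatever iommu_dm_region entries the driver reports through iommu_get_dm_regions() (each carrying start, length and prot, as used in the loop) and identity-maps any page of those regions that is not already translated. The standalone sketch below, with a purely hypothetical region, only shows which page-sized identity mappings the ALIGN()-based loop bounds would produce.

#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* round up, as in the kernel */

int main(void)
{
	unsigned long pg_size = 4096;
	/* Hypothetical direct-mapping (RMRR-style) region, illustration only. */
	unsigned long start = 0xfed90123UL, length = 0x2000UL;
	unsigned long s = ALIGN(start, pg_size);
	unsigned long e = ALIGN(start + length, pg_size);
	unsigned long addr;

	for (addr = s; addr < e; addr += pg_size)
		printf("identity map IOVA %#lx -> PA %#lx (%lu bytes)\n",
		       addr, addr, pg_size);
	return 0;
}
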
373
310/** 374/**
311 * iommu_group_add_device - add a device to an iommu group 375 * iommu_group_add_device - add a device to an iommu group
312 * @group: the group into which to add the device (reference should be held) 376 * @group: the group into which to add the device (reference should be held)
@@ -363,8 +427,12 @@ rename:
363 427
364 dev->iommu_group = group; 428 dev->iommu_group = group;
365 429
430 iommu_group_create_direct_mappings(group, dev);
431
366 mutex_lock(&group->mutex); 432 mutex_lock(&group->mutex);
367 list_add_tail(&device->list, &group->devices); 433 list_add_tail(&device->list, &group->devices);
434 if (group->domain)
435 __iommu_attach_device(group->domain, dev);
368 mutex_unlock(&group->mutex); 436 mutex_unlock(&group->mutex);
369 437
370 /* Notify any listeners about change to group. */ 438 /* Notify any listeners about change to group. */
@@ -372,6 +440,9 @@ rename:
372 IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); 440 IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
373 441
374 trace_add_device_to_group(group->id, dev); 442 trace_add_device_to_group(group->id, dev);
443
444 pr_info("Adding device %s to group %d\n", dev_name(dev), group->id);
445
375 return 0; 446 return 0;
376} 447}
377EXPORT_SYMBOL_GPL(iommu_group_add_device); 448EXPORT_SYMBOL_GPL(iommu_group_add_device);
@@ -388,6 +459,8 @@ void iommu_group_remove_device(struct device *dev)
388 struct iommu_group *group = dev->iommu_group; 459 struct iommu_group *group = dev->iommu_group;
389 struct iommu_device *tmp_device, *device = NULL; 460 struct iommu_device *tmp_device, *device = NULL;
390 461
462 pr_info("Removing device %s from group %d\n", dev_name(dev), group->id);
463
391 /* Pre-notify listeners that a device is being removed. */ 464 /* Pre-notify listeners that a device is being removed. */
392 blocking_notifier_call_chain(&group->notifier, 465 blocking_notifier_call_chain(&group->notifier,
393 IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev); 466 IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);
@@ -417,6 +490,17 @@ void iommu_group_remove_device(struct device *dev)
417} 490}
418EXPORT_SYMBOL_GPL(iommu_group_remove_device); 491EXPORT_SYMBOL_GPL(iommu_group_remove_device);
419 492
493static int iommu_group_device_count(struct iommu_group *group)
494{
495 struct iommu_device *entry;
496 int ret = 0;
497
498 list_for_each_entry(entry, &group->devices, list)
499 ret++;
500
501 return ret;
502}
503
420/** 504/**
421 * iommu_group_for_each_dev - iterate over each device in the group 505 * iommu_group_for_each_dev - iterate over each device in the group
422 * @group: the group 506 * @group: the group
@@ -428,19 +512,30 @@ EXPORT_SYMBOL_GPL(iommu_group_remove_device);
428 * The group->mutex is held across callbacks, which will block calls to 512 * The group->mutex is held across callbacks, which will block calls to
429 * iommu_group_add/remove_device. 513 * iommu_group_add/remove_device.
430 */ 514 */
431int iommu_group_for_each_dev(struct iommu_group *group, void *data, 515static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
432 int (*fn)(struct device *, void *)) 516 int (*fn)(struct device *, void *))
433{ 517{
434 struct iommu_device *device; 518 struct iommu_device *device;
435 int ret = 0; 519 int ret = 0;
436 520
437 mutex_lock(&group->mutex);
438 list_for_each_entry(device, &group->devices, list) { 521 list_for_each_entry(device, &group->devices, list) {
439 ret = fn(device->dev, data); 522 ret = fn(device->dev, data);
440 if (ret) 523 if (ret)
441 break; 524 break;
442 } 525 }
526 return ret;
527}
528
529
530int iommu_group_for_each_dev(struct iommu_group *group, void *data,
531 int (*fn)(struct device *, void *))
532{
533 int ret;
534
535 mutex_lock(&group->mutex);
536 ret = __iommu_group_for_each_dev(group, data, fn);
443 mutex_unlock(&group->mutex); 537 mutex_unlock(&group->mutex);
538
444 return ret; 539 return ret;
445} 540}
446EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 541EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
@@ -692,7 +787,19 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
692 return group; 787 return group;
693 788
694 /* No shared group found, allocate new */ 789 /* No shared group found, allocate new */
695 return iommu_group_alloc(); 790 group = iommu_group_alloc();
791 if (IS_ERR(group))
792 return NULL;
793
794 /*
795 * Try to allocate a default domain - needs support from the
796 * IOMMU driver.
797 */
798 group->default_domain = __iommu_domain_alloc(pdev->dev.bus,
799 IOMMU_DOMAIN_DMA);
800 group->domain = group->default_domain;
801
802 return group;
696} 803}
697 804
698/** 805/**
@@ -731,6 +838,11 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
731 return group; 838 return group;
732} 839}
733 840
841struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
842{
843 return group->default_domain;
844}
845
734static int add_iommu_group(struct device *dev, void *data) 846static int add_iommu_group(struct device *dev, void *data)
735{ 847{
736 struct iommu_callback_data *cb = data; 848 struct iommu_callback_data *cb = data;
@@ -741,7 +853,16 @@ static int add_iommu_group(struct device *dev, void *data)
741 853
742 WARN_ON(dev->iommu_group); 854 WARN_ON(dev->iommu_group);
743 855
744 ops->add_device(dev); 856 return ops->add_device(dev);
857}
858
859static int remove_iommu_group(struct device *dev, void *data)
860{
861 struct iommu_callback_data *cb = data;
862 const struct iommu_ops *ops = cb->ops;
863
864 if (ops->remove_device && dev->iommu_group)
865 ops->remove_device(dev);
745 866
746 return 0; 867 return 0;
747} 868}
@@ -761,7 +882,7 @@ static int iommu_bus_notifier(struct notifier_block *nb,
761 if (action == BUS_NOTIFY_ADD_DEVICE) { 882 if (action == BUS_NOTIFY_ADD_DEVICE) {
762 if (ops->add_device) 883 if (ops->add_device)
763 return ops->add_device(dev); 884 return ops->add_device(dev);
764 } else if (action == BUS_NOTIFY_DEL_DEVICE) { 885 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
765 if (ops->remove_device && dev->iommu_group) { 886 if (ops->remove_device && dev->iommu_group) {
766 ops->remove_device(dev); 887 ops->remove_device(dev);
767 return 0; 888 return 0;
@@ -814,19 +935,25 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
814 nb->notifier_call = iommu_bus_notifier; 935 nb->notifier_call = iommu_bus_notifier;
815 936
816 err = bus_register_notifier(bus, nb); 937 err = bus_register_notifier(bus, nb);
817 if (err) { 938 if (err)
818 kfree(nb); 939 goto out_free;
819 return err;
820 }
821 940
822 err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); 941 err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
823 if (err) { 942 if (err)
824 bus_unregister_notifier(bus, nb); 943 goto out_err;
825 kfree(nb); 944
826 return err;
827 }
828 945
829 return 0; 946 return 0;
947
948out_err:
949 /* Clean up */
950 bus_for_each_dev(bus, NULL, &cb, remove_iommu_group);
951 bus_unregister_notifier(bus, nb);
952
953out_free:
954 kfree(nb);
955
956 return err;
830} 957}
831 958
832/** 959/**
@@ -898,22 +1025,28 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
898} 1025}
899EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1026EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
900 1027
901struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) 1028static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
1029 unsigned type)
902{ 1030{
903 struct iommu_domain *domain; 1031 struct iommu_domain *domain;
904 1032
905 if (bus == NULL || bus->iommu_ops == NULL) 1033 if (bus == NULL || bus->iommu_ops == NULL)
906 return NULL; 1034 return NULL;
907 1035
908 domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 1036 domain = bus->iommu_ops->domain_alloc(type);
909 if (!domain) 1037 if (!domain)
910 return NULL; 1038 return NULL;
911 1039
912 domain->ops = bus->iommu_ops; 1040 domain->ops = bus->iommu_ops;
913 domain->type = IOMMU_DOMAIN_UNMANAGED; 1041 domain->type = type;
914 1042
915 return domain; 1043 return domain;
916} 1044}
1045
1046struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
1047{
1048 return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
1049}
917EXPORT_SYMBOL_GPL(iommu_domain_alloc); 1050EXPORT_SYMBOL_GPL(iommu_domain_alloc);
918 1051
919void iommu_domain_free(struct iommu_domain *domain) 1052void iommu_domain_free(struct iommu_domain *domain)
@@ -922,7 +1055,8 @@ void iommu_domain_free(struct iommu_domain *domain)
922} 1055}
923EXPORT_SYMBOL_GPL(iommu_domain_free); 1056EXPORT_SYMBOL_GPL(iommu_domain_free);
924 1057
925int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 1058static int __iommu_attach_device(struct iommu_domain *domain,
1059 struct device *dev)
926{ 1060{
927 int ret; 1061 int ret;
928 if (unlikely(domain->ops->attach_dev == NULL)) 1062 if (unlikely(domain->ops->attach_dev == NULL))
@@ -933,9 +1067,38 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
933 trace_attach_device_to_domain(dev); 1067 trace_attach_device_to_domain(dev);
934 return ret; 1068 return ret;
935} 1069}
1070
1071int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
1072{
1073 struct iommu_group *group;
1074 int ret;
1075
1076 group = iommu_group_get(dev);
1077	/* FIXME: Remove this when groups are mandatory for iommu drivers */
1078 if (group == NULL)
1079 return __iommu_attach_device(domain, dev);
1080
1081 /*
1082 * We have a group - lock it to make sure the device-count doesn't
1083 * change while we are attaching
1084 */
1085 mutex_lock(&group->mutex);
1086 ret = -EINVAL;
1087 if (iommu_group_device_count(group) != 1)
1088 goto out_unlock;
1089
1090 ret = __iommu_attach_group(domain, group);
1091
1092out_unlock:
1093 mutex_unlock(&group->mutex);
1094 iommu_group_put(group);
1095
1096 return ret;
1097}
936EXPORT_SYMBOL_GPL(iommu_attach_device); 1098EXPORT_SYMBOL_GPL(iommu_attach_device);
937 1099
938void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 1100static void __iommu_detach_device(struct iommu_domain *domain,
1101 struct device *dev)
939{ 1102{
940 if (unlikely(domain->ops->detach_dev == NULL)) 1103 if (unlikely(domain->ops->detach_dev == NULL))
941 return; 1104 return;
@@ -943,8 +1106,48 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
943 domain->ops->detach_dev(domain, dev); 1106 domain->ops->detach_dev(domain, dev);
944 trace_detach_device_from_domain(dev); 1107 trace_detach_device_from_domain(dev);
945} 1108}
1109
1110void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
1111{
1112 struct iommu_group *group;
1113
1114 group = iommu_group_get(dev);
1115	/* FIXME: Remove this when groups are mandatory for iommu drivers */
1116 if (group == NULL)
1117 return __iommu_detach_device(domain, dev);
1118
1119 mutex_lock(&group->mutex);
1120 if (iommu_group_device_count(group) != 1) {
1121 WARN_ON(1);
1122 goto out_unlock;
1123 }
1124
1125 __iommu_detach_group(domain, group);
1126
1127out_unlock:
1128 mutex_unlock(&group->mutex);
1129 iommu_group_put(group);
1130}
946EXPORT_SYMBOL_GPL(iommu_detach_device); 1131EXPORT_SYMBOL_GPL(iommu_detach_device);
947 1132
1133struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
1134{
1135 struct iommu_domain *domain;
1136 struct iommu_group *group;
1137
1138 group = iommu_group_get(dev);
1139	/* FIXME: Remove this when groups are mandatory for iommu drivers */
1140 if (group == NULL)
1141 return NULL;
1142
1143 domain = group->domain;
1144
1145 iommu_group_put(group);
1146
1147 return domain;
1148}
1149EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
1150
948/* 1151/*
949 * IOMMU groups are really the natural working unit of the IOMMU, but 1152
950 * the IOMMU API works on domains and devices. Bridge that gap by 1153 * the IOMMU API works on domains and devices. Bridge that gap by
@@ -959,13 +1162,34 @@ static int iommu_group_do_attach_device(struct device *dev, void *data)
959{ 1162{
960 struct iommu_domain *domain = data; 1163 struct iommu_domain *domain = data;
961 1164
962 return iommu_attach_device(domain, dev); 1165 return __iommu_attach_device(domain, dev);
1166}
1167
1168static int __iommu_attach_group(struct iommu_domain *domain,
1169 struct iommu_group *group)
1170{
1171 int ret;
1172
1173 if (group->default_domain && group->domain != group->default_domain)
1174 return -EBUSY;
1175
1176 ret = __iommu_group_for_each_dev(group, domain,
1177 iommu_group_do_attach_device);
1178 if (ret == 0)
1179 group->domain = domain;
1180
1181 return ret;
963} 1182}
964 1183
965int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 1184int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
966{ 1185{
967 return iommu_group_for_each_dev(group, domain, 1186 int ret;
968 iommu_group_do_attach_device); 1187
1188 mutex_lock(&group->mutex);
1189 ret = __iommu_attach_group(domain, group);
1190 mutex_unlock(&group->mutex);
1191
1192 return ret;
969} 1193}
970EXPORT_SYMBOL_GPL(iommu_attach_group); 1194EXPORT_SYMBOL_GPL(iommu_attach_group);
971 1195
@@ -973,14 +1197,40 @@ static int iommu_group_do_detach_device(struct device *dev, void *data)
973{ 1197{
974 struct iommu_domain *domain = data; 1198 struct iommu_domain *domain = data;
975 1199
976 iommu_detach_device(domain, dev); 1200 __iommu_detach_device(domain, dev);
977 1201
978 return 0; 1202 return 0;
979} 1203}
980 1204
1205static void __iommu_detach_group(struct iommu_domain *domain,
1206 struct iommu_group *group)
1207{
1208 int ret;
1209
1210 if (!group->default_domain) {
1211 __iommu_group_for_each_dev(group, domain,
1212 iommu_group_do_detach_device);
1213 group->domain = NULL;
1214 return;
1215 }
1216
1217 if (group->domain == group->default_domain)
1218 return;
1219
1220 /* Detach by re-attaching to the default domain */
1221 ret = __iommu_group_for_each_dev(group, group->default_domain,
1222 iommu_group_do_attach_device);
1223 if (ret != 0)
1224 WARN_ON(1);
1225 else
1226 group->domain = group->default_domain;
1227}
1228
981void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 1229void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
982{ 1230{
983 iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); 1231 mutex_lock(&group->mutex);
1232 __iommu_detach_group(domain, group);
1233 mutex_unlock(&group->mutex);
984} 1234}
985EXPORT_SYMBOL_GPL(iommu_detach_group); 1235EXPORT_SYMBOL_GPL(iommu_detach_group);
986 1236
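With default domains in place, iommu_attach_group() now returns -EBUSY while the group is attached to anything other than its default domain, and iommu_detach_group() no longer leaves the group unattached: if a default domain exists, the group is re-attached to it. A hedged sketch of the resulting calling pattern for a domain owner such as VFIO; only the iommu_* calls are real API, the wrapper function is illustrative and error handling is trimmed:

/* Sketch: claim a group with an unmanaged domain, then release it. */
static int sketch_claim_group(struct bus_type *bus, struct iommu_group *group)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc(bus);	/* IOMMU_DOMAIN_UNMANAGED */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_group(domain, group);
	if (ret) {	/* -EBUSY if the group already left its default domain */
		iommu_domain_free(domain);
		return ret;
	}

	/* ... use iommu_map()/iommu_unmap() on the domain ... */

	iommu_detach_group(domain, group);	/* falls back to the default domain */
	iommu_domain_free(domain);
	return 0;
}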
@@ -1207,7 +1457,7 @@ static int __init iommu_init(void)
1207 1457
1208 return 0; 1458 return 0;
1209} 1459}
1210arch_initcall(iommu_init); 1460core_initcall(iommu_init);
1211 1461
1212int iommu_domain_get_attr(struct iommu_domain *domain, 1462int iommu_domain_get_attr(struct iommu_domain *domain,
1213 enum iommu_attr attr, void *data) 1463 enum iommu_attr attr, void *data)
@@ -1273,3 +1523,72 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
1273 return ret; 1523 return ret;
1274} 1524}
1275EXPORT_SYMBOL_GPL(iommu_domain_set_attr); 1525EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
1526
1527void iommu_get_dm_regions(struct device *dev, struct list_head *list)
1528{
1529 const struct iommu_ops *ops = dev->bus->iommu_ops;
1530
1531 if (ops && ops->get_dm_regions)
1532 ops->get_dm_regions(dev, list);
1533}
1534
1535void iommu_put_dm_regions(struct device *dev, struct list_head *list)
1536{
1537 const struct iommu_ops *ops = dev->bus->iommu_ops;
1538
1539 if (ops && ops->put_dm_regions)
1540 ops->put_dm_regions(dev, list);
1541}
1542
1543/* Request that a device is direct mapped by the IOMMU */
1544int iommu_request_dm_for_dev(struct device *dev)
1545{
1546 struct iommu_domain *dm_domain;
1547 struct iommu_group *group;
1548 int ret;
1549
1550 /* Device must already be in a group before calling this function */
1551 group = iommu_group_get_for_dev(dev);
1552 if (IS_ERR(group))
1553 return PTR_ERR(group);
1554
1555 mutex_lock(&group->mutex);
1556
1557 /* Check if the default domain is already direct mapped */
1558 ret = 0;
1559 if (group->default_domain &&
1560 group->default_domain->type == IOMMU_DOMAIN_IDENTITY)
1561 goto out;
1562
1563 /* Don't change mappings of existing devices */
1564 ret = -EBUSY;
1565 if (iommu_group_device_count(group) != 1)
1566 goto out;
1567
1568 /* Allocate a direct mapped domain */
1569 ret = -ENOMEM;
1570 dm_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_IDENTITY);
1571 if (!dm_domain)
1572 goto out;
1573
1574 /* Attach the device to the domain */
1575 ret = __iommu_attach_group(dm_domain, group);
1576 if (ret) {
1577 iommu_domain_free(dm_domain);
1578 goto out;
1579 }
1580
1581 /* Make the direct mapped domain the default for this group */
1582 if (group->default_domain)
1583 iommu_domain_free(group->default_domain);
1584 group->default_domain = dm_domain;
1585
1586 pr_info("Using direct mapping for device %s\n", dev_name(dev));
1587
1588 ret = 0;
1589out:
1590 mutex_unlock(&group->mutex);
1591 iommu_group_put(group);
1592
1593 return ret;
1594}
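The new exports at the end of iommu.c give drivers a way to query a group's current domain and to request an identity-mapped (direct mapping) default domain, plus accessors for the driver-reported direct-mapped regions. A minimal sketch of a consumer, assuming <linux/iommu.h> and <linux/list.h>; the wrapper name is invented, only the iommu_* calls come from this patch set:

/* Sketch: ask for an identity mapping and walk the direct-map regions. */
static int sketch_use_direct_mapping(struct device *dev)
{
	struct iommu_dm_region *region;
	LIST_HEAD(regions);
	int ret;

	/*
	 * Ask the core to replace the group's default domain with an
	 * identity-mapped one; this fails with -EBUSY for multi-device
	 * groups.
	 */
	ret = iommu_request_dm_for_dev(dev);
	if (ret)
		return ret;

	/* Walk the regions the IOMMU driver wants direct mapped. */
	iommu_get_dm_regions(dev, &regions);
	list_for_each_entry(region, &regions, list)
		dev_dbg(dev, "direct-mapped region %pa, %zu bytes\n",
			&region->start, region->length);
	iommu_put_dm_regions(dev, &regions);

	return 0;
}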
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 9dd8208312c2..b7c3d923f3e1 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -227,6 +227,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova)
227 /* Figure out where to put new node */ 227 /* Figure out where to put new node */
228 while (*new) { 228 while (*new) {
229 struct iova *this = container_of(*new, struct iova, node); 229 struct iova *this = container_of(*new, struct iova, node);
230
230 parent = *new; 231 parent = *new;
231 232
232 if (iova->pfn_lo < this->pfn_lo) 233 if (iova->pfn_lo < this->pfn_lo)
@@ -350,6 +351,7 @@ void
350free_iova(struct iova_domain *iovad, unsigned long pfn) 351free_iova(struct iova_domain *iovad, unsigned long pfn)
351{ 352{
352 struct iova *iova = find_iova(iovad, pfn); 353 struct iova *iova = find_iova(iovad, pfn);
354
353 if (iova) 355 if (iova)
354 __free_iova(iovad, iova); 356 __free_iova(iovad, iova);
355 357
@@ -369,6 +371,7 @@ void put_iova_domain(struct iova_domain *iovad)
369 node = rb_first(&iovad->rbroot); 371 node = rb_first(&iovad->rbroot);
370 while (node) { 372 while (node) {
371 struct iova *iova = container_of(node, struct iova, node); 373 struct iova *iova = container_of(node, struct iova, node);
374
372 rb_erase(node, &iovad->rbroot); 375 rb_erase(node, &iovad->rbroot);
373 free_iova_mem(iova); 376 free_iova_mem(iova);
374 node = rb_first(&iovad->rbroot); 377 node = rb_first(&iovad->rbroot);
@@ -482,6 +485,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
482 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { 485 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
483 struct iova *iova = container_of(node, struct iova, node); 486 struct iova *iova = container_of(node, struct iova, node);
484 struct iova *new_iova; 487 struct iova *new_iova;
488
485 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); 489 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
486 if (!new_iova) 490 if (!new_iova)
487 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", 491 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index cab214544237..ebf0adb8e7ea 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -551,6 +551,15 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
551 spin_unlock_irqrestore(&rk_domain->iommus_lock, flags); 551 spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
552} 552}
553 553
554static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
555 dma_addr_t iova, size_t size)
556{
557 rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE);
558 if (size > SPAGE_SIZE)
559 rk_iommu_zap_iova(rk_domain, iova + size - SPAGE_SIZE,
560 SPAGE_SIZE);
561}
562
554static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, 563static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
555 dma_addr_t iova) 564 dma_addr_t iova)
556{ 565{
@@ -575,12 +584,6 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
575 rk_table_flush(page_table, NUM_PT_ENTRIES); 584 rk_table_flush(page_table, NUM_PT_ENTRIES);
576 rk_table_flush(dte_addr, 1); 585 rk_table_flush(dte_addr, 1);
577 586
578 /*
579 * Zap the first iova of newly allocated page table so iommu evicts
580 * old cached value of new dte from the iotlb.
581 */
582 rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE);
583
584done: 587done:
585 pt_phys = rk_dte_pt_address(dte); 588 pt_phys = rk_dte_pt_address(dte);
586 return (u32 *)phys_to_virt(pt_phys); 589 return (u32 *)phys_to_virt(pt_phys);
@@ -630,6 +633,14 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
630 633
631 rk_table_flush(pte_addr, pte_count); 634 rk_table_flush(pte_addr, pte_count);
632 635
636 /*
637 * Zap the first and last iova to evict from iotlb any previously
638 * mapped cachelines holding stale values for its dte and pte.
639 * We only zap the first and last iova, since only they could have
640 * dte or pte shared with an existing mapping.
641 */
642 rk_iommu_zap_iova_first_last(rk_domain, iova, size);
643
633 return 0; 644 return 0;
634unwind: 645unwind:
635 /* Unmap the range of iovas that we just mapped */ 646 /* Unmap the range of iovas that we just mapped */
@@ -774,7 +785,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
774 list_add_tail(&iommu->node, &rk_domain->iommus); 785 list_add_tail(&iommu->node, &rk_domain->iommus);
775 spin_unlock_irqrestore(&rk_domain->iommus_lock, flags); 786 spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
776 787
777 dev_info(dev, "Attached to iommu domain\n"); 788 dev_dbg(dev, "Attached to iommu domain\n");
778 789
779 rk_iommu_disable_stall(iommu); 790 rk_iommu_disable_stall(iommu);
780 791
@@ -808,7 +819,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
808 819
809 iommu->domain = NULL; 820 iommu->domain = NULL;
810 821
811 dev_info(dev, "Detached from iommu domain\n"); 822 dev_dbg(dev, "Detached from iommu domain\n");
812} 823}
813 824
814static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) 825static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 7d092ddc8119..454017928ed0 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -21,7 +21,7 @@ config VFIO_VIRQFD
21menuconfig VFIO 21menuconfig VFIO
22 tristate "VFIO Non-Privileged userspace driver framework" 22 tristate "VFIO Non-Privileged userspace driver framework"
23 depends on IOMMU_API 23 depends on IOMMU_API
24 select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU) 24 select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
25 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) 25 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
26 select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) 26 select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
27 select ANON_INODES 27 select ANON_INODES
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3665cb331ca1..d9a366d24e3b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -297,6 +297,7 @@ struct q_inval {
297/* 1MB - maximum possible interrupt remapping table size */ 297/* 1MB - maximum possible interrupt remapping table size */
298#define INTR_REMAP_PAGE_ORDER 8 298#define INTR_REMAP_PAGE_ORDER 8
299#define INTR_REMAP_TABLE_REG_SIZE 0xf 299#define INTR_REMAP_TABLE_REG_SIZE 0xf
300#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf
300 301
301#define INTR_REMAP_TABLE_ENTRIES 65536 302#define INTR_REMAP_TABLE_ENTRIES 65536
302 303
@@ -323,6 +324,9 @@ enum {
323 MAX_SR_DMAR_REGS 324 MAX_SR_DMAR_REGS
324}; 325};
325 326
327#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0)
328#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1)
329
326struct intel_iommu { 330struct intel_iommu {
327 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 331 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
328 u64 reg_phys; /* physical address of hw register set */ 332 u64 reg_phys; /* physical address of hw register set */
@@ -356,6 +360,7 @@ struct intel_iommu {
356#endif 360#endif
357 struct device *iommu_dev; /* IOMMU-sysfs device */ 361 struct device *iommu_dev; /* IOMMU-sysfs device */
358 int node; 362 int node;
363 u32 flags; /* Software defined flags */
359}; 364};
360 365
361static inline void __iommu_flush_cache( 366static inline void __iommu_flush_cache(
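The new flags field and the VTD_FLAG_*_PRE_ENABLED bits let the VT-d driver remember that the previous (crashed) kernel left DMA translation or interrupt remapping switched on, so the kdump kernel can copy the old tables instead of tearing them down. A rough sketch of how that state could be recorded from the global status register; DMAR_GSTS_REG, DMA_GSTS_TES and DMA_GSTS_IRES are pre-existing defines in this header, and the helper below is invented for illustration rather than taken from the series:

static void sketch_record_pre_enabled_state(struct intel_iommu *iommu)
{
	u32 gsts = readl(iommu->reg + DMAR_GSTS_REG);

	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
	if (gsts & DMA_GSTS_IRES)
		iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED;
}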
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0546b8710ce3..dc767f7c3704 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -114,6 +114,20 @@ enum iommu_attr {
114 DOMAIN_ATTR_MAX, 114 DOMAIN_ATTR_MAX,
115}; 115};
116 116
117/**
118 * struct iommu_dm_region - descriptor for a direct mapped memory region
119 * @list: Linked list pointers
120 * @start: System physical start address of the region
121 * @length: Length of the region in bytes
122 * @prot: IOMMU Protection flags (READ/WRITE/...)
123 */
124struct iommu_dm_region {
125 struct list_head list;
126 phys_addr_t start;
127 size_t length;
128 int prot;
129};
130
117#ifdef CONFIG_IOMMU_API 131#ifdef CONFIG_IOMMU_API
118 132
119/** 133/**
@@ -159,6 +173,10 @@ struct iommu_ops {
159 int (*domain_set_attr)(struct iommu_domain *domain, 173 int (*domain_set_attr)(struct iommu_domain *domain,
160 enum iommu_attr attr, void *data); 174 enum iommu_attr attr, void *data);
161 175
176 /* Request/Free a list of direct mapping requirements for a device */
177 void (*get_dm_regions)(struct device *dev, struct list_head *list);
178 void (*put_dm_regions)(struct device *dev, struct list_head *list);
179
162 /* Window handling functions */ 180 /* Window handling functions */
163 int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, 181 int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
164 phys_addr_t paddr, u64 size, int prot); 182 phys_addr_t paddr, u64 size, int prot);
@@ -193,6 +211,7 @@ extern int iommu_attach_device(struct iommu_domain *domain,
193 struct device *dev); 211 struct device *dev);
194extern void iommu_detach_device(struct iommu_domain *domain, 212extern void iommu_detach_device(struct iommu_domain *domain,
195 struct device *dev); 213 struct device *dev);
214extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
196extern int iommu_map(struct iommu_domain *domain, unsigned long iova, 215extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
197 phys_addr_t paddr, size_t size, int prot); 216 phys_addr_t paddr, size_t size, int prot);
198extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, 217extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -204,6 +223,10 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io
204extern void iommu_set_fault_handler(struct iommu_domain *domain, 223extern void iommu_set_fault_handler(struct iommu_domain *domain,
205 iommu_fault_handler_t handler, void *token); 224 iommu_fault_handler_t handler, void *token);
206 225
226extern void iommu_get_dm_regions(struct device *dev, struct list_head *list);
227extern void iommu_put_dm_regions(struct device *dev, struct list_head *list);
228extern int iommu_request_dm_for_dev(struct device *dev);
229
207extern int iommu_attach_group(struct iommu_domain *domain, 230extern int iommu_attach_group(struct iommu_domain *domain,
208 struct iommu_group *group); 231 struct iommu_group *group);
209extern void iommu_detach_group(struct iommu_domain *domain, 232extern void iommu_detach_group(struct iommu_domain *domain,
@@ -227,6 +250,7 @@ extern int iommu_group_unregister_notifier(struct iommu_group *group,
227 struct notifier_block *nb); 250 struct notifier_block *nb);
228extern int iommu_group_id(struct iommu_group *group); 251extern int iommu_group_id(struct iommu_group *group);
229extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); 252extern struct iommu_group *iommu_group_get_for_dev(struct device *dev);
253extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
230 254
231extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, 255extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
232 void *data); 256 void *data);
@@ -332,6 +356,11 @@ static inline void iommu_detach_device(struct iommu_domain *domain,
332{ 356{
333} 357}
334 358
359static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
360{
361 return NULL;
362}
363
335static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, 364static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
336 phys_addr_t paddr, int gfp_order, int prot) 365 phys_addr_t paddr, int gfp_order, int prot)
337{ 366{
@@ -373,6 +402,21 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
373{ 402{
374} 403}
375 404
405static inline void iommu_get_dm_regions(struct device *dev,
406 struct list_head *list)
407{
408}
409
410static inline void iommu_put_dm_regions(struct device *dev,
411 struct list_head *list)
412{
413}
414
415static inline int iommu_request_dm_for_dev(struct device *dev)
416{
417 return -ENODEV;
418}
419
376static inline int iommu_attach_group(struct iommu_domain *domain, 420static inline int iommu_attach_group(struct iommu_domain *domain,
377 struct iommu_group *group) 421 struct iommu_group *group)
378{ 422{