author     Linus Torvalds <torvalds@linux-foundation.org>  2011-06-01 16:48:50 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-06-01 16:48:50 -0400
commit     f0f52a9463839c52a63c05d6e7d4a330d94a9794
tree       d964f65aa37dd46b457bc615f2f37e75390c6df8
parent     0f48f2600911d5de6393829e4a9986d4075558b3
parent     70e535d1e5d1e4317e894d6228b762cf9c3fbc6a
Merge git://git.infradead.org/iommu-2.6
* git://git.infradead.org/iommu-2.6:
intel-iommu: Fix off-by-one in RMRR setup
intel-iommu: Add domain check in domain_remove_one_dev_info
intel-iommu: Remove Host Bridge devices from identity mapping
intel-iommu: Use coherent DMA mask when requested
intel-iommu: Dont cache iova above 32bit
intel-iommu: Speed up processing of the identity_mapping function
intel-iommu: Check for identity mapping candidate using system dma mask
intel-iommu: Only unlink device domains from iommu
intel-iommu: Enable super page (2MiB, 1GiB, etc.) support
intel-iommu: Flush unmaps at domain_exit
intel-iommu: Remove obsolete comment from detect_intel_iommu
intel-iommu: fix VT-d PMR disable for TXT on S3 resume
-rw-r--r--  Documentation/kernel-parameters.txt |   5
-rw-r--r--  drivers/pci/dmar.c                  |   7
-rw-r--r--  drivers/pci/intel-iommu.c           | 240
-rw-r--r--  drivers/pci/iova.c                  |  12
-rw-r--r--  include/linux/dma_remapping.h       |   4
5 files changed, 220 insertions(+), 48 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5438a2d7907f..d9a203b058f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         With this option on every unmap_single operation will
                         result in a hardware IOTLB flush operation as opposed
                         to batching them for performance.
-
+                sp_off [Default Off]
+                        By default, super page will be supported if Intel IOMMU
+                        has the capability. With this option, super page will
+                        not be supported.
         intremap=       [X86-64, Intel-IOMMU]
                         Format: { on (default) | off | nosid }
                         on      enable Interrupt Remapping (default)
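(Illustrative note, not part of the diff: "sp_off" is consumed as one comma-separated token of the existing intel_iommu= boot parameter, which intel_iommu_setup() parses further down in this series. A command line that keeps VT-d enabled but opts out of superpages would therefore look something like:

        intel_iommu=on,sp_off

and the token can be combined with the others such as "strict" in the same list.)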
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 12e02bf92c4a..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
         {
 #ifdef CONFIG_INTR_REMAP
                 struct acpi_table_dmar *dmar;
-                /*
-                 * for now we will disable dma-remapping when interrupt
-                 * remapping is enabled.
-                 * When support for queued invalidation for IOTLB invalidation
-                 * is added, we will not need this any more.
-                 */
+
                 dmar = (struct acpi_table_dmar *) dmar_tbl;
                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
                         printk(KERN_INFO
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6af6b628175b..59f17acf7f68 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -47,6 +47,8 @@
 #define ROOT_SIZE               VTD_PAGE_SIZE
 #define CONTEXT_SIZE            VTD_PAGE_SIZE
 
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+                            ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
         return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+        return 1 << ((lvl - 1) * LEVEL_STRIDE);
+}
+
 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
    are never going to work. */
 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
 
 /*
+ * set to 1 to panic kernel if can't successfully enable VT-d
+ * (used when kernel is launched w/ TXT)
+ */
+static int force_on = 0;
+
+/*
  * 0: Present
  * 1-11: Reserved
  * 12-63: Context Ptr (12 - (haw-1))
@@ -338,6 +351,9 @@ struct dmar_domain {
         int             iommu_coherency;/* indicate coherency of iommu access */
         int             iommu_snooping; /* indicate snooping control feature*/
         int             iommu_count;    /* reference count of iommu */
+        int             iommu_superpage;/* Level of superpages supported:
+                                           0 == 4KiB (no superpages), 1 == 2MiB,
+                                           2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
         spinlock_t      iommu_lock;     /* protect iommu set in domain */
         u64             max_addr;       /* maximum mapped address */
 };
@@ -387,6 +403,7 @@ int dmar_disabled = 1;
 static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
                         printk(KERN_INFO
                                 "Intel-IOMMU: disable batched IOTLB flush\n");
                         intel_iommu_strict = 1;
+                } else if (!strncmp(str, "sp_off", 6)) {
+                        printk(KERN_INFO
+                                "Intel-IOMMU: disable supported super page\n");
+                        intel_iommu_superpage = 0;
                 }
 
                 str += strcspn(str, ",");
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
         }
 }
 
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+        int i, mask = 0xf;
+
+        if (!intel_iommu_superpage) {
+                domain->iommu_superpage = 0;
+                return;
+        }
+
+        domain->iommu_superpage = 4; /* 1TiB */
+
+        for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+                mask |= cap_super_page_val(g_iommus[i]->cap);
+                if (!mask) {
+                        break;
+                }
+        }
+        domain->iommu_superpage = fls(mask);
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
         domain_update_iommu_coherency(domain);
         domain_update_iommu_snooping(domain);
+        domain_update_iommu_superpage(domain);
 }
 
 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
@@ -689,23 +731,31 @@ out:
 }
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-                                      unsigned long pfn)
+                                      unsigned long pfn, int large_level)
 {
         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
         struct dma_pte *parent, *pte = NULL;
         int level = agaw_to_level(domain->agaw);
-        int offset;
+        int offset, target_level;
 
         BUG_ON(!domain->pgd);
         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
         parent = domain->pgd;
 
+        /* Search pte */
+        if (!large_level)
+                target_level = 1;
+        else
+                target_level = large_level;
+
         while (level > 0) {
                 void *tmp_page;
 
                 offset = pfn_level_offset(pfn, level);
                 pte = &parent[offset];
-                if (level == 1)
+                if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
+                        break;
+                if (level == target_level)
                         break;
 
                 if (!dma_pte_present(pte)) {
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
         return pte;
 }
 
+
 /* return address's pte at specific level */
 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
                                          unsigned long pfn,
-                                         int level)
+                                         int level, int *large_page)
 {
         struct dma_pte *parent, *pte = NULL;
         int total = agaw_to_level(domain->agaw);
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
                 if (level == total)
                         return pte;
 
-                if (!dma_pte_present(pte))
+                if (!dma_pte_present(pte)) {
+                        *large_page = total;
                         break;
+                }
+
+                if (pte->val & DMA_PTE_LARGE_PAGE) {
+                        *large_page = total;
+                        return pte;
+                }
+
                 parent = phys_to_virt(dma_pte_addr(pte));
                 total--;
         }
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
                                 unsigned long last_pfn)
 {
         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+        unsigned int large_page = 1;
         struct dma_pte *first_pte, *pte;
 
         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 
         /* we don't need lock here; nobody else touches the iova range */
         do {
-                first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+                large_page = 1;
+                first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
                 if (!pte) {
-                        start_pfn = align_to_level(start_pfn + 1, 2);
+                        start_pfn = align_to_level(start_pfn + 1, large_page + 1);
                         continue;
                 }
                 do {
                         dma_clear_pte(pte);
-                        start_pfn++;
+                        start_pfn += lvl_to_nr_pages(large_page);
                         pte++;
                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
         int total = agaw_to_level(domain->agaw);
         int level;
         unsigned long tmp;
+        int large_page = 2;
 
         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
                 return;
 
         do {
-                first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+                large_page = level;
+                first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
+                if (large_page > level)
+                        level = large_page + 1;
                 if (!pte) {
                         tmp = align_to_level(tmp + 1, level + 1);
                         continue;
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
         else
                 domain->iommu_snooping = 0;
 
+        domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
         domain->iommu_count = 1;
         domain->nid = iommu->node;
 
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
         if (!domain)
                 return;
 
+        /* Flush any lazy unmaps that may reference this domain */
+        if (!intel_iommu_strict)
+                flush_unmaps_timeout(0);
+
         domain_remove_dev_info(domain);
         /* destroy iovas */
         put_iova_domain(&domain->iovad);
@@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
 }
 
+/* Return largest possible superpage level for a given mapping */
+static inline int hardware_largepage_caps(struct dmar_domain *domain,
+                                          unsigned long iov_pfn,
+                                          unsigned long phy_pfn,
+                                          unsigned long pages)
+{
+        int support, level = 1;
+        unsigned long pfnmerge;
+
+        support = domain->iommu_superpage;
+
+        /* To use a large page, the virtual *and* physical addresses
+           must be aligned to 2MiB/1GiB/etc. Lower bits set in either
+           of them will mean we have to use smaller pages. So just
+           merge them and check both at once. */
+        pfnmerge = iov_pfn | phy_pfn;
+
+        while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
+                pages >>= VTD_STRIDE_SHIFT;
+                if (!pages)
+                        break;
+                pfnmerge >>= VTD_STRIDE_SHIFT;
+                level++;
+                support--;
+        }
+        return level;
+}
+
 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                             struct scatterlist *sg, unsigned long phys_pfn,
                             unsigned long nr_pages, int prot)
@@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
         phys_addr_t uninitialized_var(pteval);
         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
         unsigned long sg_res;
+        unsigned int largepage_lvl = 0;
+        unsigned long lvl_pages = 0;
 
         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
 
@@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
         }
 
-        while (nr_pages--) {
+        while (nr_pages > 0) {
                 uint64_t tmp;
 
                 if (!sg_res) {
@@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
                         sg->dma_length = sg->length;
                         pteval = page_to_phys(sg_page(sg)) | prot;
+                        phys_pfn = pteval >> VTD_PAGE_SHIFT;
                 }
+
                 if (!pte) {
-                        first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+                        largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
+
+                        first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
                         if (!pte)
                                 return -ENOMEM;
+                        /* It is large page*/
+                        if (largepage_lvl > 1)
+                                pteval |= DMA_PTE_LARGE_PAGE;
+                        else
+                                pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
+
                 }
                 /* We don't need lock here, nobody else
                  * touches the iova range
@@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                         }
                         WARN_ON(1);
                 }
+
+                lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
+                BUG_ON(nr_pages < lvl_pages);
+                BUG_ON(sg_res < lvl_pages);
+
+                nr_pages -= lvl_pages;
+                iov_pfn += lvl_pages;
+                phys_pfn += lvl_pages;
+                pteval += lvl_pages * VTD_PAGE_SIZE;
+                sg_res -= lvl_pages;
+
+                /* If the next PTE would be the first in a new page, then we
+                   need to flush the cache on the entries we've just written.
+                   And then we'll need to recalculate 'pte', so clear it and
+                   let it get set again in the if (!pte) block above.
+
+                   If we're done (!nr_pages) we need to flush the cache too.
+
+                   Also if we've been setting superpages, we may need to
+                   recalculate 'pte' and switch back to smaller pages for the
+                   end of the mapping, if the trailing size is not enough to
+                   use another superpage (i.e. sg_res < lvl_pages). */
                 pte++;
-                if (!nr_pages || first_pte_in_page(pte)) {
+                if (!nr_pages || first_pte_in_page(pte) ||
+                    (largepage_lvl > 1 && sg_res < lvl_pages)) {
                         domain_flush_cache(domain, first_pte,
                                            (void *)pte - (void *)first_pte);
                         pte = NULL;
                 }
-                iov_pfn++;
-                pteval += VTD_PAGE_SIZE;
-                sg_res--;
-                if (!sg_res)
+
+                if (!sg_res && nr_pages)
                         sg = sg_next(sg);
         }
         return 0;
@@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
                 return 0;
         return iommu_prepare_identity_map(pdev, rmrr->base_address,
-                rmrr->end_address + 1);
+                rmrr->end_address);
 }
 
 #ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
                 return;
 
         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
-        ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
+        ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
 
         if (ret)
                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
         if (likely(!iommu_identity_mapping))
                 return 0;
 
+        info = pdev->dev.archdata.iommu;
+        if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+                return (info->domain == si_domain);
 
-        list_for_each_entry(info, &si_domain->devices, link)
-                if (info->dev == pdev)
-                        return 1;
         return 0;
 }
 
@@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
          * Assume that they will -- if they turn out not to be, then we can
          * take them out of the 1:1 domain later.
          */
-        if (!startup)
-                return pdev->dma_mask > DMA_BIT_MASK(32);
+        if (!startup) {
+                /*
+                 * If the device's dma_mask is less than the system's memory
+                 * size then this is not a candidate for identity mapping.
+                 */
+                u64 dma_mask = pdev->dma_mask;
+
+                if (pdev->dev.coherent_dma_mask &&
+                    pdev->dev.coherent_dma_mask < dma_mask)
+                        dma_mask = pdev->dev.coherent_dma_mask;
+
+                return dma_mask >= dma_get_required_mask(&pdev->dev);
+        }
 
         return 1;
 }
@@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
                 return -EFAULT;
 
         for_each_pci_dev(pdev) {
+                /* Skip Host/PCI Bridge devices */
+                if (IS_BRIDGE_HOST_DEVICE(pdev))
+                        continue;
                 if (iommu_should_identity_map(pdev, 1)) {
                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
                                hw ? "hardware" : "software", pci_name(pdev));
@@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
         return 0;
 }
 
-static int __init init_dmars(int force_on)
+static int __init init_dmars(void)
 {
         struct dmar_drhd_unit *drhd;
         struct dmar_rmrr_unit *rmrr;
@@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
         iommu = domain_get_iommu(domain);
         size = aligned_nrpages(paddr, size);
 
-        iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
-                                pdev->dma_mask);
+        iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
         if (!iova)
                 goto error;
 
@@ -3118,7 +3263,17 @@ static int init_iommu_hw(void)
                 if (iommu->qi)
                         dmar_reenable_qi(iommu);
 
-        for_each_active_iommu(iommu, drhd) {
+        for_each_iommu(iommu, drhd) {
+                if (drhd->ignored) {
+                        /*
+                         * we always have to disable PMRs or DMA may fail on
+                         * this device
+                         */
+                        if (force_on)
+                                iommu_disable_protect_mem_regions(iommu);
+                        continue;
+                }
+
                 iommu_flush_write_buffer(iommu);
 
                 iommu_set_root_entry(iommu);
@@ -3127,7 +3282,8 @@ static int init_iommu_hw(void)
                                            DMA_CCMD_GLOBAL_INVL);
                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
                                          DMA_TLB_GLOBAL_FLUSH);
-                iommu_enable_translation(iommu);
+                if (iommu_enable_translation(iommu))
+                        return 1;
                 iommu_disable_protect_mem_regions(iommu);
         }
 
@@ -3194,7 +3350,10 @@ static void iommu_resume(void)
         unsigned long flag;
 
         if (init_iommu_hw()) {
-                WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+                if (force_on)
+                        panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
+                else
+                        WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
                 return;
         }
 
@@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = {
 int __init intel_iommu_init(void)
 {
         int ret = 0;
-        int force_on = 0;
 
         /* VT-d is required for a TXT/tboot launch, so enforce that */
         force_on = tboot_force_iommu();
@@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void)
 
         init_no_remapping_devices();
 
-        ret = init_dmars(force_on);
+        ret = init_dmars();
         if (ret) {
                 if (force_on)
                         panic("tboot: Failed to initialize DMARs\n");
@@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
         spin_lock_irqsave(&device_domain_lock, flags);
         list_for_each_safe(entry, tmp, &domain->devices) {
                 info = list_entry(entry, struct device_domain_info, link);
-                /* No need to compare PCI domain; it has to be the same */
-                if (info->bus == pdev->bus->number &&
+                if (info->segment == pci_domain_nr(pdev->bus) &&
+                    info->bus == pdev->bus->number &&
                     info->devfn == pdev->devfn) {
                         list_del(&info->link);
                         list_del(&info->global);
@@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
                 domain_update_iommu_cap(domain);
                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
 
-                spin_lock_irqsave(&iommu->lock, tmp_flags);
-                clear_bit(domain->id, iommu->domain_ids);
-                iommu->domains[domain->id] = NULL;
-                spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+                if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+                    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
+                        spin_lock_irqsave(&iommu->lock, tmp_flags);
+                        clear_bit(domain->id, iommu->domain_ids);
+                        iommu->domains[domain->id] = NULL;
+                        spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+                }
         }
 
         spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
         domain->iommu_count = 0;
         domain->iommu_coherency = 0;
         domain->iommu_snooping = 0;
+        domain->iommu_superpage = 0;
         domain->max_addr = 0;
         domain->nid = -1;
 
@@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
         struct dma_pte *pte;
         u64 phys = 0;
 
-        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
+        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
         if (pte)
                 phys = dma_pte_addr(pte);
 
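(Illustrative sketch, not part of the diff: the superpage arithmetic introduced above follows from the 9-bit stride per page-table level, so a level-1 PTE maps one 4KiB page, level 2 maps 512 pages = 2MiB, level 3 maps 512*512 pages = 1GiB, and so on. The standalone userspace program below mirrors lvl_to_nr_pages() and the alignment walk in hardware_largepage_caps(); LEVEL_STRIDE == 9 is assumed here, matching the VTD_STRIDE_* values added in dma_remapping.h further down.)

/* superpage_levels.c -- illustration only, userspace stand-in for the kernel helpers */
#include <stdio.h>

#define LEVEL_STRIDE            9       /* bits translated per page-table level (assumed) */
#define VTD_STRIDE_SHIFT        9
#define VTD_STRIDE_MASK         (~0ULL << VTD_STRIDE_SHIFT)

/* Pages covered by one PTE at a given level: 1, 512, 512*512, ... */
static unsigned long lvl_to_nr_pages(unsigned int lvl)
{
        return 1UL << ((lvl - 1) * LEVEL_STRIDE);
}

/* Same walk as hardware_largepage_caps(): merge the IOVA and physical pfns so
 * one alignment test covers both; every extra level needs 9 more clear low
 * bits and at least one full superpage worth of remaining pages. */
static int largest_level(unsigned long iov_pfn, unsigned long phy_pfn,
                         unsigned long pages, int support)
{
        unsigned long pfnmerge = iov_pfn | phy_pfn;
        int level = 1;

        while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
                pages >>= VTD_STRIDE_SHIFT;
                if (!pages)
                        break;
                pfnmerge >>= VTD_STRIDE_SHIFT;
                level++;
                support--;
        }
        return level;
}

int main(void)
{
        /* A 4MiB mapping (1024 x 4KiB pages), 2MiB-aligned on both sides,
         * with hardware advertising one superpage level: 2MiB pages are used. */
        int level = largest_level(0x200, 0x149400, 1024, 1);

        printf("level %d maps %lu pages per PTE\n", level, lvl_to_nr_pages(level));
        return 0;
}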
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 9606e599a475..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
         curr = iovad->cached32_node;
         cached_iova = container_of(curr, struct iova, node);
 
-        if (free->pfn_lo >= cached_iova->pfn_lo)
-                iovad->cached32_node = rb_next(&free->node);
+        if (free->pfn_lo >= cached_iova->pfn_lo) {
+                struct rb_node *node = rb_next(&free->node);
+                struct iova *iova = container_of(node, struct iova, node);
+
+                /* only cache if it's below 32bit pfn */
+                if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
+                        iovad->cached32_node = node;
+                else
+                        iovad->cached32_node = NULL;
+        }
 }
 
 /* Computes the padding size required, to make the
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 5619f8522738..bbd8661b3473 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,8 +9,12 @@
 #define VTD_PAGE_MASK           (((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)    (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
+#define VTD_STRIDE_SHIFT        (9)
+#define VTD_STRIDE_MASK         (((u64)-1) << VTD_STRIDE_SHIFT)
+
 #define DMA_PTE_READ (1)
 #define DMA_PTE_WRITE (2)
+#define DMA_PTE_LARGE_PAGE (1 << 7)
 #define DMA_PTE_SNP (1 << 11)
 
 #define CONTEXT_TT_MULTI_LEVEL  0
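(Illustration, not part of the diff: __domain_mapping() composes a superpage mapping simply by OR-ing the new DMA_PTE_LARGE_PAGE bit into an otherwise ordinary read/write PTE value built from the physical address. A minimal userspace sketch of that composition, reusing the constants defined above:)

/* pte_compose.c -- illustration only */
#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT          12
#define VTD_PAGE_MASK           (((uint64_t)-1) << VTD_PAGE_SHIFT)
#define DMA_PTE_READ            (1)
#define DMA_PTE_WRITE           (2)
#define DMA_PTE_LARGE_PAGE      (1 << 7)

int main(void)
{
        uint64_t phys = 0x40200000ULL;          /* 2MiB-aligned physical address */
        uint64_t pteval = (phys & VTD_PAGE_MASK) | DMA_PTE_READ | DMA_PTE_WRITE;
        int largepage_lvl = 2;                  /* level 2 == one 2MiB superpage */

        if (largepage_lvl > 1)
                pteval |= DMA_PTE_LARGE_PAGE;   /* bit 7 marks a superpage entry */

        printf("pteval = %#llx\n", (unsigned long long)pteval);
        return 0;
}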