aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorKeshavamurthy, Anil S <anil.s.keshavamurthy@intel.com>2007-10-21 19:41:58 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-22 11:13:19 -0400
commitf76aec76ec7f68829a66624d11a50ed6cb404185 (patch)
treee470670d6c26aa06ca1fa28e97b25717e80dfa8a /drivers
parent49a0429e53f29109cbf1eadd89497286ba81f1ae (diff)
intel-iommu: optimize sg map/unmap calls
This patch adds PageSelectiveInvalidation support, replacing the existing DomainSelectiveInvalidation for intel_{map/unmap}_sg() calls, and also enables mapping one big contiguous DMA virtual address range onto discontiguous physical addresses for SG map/unmap calls. "Domain selective invalidations" wipe out the IOMMU address translation cache based on domain ID, whereas "Page selective invalidations" wipe out the IOMMU address translation cache only for the given address mask range, which is more cache friendly when compared to Domain selective invalidations. Here is how it is done. 1) Changes to iova.c: alloc_iova() now takes a bool size_aligned argument which, when set, returns an io virtual address that is naturally aligned to 2 ^ x, where x is the order of the size requested. Returning an io virtual address which is naturally aligned helps the iommu to do the "page selective invalidations", which are more IOMMU cache friendly than "domain selective invalidations". 2) Changes to drivers/pci/intel-iommu.c: Clean up the intel_{map/unmap}_{single/sg}() calls so that the s/g map/unmap calls are no longer dependent on intel_{map/unmap}_single(). intel_map_sg() now computes the total DMA virtual address size required, allocates a size-aligned DMA virtual address range of that total size, and maps the discontiguous physical addresses to the allocated contiguous DMA virtual address range. In the intel_unmap_sg() case, since the DMA virtual address range is contiguous and size_aligned, PageSelectiveInvalidation is used, replacing the earlier DomainSelectiveInvalidations. Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> Cc: Greg KH <greg@kroah.com> Cc: Ashok Raj <ashok.raj@intel.com> Cc: Suresh B <suresh.b.siddha@intel.com> Cc: Andi Kleen <ak@suse.de> Cc: Arjan van de Ven <arjan@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/pci/intel-iommu.c325
-rw-r--r--drivers/pci/iova.c63
-rw-r--r--drivers/pci/iova.h3
3 files changed, 231 insertions, 160 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 4cca5b939e0e..dab329f01584 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -665,24 +665,10 @@ static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
665 non_present_entry_flush); 665 non_present_entry_flush);
666} 666}
667 667
668static int iommu_get_alignment(u64 base, unsigned int size)
669{
670 int t = 0;
671 u64 end;
672
673 end = base + size - 1;
674 while (base != end) {
675 t++;
676 base >>= 1;
677 end >>= 1;
678 }
679 return t;
680}
681
682static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 668static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
683 u64 addr, unsigned int pages, int non_present_entry_flush) 669 u64 addr, unsigned int pages, int non_present_entry_flush)
684{ 670{
685 unsigned int align; 671 unsigned int mask;
686 672
687 BUG_ON(addr & (~PAGE_MASK_4K)); 673 BUG_ON(addr & (~PAGE_MASK_4K));
688 BUG_ON(pages == 0); 674 BUG_ON(pages == 0);
@@ -696,16 +682,13 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
696 * PSI requires page size to be 2 ^ x, and the base address is naturally 682 * PSI requires page size to be 2 ^ x, and the base address is naturally
697 * aligned to the size 683 * aligned to the size
698 */ 684 */
699 align = iommu_get_alignment(addr >> PAGE_SHIFT_4K, pages); 685 mask = ilog2(__roundup_pow_of_two(pages));
700 /* Fallback to domain selective flush if size is too big */ 686 /* Fallback to domain selective flush if size is too big */
701 if (align > cap_max_amask_val(iommu->cap)) 687 if (mask > cap_max_amask_val(iommu->cap))
702 return iommu_flush_iotlb_dsi(iommu, did, 688 return iommu_flush_iotlb_dsi(iommu, did,
703 non_present_entry_flush); 689 non_present_entry_flush);
704 690
705 addr >>= PAGE_SHIFT_4K + align; 691 return __iommu_flush_iotlb(iommu, did, addr, mask,
706 addr <<= PAGE_SHIFT_4K + align;
707
708 return __iommu_flush_iotlb(iommu, did, addr, align,
709 DMA_TLB_PSI_FLUSH, non_present_entry_flush); 692 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
710} 693}
711 694
@@ -1772,78 +1755,103 @@ static inline u64 aligned_size(u64 host_addr, size_t size)
1772} 1755}
1773 1756
1774struct iova * 1757struct iova *
1775iommu_alloc_iova(struct dmar_domain *domain, void *host_addr, size_t size, 1758iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1776 u64 start, u64 end)
1777{ 1759{
1778 u64 start_addr;
1779 struct iova *piova; 1760 struct iova *piova;
1780 1761
1781 /* Make sure it's in range */ 1762 /* Make sure it's in range */
1782 if ((start > DOMAIN_MAX_ADDR(domain->gaw)) || end < start)
1783 return NULL;
1784
1785 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); 1763 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1786 start_addr = PAGE_ALIGN_4K(start); 1764 if (!size || (IOVA_START_ADDR + size > end))
1787 size = aligned_size((u64)host_addr, size);
1788 if (!size || (start_addr + size > end))
1789 return NULL; 1765 return NULL;
1790 1766
1791 piova = alloc_iova(&domain->iovad, 1767 piova = alloc_iova(&domain->iovad,
1792 size >> PAGE_SHIFT_4K, IOVA_PFN(end)); 1768 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1793
1794 return piova; 1769 return piova;
1795} 1770}
1796 1771
1797static dma_addr_t __intel_map_single(struct device *dev, void *addr, 1772static struct iova *
1798 size_t size, int dir, u64 *flush_addr, unsigned int *flush_size) 1773__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1774 size_t size)
1799{ 1775{
1800 struct dmar_domain *domain;
1801 struct pci_dev *pdev = to_pci_dev(dev); 1776 struct pci_dev *pdev = to_pci_dev(dev);
1802 int ret;
1803 int prot = 0;
1804 struct iova *iova = NULL; 1777 struct iova *iova = NULL;
1805 u64 start_addr;
1806
1807 addr = (void *)virt_to_phys(addr);
1808
1809 domain = get_domain_for_dev(pdev,
1810 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1811 if (!domain) {
1812 printk(KERN_ERR
1813 "Allocating domain for %s failed", pci_name(pdev));
1814 return 0;
1815 }
1816
1817 start_addr = IOVA_START_ADDR;
1818 1778
1819 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) { 1779 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1820 iova = iommu_alloc_iova(domain, addr, size, start_addr, 1780 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1821 pdev->dma_mask);
1822 } else { 1781 } else {
1823 /* 1782 /*
1824 * First try to allocate an io virtual address in 1783 * First try to allocate an io virtual address in
1825 * DMA_32BIT_MASK and if that fails then try allocating 1784 * DMA_32BIT_MASK and if that fails then try allocating
1826 * from higer range 1785 * from higer range
1827 */ 1786 */
1828 iova = iommu_alloc_iova(domain, addr, size, start_addr, 1787 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1829 DMA_32BIT_MASK);
1830 if (!iova) 1788 if (!iova)
1831 iova = iommu_alloc_iova(domain, addr, size, start_addr, 1789 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1832 pdev->dma_mask);
1833 } 1790 }
1834 1791
1835 if (!iova) { 1792 if (!iova) {
1836 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 1793 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1794 return NULL;
1795 }
1796
1797 return iova;
1798}
1799
1800static struct dmar_domain *
1801get_valid_domain_for_dev(struct pci_dev *pdev)
1802{
1803 struct dmar_domain *domain;
1804 int ret;
1805
1806 domain = get_domain_for_dev(pdev,
1807 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1808 if (!domain) {
1809 printk(KERN_ERR
1810 "Allocating domain for %s failed", pci_name(pdev));
1837 return 0; 1811 return 0;
1838 } 1812 }
1839 1813
1840 /* make sure context mapping is ok */ 1814 /* make sure context mapping is ok */
1841 if (unlikely(!domain_context_mapped(domain, pdev))) { 1815 if (unlikely(!domain_context_mapped(domain, pdev))) {
1842 ret = domain_context_mapping(domain, pdev); 1816 ret = domain_context_mapping(domain, pdev);
1843 if (ret) 1817 if (ret) {
1844 goto error; 1818 printk(KERN_ERR
1819 "Domain context map for %s failed",
1820 pci_name(pdev));
1821 return 0;
1822 }
1845 } 1823 }
1846 1824
1825 return domain;
1826}
1827
1828static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1829 size_t size, int dir)
1830{
1831 struct pci_dev *pdev = to_pci_dev(hwdev);
1832 int ret;
1833 struct dmar_domain *domain;
1834 unsigned long start_addr;
1835 struct iova *iova;
1836 int prot = 0;
1837
1838 BUG_ON(dir == DMA_NONE);
1839 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1840 return virt_to_bus(addr);
1841
1842 domain = get_valid_domain_for_dev(pdev);
1843 if (!domain)
1844 return 0;
1845
1846 addr = (void *)virt_to_phys(addr);
1847 size = aligned_size((u64)addr, size);
1848
1849 iova = __intel_alloc_iova(hwdev, domain, size);
1850 if (!iova)
1851 goto error;
1852
1853 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1854
1847 /* 1855 /*
1848 * Check if DMAR supports zero-length reads on write only 1856 * Check if DMAR supports zero-length reads on write only
1849 * mappings.. 1857 * mappings..
@@ -1859,101 +1867,65 @@ static dma_addr_t __intel_map_single(struct device *dev, void *addr,
1859 * might have two guest_addr mapping to the same host addr, but this 1867 * might have two guest_addr mapping to the same host addr, but this
1860 * is not a big problem 1868 * is not a big problem
1861 */ 1869 */
1862 ret = domain_page_mapping(domain, iova->pfn_lo << PAGE_SHIFT_4K, 1870 ret = domain_page_mapping(domain, start_addr,
1863 ((u64)addr) & PAGE_MASK_4K, 1871 ((u64)addr) & PAGE_MASK_4K, size, prot);
1864 (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, prot);
1865 if (ret) 1872 if (ret)
1866 goto error; 1873 goto error;
1867 1874
1868 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", 1875 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1869 pci_name(pdev), size, (u64)addr, 1876 pci_name(pdev), size, (u64)addr,
1870 (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, 1877 size, (u64)start_addr, dir);
1871 (u64)(iova->pfn_lo << PAGE_SHIFT_4K), dir); 1878
1879 /* it's a non-present to present mapping */
1880 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1881 start_addr, size >> PAGE_SHIFT_4K, 1);
1882 if (ret)
1883 iommu_flush_write_buffer(domain->iommu);
1884
1885 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1872 1886
1873 *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1874 *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K;
1875 return (iova->pfn_lo << PAGE_SHIFT_4K) + ((u64)addr & (~PAGE_MASK_4K));
1876error: 1887error:
1877 __free_iova(&domain->iovad, iova); 1888 if (iova)
1889 __free_iova(&domain->iovad, iova);
1878 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", 1890 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1879 pci_name(pdev), size, (u64)addr, dir); 1891 pci_name(pdev), size, (u64)addr, dir);
1880 return 0; 1892 return 0;
1881} 1893}
1882 1894
1883static dma_addr_t intel_map_single(struct device *hwdev, void *addr, 1895static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1884 size_t size, int dir) 1896 size_t size, int dir)
1885{ 1897{
1886 struct pci_dev *pdev = to_pci_dev(hwdev);
1887 dma_addr_t ret;
1888 struct dmar_domain *domain;
1889 u64 flush_addr;
1890 unsigned int flush_size;
1891
1892 BUG_ON(dir == DMA_NONE);
1893 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1894 return virt_to_bus(addr);
1895
1896 ret = __intel_map_single(hwdev, addr, size,
1897 dir, &flush_addr, &flush_size);
1898 if (ret) {
1899 domain = find_domain(pdev);
1900 /* it's a non-present to present mapping */
1901 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
1902 flush_addr, flush_size >> PAGE_SHIFT_4K, 1))
1903 iommu_flush_write_buffer(domain->iommu);
1904 }
1905 return ret;
1906}
1907
1908static void __intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1909 size_t size, int dir, u64 *flush_addr, unsigned int *flush_size)
1910{
1911 struct dmar_domain *domain;
1912 struct pci_dev *pdev = to_pci_dev(dev); 1898 struct pci_dev *pdev = to_pci_dev(dev);
1899 struct dmar_domain *domain;
1900 unsigned long start_addr;
1913 struct iova *iova; 1901 struct iova *iova;
1914 1902
1903 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1904 return;
1915 domain = find_domain(pdev); 1905 domain = find_domain(pdev);
1916 BUG_ON(!domain); 1906 BUG_ON(!domain);
1917 1907
1918 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 1908 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1919 if (!iova) { 1909 if (!iova)
1920 *flush_size = 0;
1921 return; 1910 return;
1922 }
1923 pr_debug("Device %s unmapping: %lx@%llx\n",
1924 pci_name(pdev),
1925 (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K,
1926 (u64)(iova->pfn_lo << PAGE_SHIFT_4K));
1927
1928 *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1929 *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K;
1930 /* clear the whole page, not just dev_addr - (dev_addr + size) */
1931 dma_pte_clear_range(domain, *flush_addr, *flush_addr + *flush_size);
1932 /* free page tables */
1933 dma_pte_free_pagetable(domain, *flush_addr, *flush_addr + *flush_size);
1934 /* free iova */
1935 __free_iova(&domain->iovad, iova);
1936}
1937 1911
1938static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, 1912 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1939 size_t size, int dir) 1913 size = aligned_size((u64)dev_addr, size);
1940{
1941 struct pci_dev *pdev = to_pci_dev(dev);
1942 struct dmar_domain *domain;
1943 u64 flush_addr;
1944 unsigned int flush_size;
1945 1914
1946 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) 1915 pr_debug("Device %s unmapping: %lx@%llx\n",
1947 return; 1916 pci_name(pdev), size, (u64)start_addr);
1948 1917
1949 domain = find_domain(pdev); 1918 /* clear the whole page */
1950 __intel_unmap_single(dev, dev_addr, size, 1919 dma_pte_clear_range(domain, start_addr, start_addr + size);
1951 dir, &flush_addr, &flush_size); 1920 /* free page tables */
1952 if (flush_size == 0) 1921 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1953 return; 1922
1954 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, flush_addr, 1923 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1955 flush_size >> PAGE_SHIFT_4K, 0)) 1924 size >> PAGE_SHIFT_4K, 0))
1956 iommu_flush_write_buffer(domain->iommu); 1925 iommu_flush_write_buffer(domain->iommu);
1926
1927 /* free iova */
1928 __free_iova(&domain->iovad, iova);
1957} 1929}
1958 1930
1959static void * intel_alloc_coherent(struct device *hwdev, size_t size, 1931static void * intel_alloc_coherent(struct device *hwdev, size_t size,
@@ -1990,28 +1962,46 @@ static void intel_free_coherent(struct device *hwdev, size_t size,
1990 free_pages((unsigned long)vaddr, order); 1962 free_pages((unsigned long)vaddr, order);
1991} 1963}
1992 1964
1965#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
1993static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg, 1966static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg,
1994 int nelems, int dir) 1967 int nelems, int dir)
1995{ 1968{
1996 int i; 1969 int i;
1997 struct pci_dev *pdev = to_pci_dev(hwdev); 1970 struct pci_dev *pdev = to_pci_dev(hwdev);
1998 struct dmar_domain *domain; 1971 struct dmar_domain *domain;
1999 u64 flush_addr; 1972 unsigned long start_addr;
2000 unsigned int flush_size; 1973 struct iova *iova;
1974 size_t size = 0;
1975 void *addr;
2001 1976
2002 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) 1977 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
2003 return; 1978 return;
2004 1979
2005 domain = find_domain(pdev); 1980 domain = find_domain(pdev);
2006 for (i = 0; i < nelems; i++, sg++)
2007 __intel_unmap_single(hwdev, sg->dma_address,
2008 sg->dma_length, dir, &flush_addr, &flush_size);
2009 1981
2010 if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 0)) 1982 iova = find_iova(&domain->iovad, IOVA_PFN(sg[0].dma_address));
1983 if (!iova)
1984 return;
1985 for (i = 0; i < nelems; i++, sg++) {
1986 addr = SG_ENT_VIRT_ADDRESS(sg);
1987 size += aligned_size((u64)addr, sg->length);
1988 }
1989
1990 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1991
1992 /* clear the whole page */
1993 dma_pte_clear_range(domain, start_addr, start_addr + size);
1994 /* free page tables */
1995 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1996
1997 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1998 size >> PAGE_SHIFT_4K, 0))
2011 iommu_flush_write_buffer(domain->iommu); 1999 iommu_flush_write_buffer(domain->iommu);
2000
2001 /* free iova */
2002 __free_iova(&domain->iovad, iova);
2012} 2003}
2013 2004
2014#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
2015static int intel_nontranslate_map_sg(struct device *hddev, 2005static int intel_nontranslate_map_sg(struct device *hddev,
2016 struct scatterlist *sg, int nelems, int dir) 2006 struct scatterlist *sg, int nelems, int dir)
2017{ 2007{
@@ -2031,33 +2021,76 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sg,
2031{ 2021{
2032 void *addr; 2022 void *addr;
2033 int i; 2023 int i;
2034 dma_addr_t dma_handle;
2035 struct pci_dev *pdev = to_pci_dev(hwdev); 2024 struct pci_dev *pdev = to_pci_dev(hwdev);
2036 struct dmar_domain *domain; 2025 struct dmar_domain *domain;
2037 u64 flush_addr; 2026 size_t size = 0;
2038 unsigned int flush_size; 2027 int prot = 0;
2028 size_t offset = 0;
2029 struct iova *iova = NULL;
2030 int ret;
2031 struct scatterlist *orig_sg = sg;
2032 unsigned long start_addr;
2039 2033
2040 BUG_ON(dir == DMA_NONE); 2034 BUG_ON(dir == DMA_NONE);
2041 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) 2035 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
2042 return intel_nontranslate_map_sg(hwdev, sg, nelems, dir); 2036 return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);
2043 2037
2038 domain = get_valid_domain_for_dev(pdev);
2039 if (!domain)
2040 return 0;
2041
2044 for (i = 0; i < nelems; i++, sg++) { 2042 for (i = 0; i < nelems; i++, sg++) {
2045 addr = SG_ENT_VIRT_ADDRESS(sg); 2043 addr = SG_ENT_VIRT_ADDRESS(sg);
2046 dma_handle = __intel_map_single(hwdev, addr, 2044 addr = (void *)virt_to_phys(addr);
2047 sg->length, dir, &flush_addr, &flush_size); 2045 size += aligned_size((u64)addr, sg->length);
2048 if (!dma_handle) { 2046 }
2049 intel_unmap_sg(hwdev, sg - i, i, dir); 2047
2050 sg[0].dma_length = 0; 2048 iova = __intel_alloc_iova(hwdev, domain, size);
2049 if (!iova) {
2050 orig_sg->dma_length = 0;
2051 return 0;
2052 }
2053
2054 /*
2055 * Check if DMAR supports zero-length reads on write only
2056 * mappings..
2057 */
2058 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2059 !cap_zlr(domain->iommu->cap))
2060 prot |= DMA_PTE_READ;
2061 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2062 prot |= DMA_PTE_WRITE;
2063
2064 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2065 offset = 0;
2066 sg = orig_sg;
2067 for (i = 0; i < nelems; i++, sg++) {
2068 addr = SG_ENT_VIRT_ADDRESS(sg);
2069 addr = (void *)virt_to_phys(addr);
2070 size = aligned_size((u64)addr, sg->length);
2071 ret = domain_page_mapping(domain, start_addr + offset,
2072 ((u64)addr) & PAGE_MASK_4K,
2073 size, prot);
2074 if (ret) {
2075 /* clear the page */
2076 dma_pte_clear_range(domain, start_addr,
2077 start_addr + offset);
2078 /* free page tables */
2079 dma_pte_free_pagetable(domain, start_addr,
2080 start_addr + offset);
2081 /* free iova */
2082 __free_iova(&domain->iovad, iova);
2051 return 0; 2083 return 0;
2052 } 2084 }
2053 sg->dma_address = dma_handle; 2085 sg->dma_address = start_addr + offset +
2086 ((u64)addr & (~PAGE_MASK_4K));
2054 sg->dma_length = sg->length; 2087 sg->dma_length = sg->length;
2088 offset += size;
2055 } 2089 }
2056 2090
2057 domain = find_domain(pdev);
2058
2059 /* it's a non-present to present mapping */ 2091 /* it's a non-present to present mapping */
2060 if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 1)) 2092 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2093 start_addr, offset >> PAGE_SHIFT_4K, 1))
2061 iommu_flush_write_buffer(domain->iommu); 2094 iommu_flush_write_buffer(domain->iommu);
2062 return nelems; 2095 return nelems;
2063} 2096}
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 717fafaa7e02..a84571c29360 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -57,12 +57,28 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
57 iovad->cached32_node = rb_next(&free->node); 57 iovad->cached32_node = rb_next(&free->node);
58} 58}
59 59
60static int __alloc_iova_range(struct iova_domain *iovad, 60/* Computes the padding size required, to make the
61 unsigned long size, unsigned long limit_pfn, struct iova *new) 61 * the start address naturally aligned on its size
62 */
63static int
64iova_get_pad_size(int size, unsigned int limit_pfn)
65{
66 unsigned int pad_size = 0;
67 unsigned int order = ilog2(size);
68
69 if (order)
70 pad_size = (limit_pfn + 1) % (1 << order);
71
72 return pad_size;
73}
74
75static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
76 unsigned long limit_pfn, struct iova *new, bool size_aligned)
62{ 77{
63 struct rb_node *curr = NULL; 78 struct rb_node *curr = NULL;
64 unsigned long flags; 79 unsigned long flags;
65 unsigned long saved_pfn; 80 unsigned long saved_pfn;
81 unsigned int pad_size = 0;
66 82
67 /* Walk the tree backwards */ 83 /* Walk the tree backwards */
68 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 84 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
@@ -72,22 +88,32 @@ static int __alloc_iova_range(struct iova_domain *iovad,
72 struct iova *curr_iova = container_of(curr, struct iova, node); 88 struct iova *curr_iova = container_of(curr, struct iova, node);
73 if (limit_pfn < curr_iova->pfn_lo) 89 if (limit_pfn < curr_iova->pfn_lo)
74 goto move_left; 90 goto move_left;
75 if (limit_pfn < curr_iova->pfn_hi) 91 else if (limit_pfn < curr_iova->pfn_hi)
76 goto adjust_limit_pfn; 92 goto adjust_limit_pfn;
77 if ((curr_iova->pfn_hi + size) <= limit_pfn) 93 else {
78 break; /* found a free slot */ 94 if (size_aligned)
95 pad_size = iova_get_pad_size(size, limit_pfn);
96 if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
97 break; /* found a free slot */
98 }
79adjust_limit_pfn: 99adjust_limit_pfn:
80 limit_pfn = curr_iova->pfn_lo - 1; 100 limit_pfn = curr_iova->pfn_lo - 1;
81move_left: 101move_left:
82 curr = rb_prev(curr); 102 curr = rb_prev(curr);
83 } 103 }
84 104
85 if ((!curr) && !(IOVA_START_PFN + size <= limit_pfn)) { 105 if (!curr) {
86 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 106 if (size_aligned)
87 return -ENOMEM; 107 pad_size = iova_get_pad_size(size, limit_pfn);
108 if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
109 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
110 return -ENOMEM;
111 }
88 } 112 }
89 new->pfn_hi = limit_pfn; 113
90 new->pfn_lo = limit_pfn - size + 1; 114 /* pfn_lo will point to size aligned address if size_aligned is set */
115 new->pfn_lo = limit_pfn - (size + pad_size) + 1;
116 new->pfn_hi = new->pfn_lo + size - 1;
91 117
92 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 118 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
93 return 0; 119 return 0;
@@ -119,12 +145,16 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova)
119 * @iovad - iova domain in question 145 * @iovad - iova domain in question
120 * @size - size of page frames to allocate 146 * @size - size of page frames to allocate
121 * @limit_pfn - max limit address 147 * @limit_pfn - max limit address
148 * @size_aligned - set if size_aligned address range is required
122 * This function allocates an iova in the range limit_pfn to IOVA_START_PFN 149 * This function allocates an iova in the range limit_pfn to IOVA_START_PFN
123 * looking from limit_pfn instead from IOVA_START_PFN. 150 * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned
151 * flag is set then the allocated address iova->pfn_lo will be naturally
152 * aligned on roundup_power_of_two(size).
124 */ 153 */
125struct iova * 154struct iova *
126alloc_iova(struct iova_domain *iovad, unsigned long size, 155alloc_iova(struct iova_domain *iovad, unsigned long size,
127 unsigned long limit_pfn) 156 unsigned long limit_pfn,
157 bool size_aligned)
128{ 158{
129 unsigned long flags; 159 unsigned long flags;
130 struct iova *new_iova; 160 struct iova *new_iova;
@@ -134,8 +164,15 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
134 if (!new_iova) 164 if (!new_iova)
135 return NULL; 165 return NULL;
136 166
167 /* If size aligned is set then round the size to
168 * to next power of two.
169 */
170 if (size_aligned)
171 size = __roundup_pow_of_two(size);
172
137 spin_lock_irqsave(&iovad->iova_alloc_lock, flags); 173 spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
138 ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova); 174 ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova,
175 size_aligned);
139 176
140 if (ret) { 177 if (ret) {
141 spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); 178 spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
index 04c220708883..ae3028d5a941 100644
--- a/drivers/pci/iova.h
+++ b/drivers/pci/iova.h
@@ -51,7 +51,8 @@ void free_iova_mem(struct iova *iova);
51void free_iova(struct iova_domain *iovad, unsigned long pfn); 51void free_iova(struct iova_domain *iovad, unsigned long pfn);
52void __free_iova(struct iova_domain *iovad, struct iova *iova); 52void __free_iova(struct iova_domain *iovad, struct iova *iova);
53struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, 53struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
54 unsigned long limit_pfn); 54 unsigned long limit_pfn,
55 bool size_aligned);
55struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, 56struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
56 unsigned long pfn_hi); 57 unsigned long pfn_hi);
57void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); 58void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);