author		David Woodhouse <David.Woodhouse@intel.com>	2009-08-04 11:19:20 -0400
committer	David Woodhouse <David.Woodhouse@intel.com>	2009-08-04 11:19:23 -0400
commit		19943b0e30b05d42e494ae6fef78156ebc8c637e
tree		1c7e1dffac6ee9f0fb5920078475ad19e8919c5b /drivers/pci
parent		0815565adfe3f4c369110c57d8ffe83caefeed68
intel-iommu: Unify hardware and software passthrough support
This makes the hardware passthrough mode work a lot more like the
software version, so that the behaviour of a kernel with 'iommu=pt'
is the same whether the hardware supports passthrough or not.
In particular:
- We use a single si_domain for the pass-through devices.
- 32-bit devices can be taken out of the pass-through domain so that
they don't have to use swiotlb.
- Devices will work again after being removed from a KVM guest.
- A potential oops on OOM (in init_context_pass_through()) is fixed.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
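
[Editor's note] For readers skimming the hunks below, the heart of the unification is the reworked iommu_prepare_static_identity_mapping(): hardware and software passthrough now share the single si_domain, and only the context-entry translation type differs. The following is condensed from the patch itself -- the informational printk and some brace structure are elided, so read it as a sketch of the new flow, not the literal code:

	static int iommu_prepare_static_identity_mapping(int hw)
	{
		struct pci_dev *pdev = NULL;
		int ret;

		/* In hardware mode si_domain_init() returns right after
		   creating the domain; in software mode it also builds
		   identity page tables covering usable RAM. */
		ret = si_domain_init(hw);
		if (ret)
			return -EFAULT;

		for_each_pci_dev(pdev) {
			/* Devices excluded by the identity-map policy stay
			   out of si_domain; at run time iommu_no_mapping()
			   can likewise drop 32-bit-only devices back out so
			   they avoid swiotlb. */
			if (!iommu_should_identity_map(pdev, 1))
				continue;

			/* Same si_domain either way; the hw flag only
			   selects the context-entry translation type. */
			ret = domain_context_mapping(si_domain, pdev,
						     hw ? CONTEXT_TT_PASS_THROUGH :
							  CONTEXT_TT_MULTI_LEVEL);
			if (ret)
				return ret;

			ret = domain_add_dev_info(si_domain, pdev);
			if (ret)
				return ret;
		}
		return 0;
	}

With this shape, a kernel booted with 'iommu=pt' follows the same path whether or not every IOMMU advertises ecap pass-through; hw_pass_through merely decides between CONTEXT_TT_PASS_THROUGH and a fully populated multi-level page table.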
Diffstat (limited to 'drivers/pci')
-rw-r--r--	drivers/pci/intel-iommu.c | 174
1 file changed, 74 insertions(+), 100 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 097d5da2fae1..147b3b960b61 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -251,7 +251,8 @@ static inline int first_pte_in_page(struct dma_pte *pte)
  * 2. It maps to each iommu if successful.
  * 3. Each iommu mapps to this domain if successful.
  */
-struct dmar_domain *si_domain;
+static struct dmar_domain *si_domain;
+static int hw_pass_through = 1;
 
 /* devices under the same p2p bridge are owned in one domain */
 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
@@ -1948,14 +1949,24 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 	struct dmar_domain *domain;
 	int ret;
 
-	printk(KERN_INFO
-	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
-	       pci_name(pdev), start, end);
-
 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain)
 		return -ENOMEM;
 
+	/* For _hardware_ passthrough, don't bother. But for software
+	   passthrough, we do it anyway -- it may indicate a memory
+	   range which is reserved in E820, so which didn't get set
+	   up to start with in si_domain */
+	if (domain == si_domain && hw_pass_through) {
+		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
+		       pci_name(pdev), start, end);
+		return 0;
+	}
+
+	printk(KERN_INFO
+	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
+	       pci_name(pdev), start, end);
+
 	ret = iommu_domain_identity_map(domain, start, end);
 	if (ret)
 		goto error;
@@ -2006,23 +2017,6 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */
 
-/* Initialize each context entry as pass through.*/
-static int __init init_context_pass_through(void)
-{
-	struct pci_dev *pdev = NULL;
-	struct dmar_domain *domain;
-	int ret;
-
-	for_each_pci_dev(pdev) {
-		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-		ret = domain_context_mapping(domain, pdev,
-					     CONTEXT_TT_PASS_THROUGH);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
 
 static int __init si_domain_work_fn(unsigned long start_pfn,
@@ -2037,7 +2031,7 @@ static int __init si_domain_work_fn(unsigned long start_pfn,
 
 }
 
-static int si_domain_init(void)
+static int si_domain_init(int hw)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
@@ -2064,6 +2058,9 @@ static int si_domain_init(void)
 
 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
 
+	if (hw)
+		return 0;
+
 	for_each_online_node(nid) {
 		work_with_active_regions(nid, si_domain_work_fn, &ret);
 		if (ret)
@@ -2155,24 +2152,26 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 	return 1;
 }
 
-static int iommu_prepare_static_identity_mapping(void)
+static int iommu_prepare_static_identity_mapping(int hw)
 {
 	struct pci_dev *pdev = NULL;
 	int ret;
 
-	ret = si_domain_init();
+	ret = si_domain_init(hw);
 	if (ret)
 		return -EFAULT;
 
 	for_each_pci_dev(pdev) {
 		if (iommu_should_identity_map(pdev, 1)) {
-			printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
-			       pci_name(pdev));
+			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
+			       hw ? "hardware" : "software", pci_name(pdev));
 
 			ret = domain_context_mapping(si_domain, pdev,
+						     hw ? CONTEXT_TT_PASS_THROUGH :
 						     CONTEXT_TT_MULTI_LEVEL);
 			if (ret)
 				return ret;
+
 			ret = domain_add_dev_info(si_domain, pdev);
 			if (ret)
 				return ret;
@@ -2189,14 +2188,6 @@ int __init init_dmars(void)
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
 	int i, ret;
-	int pass_through = 1;
-
-	/*
-	 * In case pass through can not be enabled, iommu tries to use identity
-	 * mapping.
-	 */
-	if (iommu_pass_through)
-		iommu_identity_mapping = 1;
 
 	/*
 	 * for each drhd
@@ -2250,14 +2241,8 @@ int __init init_dmars(void)
 			goto error;
 		}
 		if (!ecap_pass_through(iommu->ecap))
-			pass_through = 0;
+			hw_pass_through = 0;
 	}
-	if (iommu_pass_through)
-		if (!pass_through) {
-			printk(KERN_INFO
-			       "Pass Through is not supported by hardware.\n");
-			iommu_pass_through = 0;
-		}
 
 	/*
 	 * Start from the sane iommu hardware state.
@@ -2312,64 +2297,57 @@ int __init init_dmars(void)
 		}
 	}
 
+	if (iommu_pass_through)
+		iommu_identity_mapping = 1;
+#ifdef CONFIG_DMAR_BROKEN_GFX_WA
+	else
+		iommu_identity_mapping = 2;
+#endif
 	/*
-	 * If pass through is set and enabled, context entries of all pci
-	 * devices are intialized by pass through translation type.
+	 * If pass through is not set or not enabled, setup context entries for
+	 * identity mappings for rmrr, gfx, and isa and may fall back to static
+	 * identity mapping if iommu_identity_mapping is set.
 	 */
-	if (iommu_pass_through) {
-		ret = init_context_pass_through();
+	if (iommu_identity_mapping) {
+		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
 		if (ret) {
-			printk(KERN_ERR "IOMMU: Pass through init failed.\n");
-			iommu_pass_through = 0;
+			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
+			goto error;
 		}
 	}
-
 	/*
-	 * If pass through is not set or not enabled, setup context entries for
-	 * identity mappings for rmrr, gfx, and isa and may fall back to static
-	 * identity mapping if iommu_identity_mapping is set.
+	 * For each rmrr
+	 *   for each dev attached to rmrr
+	 *   do
+	 *     locate drhd for dev, alloc domain for dev
+	 *     allocate free domain
+	 *     allocate page table entries for rmrr
+	 *     if context not allocated for bus
+	 *           allocate and init context
+	 *           set present in root table for this bus
+	 *     init context with domain, translation etc
+	 *    endfor
+	 * endfor
 	 */
-	if (!iommu_pass_through) {
-#ifdef CONFIG_DMAR_BROKEN_GFX_WA
-		if (!iommu_identity_mapping)
-			iommu_identity_mapping = 2;
-#endif
-		if (iommu_identity_mapping)
-			iommu_prepare_static_identity_mapping();
-		/*
-		 * For each rmrr
-		 *   for each dev attached to rmrr
-		 *   do
-		 *     locate drhd for dev, alloc domain for dev
-		 *     allocate free domain
-		 *     allocate page table entries for rmrr
-		 *     if context not allocated for bus
-		 *           allocate and init context
-		 *           set present in root table for this bus
-		 *     init context with domain, translation etc
-		 *    endfor
-		 * endfor
-		 */
-		printk(KERN_INFO "IOMMU: Setting RMRR:\n");
-		for_each_rmrr_units(rmrr) {
-			for (i = 0; i < rmrr->devices_cnt; i++) {
-				pdev = rmrr->devices[i];
-				/*
-				 * some BIOS lists non-exist devices in DMAR
-				 * table.
-				 */
-				if (!pdev)
-					continue;
-				ret = iommu_prepare_rmrr_dev(rmrr, pdev);
-				if (ret)
-					printk(KERN_ERR
-					       "IOMMU: mapping reserved region failed\n");
-			}
+	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
+	for_each_rmrr_units(rmrr) {
+		for (i = 0; i < rmrr->devices_cnt; i++) {
+			pdev = rmrr->devices[i];
+			/*
+			 * some BIOS lists non-exist devices in DMAR
+			 * table.
+			 */
+			if (!pdev)
+				continue;
+			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+			if (ret)
+				printk(KERN_ERR
+				       "IOMMU: mapping reserved region failed\n");
 		}
-
-		iommu_prepare_isa();
 	}
 
+	iommu_prepare_isa();
+
 	/*
 	 * for each drhd
 	 *   enable fault log
@@ -2536,7 +2514,10 @@ static int iommu_no_mapping(struct device *dev)
 		ret = domain_add_dev_info(si_domain, pdev);
 		if (ret)
 			return 0;
-		ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
+		ret = domain_context_mapping(si_domain, pdev,
+					     hw_pass_through ?
+					     CONTEXT_TT_PASS_THROUGH :
+					     CONTEXT_TT_MULTI_LEVEL);
 		if (!ret) {
 			printk(KERN_INFO "64bit %s uses identity mapping\n",
 			       pci_name(pdev));
@@ -3202,7 +3183,7 @@ int __init intel_iommu_init(void)
 	 * Check the need for DMA-remapping initialization now.
 	 * Above initialization will also be used by Interrupt-remapping.
 	 */
-	if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
+	if (no_iommu || swiotlb || dmar_disabled)
 		return -ENODEV;
 
 	iommu_init_mempool();
@@ -3222,14 +3203,7 @@ int __init intel_iommu_init(void)
 
 	init_timer(&unmap_timer);
 	force_iommu = 1;
-
-	if (!iommu_pass_through) {
-		printk(KERN_INFO
-		       "Multi-level page-table translation for DMAR.\n");
-		dma_ops = &intel_dma_ops;
-	} else
-		printk(KERN_INFO
-		       "DMAR: Pass through translation for DMAR.\n");
+	dma_ops = &intel_dma_ops;
 
 	init_iommu_sysfs();
 