path: root/drivers/iommu/intel-iommu.c
author    Peter Xu <peterx@redhat.com>        2019-06-20 22:32:05 -0400
committer Joerg Roedel <jroedel@suse.de>      2019-06-22 15:19:58 -0400
commit    0aafc8ae665f89b9031a914f80f5e58825b33021
tree      b4c6073696482bd1e805cc2c65e9f66797853c35 /drivers/iommu/intel-iommu.c
parent    9e0babf2c06c73cda2c0cd37a1653d823adb40ec
Revert "iommu/vt-d: Fix lock inversion between iommu->lock and device_domain_lock"
This reverts commit 7560cc3ca7d9d11555f80c830544e463fcdb28b8.

With 5.2.0-rc5 I can easily trigger this with lockdep and iommu=pt:

    ======================================================
    WARNING: possible circular locking dependency detected
    5.2.0-rc5 #78 Not tainted
    ------------------------------------------------------
    swapper/0/1 is trying to acquire lock:
    00000000ea2b3beb (&(&iommu->lock)->rlock){+.+.}, at: domain_context_mapping_one+0xa5/0x4e0
    but task is already holding lock:
    00000000a681907b (device_domain_lock){....}, at: domain_context_mapping_one+0x8d/0x4e0
    which lock already depends on the new lock.

    the existing dependency chain (in reverse order) is:
    -> #1 (device_domain_lock){....}:
           _raw_spin_lock_irqsave+0x3c/0x50
           dmar_insert_one_dev_info+0xbb/0x510
           domain_add_dev_info+0x50/0x90
           dev_prepare_static_identity_mapping+0x30/0x68
           intel_iommu_init+0xddd/0x1422
           pci_iommu_init+0x16/0x3f
           do_one_initcall+0x5d/0x2b4
           kernel_init_freeable+0x218/0x2c1
           kernel_init+0xa/0x100
           ret_from_fork+0x3a/0x50
    -> #0 (&(&iommu->lock)->rlock){+.+.}:
           lock_acquire+0x9e/0x170
           _raw_spin_lock+0x25/0x30
           domain_context_mapping_one+0xa5/0x4e0
           pci_for_each_dma_alias+0x30/0x140
           dmar_insert_one_dev_info+0x3b2/0x510
           domain_add_dev_info+0x50/0x90
           dev_prepare_static_identity_mapping+0x30/0x68
           intel_iommu_init+0xddd/0x1422
           pci_iommu_init+0x16/0x3f
           do_one_initcall+0x5d/0x2b4
           kernel_init_freeable+0x218/0x2c1
           kernel_init+0xa/0x100
           ret_from_fork+0x3a/0x50

    other info that might help us debug this:

     Possible unsafe locking scenario:

           CPU0                    CPU1
           ----                    ----
      lock(device_domain_lock);
                                   lock(&(&iommu->lock)->rlock);
                                   lock(device_domain_lock);
      lock(&(&iommu->lock)->rlock);

     *** DEADLOCK ***

    2 locks held by swapper/0/1:
     #0: 00000000033eb13d (dmar_global_lock){++++}, at: intel_iommu_init+0x1e0/0x1422
     #1: 00000000a681907b (device_domain_lock){....}, at: domain_context_mapping_one+0x8d/0x4e0

    stack backtrace:
    CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.2.0-rc5 #78
    Hardware name: LENOVO 20KGS35G01/20KGS35G01, BIOS N23ET50W (1.25 ) 06/25/2018
    Call Trace:
     dump_stack+0x85/0xc0
     print_circular_bug.cold.57+0x15c/0x195
     __lock_acquire+0x152a/0x1710
     lock_acquire+0x9e/0x170
     ? domain_context_mapping_one+0xa5/0x4e0
     _raw_spin_lock+0x25/0x30
     ? domain_context_mapping_one+0xa5/0x4e0
     domain_context_mapping_one+0xa5/0x4e0
     ? domain_context_mapping_one+0x4e0/0x4e0
     pci_for_each_dma_alias+0x30/0x140
     dmar_insert_one_dev_info+0x3b2/0x510
     domain_add_dev_info+0x50/0x90
     dev_prepare_static_identity_mapping+0x30/0x68
     intel_iommu_init+0xddd/0x1422
     ? printk+0x58/0x6f
     ? lockdep_hardirqs_on+0xf0/0x180
     ? do_early_param+0x8e/0x8e
     ? e820__memblock_setup+0x63/0x63
     pci_iommu_init+0x16/0x3f
     do_one_initcall+0x5d/0x2b4
     ? do_early_param+0x8e/0x8e
     ? rcu_read_lock_sched_held+0x55/0x60
     ? do_early_param+0x8e/0x8e
     kernel_init_freeable+0x218/0x2c1
     ? rest_init+0x230/0x230
     kernel_init+0xa/0x100
     ret_from_fork+0x3a/0x50

domain_context_mapping_one() is taking device_domain_lock first and then
the iommu lock, while dmar_insert_one_dev_info() is doing the reverse.
That was introduced by commit:

7560cc3ca7d9 ("iommu/vt-d: Fix lock inversion between iommu->lock and
              device_domain_lock", 2019-05-27)

So far I still cannot figure out how the previous deadlock was triggered
(I cannot find the iommu lock being taken before the call to
iommu_flush_dev_iotlb()), but I'm pretty sure that that change is at
least incomplete: it does not convert all the places, so we are still
taking the locks in different orders, while reverting it gives us a
clean rule that we always take device_domain_lock first and then the
iommu lock.

We can continue to look for the real culprit mentioned in 7560cc3ca7d9,
but for now I think we should revert it to fix the current breakage.

CC: Joerg Roedel <joro@8bytes.org>
CC: Lu Baolu <baolu.lu@linux.intel.com>
CC: dave.jiang@intel.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
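To make the inversion easier to see than in the full splat, here is a minimal,
self-contained sketch of the two acquisition orders lockdep is comparing. The
function names and the bare "iommu_lock" are illustrative stand-ins for the
driver paths named above, not code from intel-iommu.c:

/*
 * Minimal sketch of the ABBA inversion reported above (illustrative only).
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(device_domain_lock);
static DEFINE_SPINLOCK(iommu_lock);		/* stands in for iommu->lock */

/* Like dmar_insert_one_dev_info() with 7560cc3ca7d9 applied: A then B */
static void path_a_then_b(void)
{
	unsigned long flags;

	spin_lock(&iommu_lock);				/* A */
	spin_lock_irqsave(&device_domain_lock, flags);	/* then B */
	/* ... */
	spin_unlock_irqrestore(&device_domain_lock, flags);
	spin_unlock(&iommu_lock);
}

/* Like domain_context_mapping_one(): B then A, the opposite order */
static void path_b_then_a(void)
{
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);	/* B */
	spin_lock(&iommu_lock);				/* then A */
	/* ... */
	spin_unlock(&iommu_lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

If these two paths run concurrently, each CPU can end up holding one lock while
waiting for the other, which is exactly the unsafe scenario in the splat.
Settling on a single order (device_domain_lock outer, iommu->lock inner, as the
revert below does) removes the cycle.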
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
 drivers/iommu/intel-iommu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 56297298d6ee..162b3236e72c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2504,7 +2504,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		}
 	}
 
-	spin_lock(&iommu->lock);
 	spin_lock_irqsave(&device_domain_lock, flags);
 	if (dev)
 		found = find_domain(dev);
@@ -2520,16 +2519,17 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 
 	if (found) {
 		spin_unlock_irqrestore(&device_domain_lock, flags);
-		spin_unlock(&iommu->lock);
 		free_devinfo_mem(info);
 		/* Caller must free the original domain */
 		return found;
 	}
 
+	spin_lock(&iommu->lock);
 	ret = domain_attach_iommu(domain, iommu);
+	spin_unlock(&iommu->lock);
+
 	if (ret) {
 		spin_unlock_irqrestore(&device_domain_lock, flags);
-		spin_unlock(&iommu->lock);
 		free_devinfo_mem(info);
 		return NULL;
 	}
@@ -2539,7 +2539,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	if (dev)
 		dev->archdata.iommu = info;
 	spin_unlock_irqrestore(&device_domain_lock, flags);
-	spin_unlock(&iommu->lock);
 
 	/* PASID table is mandatory for a PCI device in scalable mode. */
 	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
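Read together, the three hunks restore one consistent order in
dmar_insert_one_dev_info(): device_domain_lock stays the outer lock, and
iommu->lock is only held briefly around domain_attach_iommu(). A condensed,
heavily elided view of the function after the revert, as an orientation sketch
rather than the literal source:

	/* dmar_insert_one_dev_info(), post-revert lock order (elided) */
	spin_lock_irqsave(&device_domain_lock, flags);	/* outer lock first */
	if (dev)
		found = find_domain(dev);
	if (found) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		free_devinfo_mem(info);
		return found;			/* iommu->lock never taken */
	}

	spin_lock(&iommu->lock);		/* inner lock, nested second */
	ret = domain_attach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);		/* dropped before any other work */

	if (ret) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		free_devinfo_mem(info);
		return NULL;
	}
	/* ... */
	spin_unlock_irqrestore(&device_domain_lock, flags);

This matches the device_domain_lock-then-iommu->lock order used by
domain_context_mapping_one(), so lockdep no longer sees two conflicting
orderings.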