diff options
author | Will Deacon <will@kernel.org> | 2019-08-20 11:28:54 -0400 |
---|---|---|
committer | Will Deacon <will@kernel.org> | 2019-08-22 13:16:11 -0400 |
commit | cdb8a3c3463563b7bdb6f653bf4b0ffa3a95f366 (patch) | |
tree | eef77dbb6f7518f0d9756bbe5cf061d54ebc1e9d | |
parent | 353e3cf8590cf182a9f42e67993de3aca91e8090 (diff) |
iommu/arm-smmu-v3: Avoid locking on invalidation path when not using ATS
When ATS is not in use, we can avoid taking the 'devices_lock' for the
domain on the invalidation path by simply caching the number of ATS
masters currently attached. The fiddly part is handling a concurrent
->attach() of an ATS-enabled master to a domain that is being
invalidated, but we can handle this using an 'smp_mb()' to ensure that
our check of the count is ordered after completion of our prior TLB
invalidation.
This also makes our ->attach() and ->detach() flows symmetric with
respect to ATS interactions.
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 37 |
1 file changed, 32 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index ca504a60312d..0e43529d55fe 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c | |||
@@ -654,6 +654,7 @@ struct arm_smmu_domain { | |||
654 | 654 | ||
655 | struct io_pgtable_ops *pgtbl_ops; | 655 | struct io_pgtable_ops *pgtbl_ops; |
656 | bool non_strict; | 656 | bool non_strict; |
657 | atomic_t nr_ats_masters; | ||
657 | 658 | ||
658 | enum arm_smmu_domain_stage stage; | 659 | enum arm_smmu_domain_stage stage; |
659 | union { | 660 | union { |
@@ -1926,6 +1927,23 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, | |||
1926 | if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) | 1927 | if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) |
1927 | return 0; | 1928 | return 0; |
1928 | 1929 | ||
1930 | /* | ||
1931 | * Ensure that we've completed prior invalidation of the main TLBs | ||
1932 | * before we read 'nr_ats_masters' in case of a concurrent call to | ||
1933 | * arm_smmu_enable_ats(): | ||
1934 | * | ||
1935 | * // unmap() // arm_smmu_enable_ats() | ||
1936 | * TLBI+SYNC atomic_inc(&nr_ats_masters); | ||
1937 | * smp_mb(); [...] | ||
1938 | * atomic_read(&nr_ats_masters); pci_enable_ats() // writel() | ||
1939 | * | ||
1940 | * Ensures that we always see the incremented 'nr_ats_masters' count if | ||
1941 | * ATS was enabled at the PCI device before completion of the TLBI. | ||
1942 | */ | ||
1943 | smp_mb(); | ||
1944 | if (!atomic_read(&smmu_domain->nr_ats_masters)) | ||
1945 | return 0; | ||
1946 | |||
1929 | arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); | 1947 | arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); |
1930 | 1948 | ||
1931 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); | 1949 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); |
@@ -2312,6 +2330,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) | |||
2312 | size_t stu; | 2330 | size_t stu; |
2313 | struct pci_dev *pdev; | 2331 | struct pci_dev *pdev; |
2314 | struct arm_smmu_device *smmu = master->smmu; | 2332 | struct arm_smmu_device *smmu = master->smmu; |
2333 | struct arm_smmu_domain *smmu_domain = master->domain; | ||
2315 | 2334 | ||
2316 | /* Don't enable ATS at the endpoint if it's not enabled in the STE */ | 2335 | /* Don't enable ATS at the endpoint if it's not enabled in the STE */ |
2317 | if (!master->ats_enabled) | 2336 | if (!master->ats_enabled) |
@@ -2320,6 +2339,9 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) | |||
2320 | /* Smallest Translation Unit: log2 of the smallest supported granule */ | 2339 | /* Smallest Translation Unit: log2 of the smallest supported granule */ |
2321 | stu = __ffs(smmu->pgsize_bitmap); | 2340 | stu = __ffs(smmu->pgsize_bitmap); |
2322 | pdev = to_pci_dev(master->dev); | 2341 | pdev = to_pci_dev(master->dev); |
2342 | |||
2343 | atomic_inc(&smmu_domain->nr_ats_masters); | ||
2344 | arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); | ||
2323 | if (pci_enable_ats(pdev, stu)) | 2345 | if (pci_enable_ats(pdev, stu)) |
2324 | dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); | 2346 | dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); |
2325 | } | 2347 | } |
@@ -2327,6 +2349,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) | |||
2327 | static void arm_smmu_disable_ats(struct arm_smmu_master *master) | 2349 | static void arm_smmu_disable_ats(struct arm_smmu_master *master) |
2328 | { | 2350 | { |
2329 | struct arm_smmu_cmdq_ent cmd; | 2351 | struct arm_smmu_cmdq_ent cmd; |
2352 | struct arm_smmu_domain *smmu_domain = master->domain; | ||
2330 | 2353 | ||
2331 | if (!master->ats_enabled) | 2354 | if (!master->ats_enabled) |
2332 | return; | 2355 | return; |
@@ -2339,6 +2362,7 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master) | |||
2339 | wmb(); | 2362 | wmb(); |
2340 | arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); | 2363 | arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); |
2341 | arm_smmu_atc_inv_master(master, &cmd); | 2364 | arm_smmu_atc_inv_master(master, &cmd); |
2365 | atomic_dec(&smmu_domain->nr_ats_masters); | ||
2342 | } | 2366 | } |
2343 | 2367 | ||
2344 | static void arm_smmu_detach_dev(struct arm_smmu_master *master) | 2368 | static void arm_smmu_detach_dev(struct arm_smmu_master *master) |
@@ -2349,11 +2373,12 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) | |||
2349 | if (!smmu_domain) | 2373 | if (!smmu_domain) |
2350 | return; | 2374 | return; |
2351 | 2375 | ||
2376 | arm_smmu_disable_ats(master); | ||
2377 | |||
2352 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); | 2378 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); |
2353 | list_del(&master->domain_head); | 2379 | list_del(&master->domain_head); |
2354 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); | 2380 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); |
2355 | 2381 | ||
2356 | arm_smmu_disable_ats(master); | ||
2357 | master->domain = NULL; | 2382 | master->domain = NULL; |
2358 | master->ats_enabled = false; | 2383 | master->ats_enabled = false; |
2359 | arm_smmu_install_ste_for_dev(master); | 2384 | arm_smmu_install_ste_for_dev(master); |
@@ -2396,10 +2421,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) | |||
2396 | 2421 | ||
2397 | master->domain = smmu_domain; | 2422 | master->domain = smmu_domain; |
2398 | 2423 | ||
2399 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); | ||
2400 | list_add(&master->domain_head, &smmu_domain->devices); | ||
2401 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); | ||
2402 | |||
2403 | if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) | 2424 | if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) |
2404 | master->ats_enabled = arm_smmu_ats_supported(master); | 2425 | master->ats_enabled = arm_smmu_ats_supported(master); |
2405 | 2426 | ||
@@ -2407,7 +2428,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) | |||
2407 | arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); | 2428 | arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); |
2408 | 2429 | ||
2409 | arm_smmu_install_ste_for_dev(master); | 2430 | arm_smmu_install_ste_for_dev(master); |
2431 | |||
2432 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); | ||
2433 | list_add(&master->domain_head, &smmu_domain->devices); | ||
2434 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); | ||
2435 | |||
2410 | arm_smmu_enable_ats(master); | 2436 | arm_smmu_enable_ats(master); |
2437 | |||
2411 | out_unlock: | 2438 | out_unlock: |
2412 | mutex_unlock(&smmu_domain->init_mutex); | 2439 | mutex_unlock(&smmu_domain->init_mutex); |
2413 | return ret; | 2440 | return ret; |