author    Robin Murphy <robin.murphy@arm.com>    2017-10-18 10:04:26 -0400
committer Will Deacon <will.deacon@arm.com>      2017-10-20 11:55:08 -0400
commit    37de98f8f1cf330918b242cd3ce13751857243a6 (patch)
tree      bec5d901505368a1a229f2e1802d773e70c705a3 /drivers
parent    dce032a15ced2ee9fa58ed7b52c492795d096a40 (diff)
iommu/arm-smmu-v3: Use CMD_SYNC completion MSI
As an IRQ, the CMD_SYNC interrupt is not particularly useful, not least
because we often need to wait for sync completion within someone else's
IRQ handler anyway. However, when the SMMU is both coherent and supports
MSIs, we can have a lot more fun by not using it as an interrupt at all.
Following the example suggested in the architecture and using a write
targeting normal memory, we can let callers wait on a status variable
outside the lock instead of having to stall the entire queue or even
touch MMIO registers. Since multiple sync commands are guaranteed to
complete in order, a simple incrementing sequence count is all we need
to unambiguously support any realistic number of overlapping waiters.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
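The heart of the "incrementing sequence count" scheme is a wrap-safe
comparison: casting the unsigned difference of two u32 sequence numbers to
a signed int gives a correct "has index X completed yet?" test even across
32-bit wraparound, provided the two values never diverge by 2^31 or more
(the command queue holds at most 2^20 entries). A minimal userspace sketch
of just that comparison (illustrative only, not code from this patch):

#include <stdint.h>
#include <stdio.h>

/* Returns nonzero once the completion counter has reached 'wanted'. */
static int sync_idx_complete(uint32_t completed, uint32_t wanted)
{
	return (int32_t)(completed - wanted) >= 0;
}

int main(void)
{
	printf("%d\n", sync_idx_complete(10, 8));          /* 1: already done  */
	printf("%d\n", sync_idx_complete(2, 0xfffffffeU)); /* 1: wrapped, done */
	printf("%d\n", sync_idx_complete(5, 7));           /* 0: still pending */
	return 0;
}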
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/iommu/arm-smmu-v3.c | 51
1 file changed, 49 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index aa3bd3968290..ceb8f9ef4bad 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -378,7 +378,16 @@
 
 #define CMDQ_SYNC_0_CS_SHIFT		12
 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ		(1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT		22
+#define CMDQ_SYNC_0_MSH_ISH		(3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
+#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
+#define CMDQ_SYNC_0_MSIDATA_MASK	0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT	0
+#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
@@ -410,6 +419,7 @@
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US	1000000 /* 1s! */
+#define ARM_SMMU_SYNC_TIMEOUT_US	1000000 /* 1s! */
 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
@@ -496,6 +506,10 @@ struct arm_smmu_cmdq_ent {
 		} pri;
 
 		#define CMDQ_OP_CMD_SYNC	0x46
+		struct {
+			u32			msidata;
+			u64			msiaddr;
+		} sync;
 	};
 };
 
@@ -609,6 +623,7 @@ struct arm_smmu_device {
 
 	int				gerr_irq;
 	int				combined_irq;
+	atomic_t			sync_nr;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -627,6 +642,8 @@ struct arm_smmu_device {
 
 	struct arm_smmu_strtab_cfg	strtab_cfg;
 
+	u32				sync_count;
+
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 };
@@ -871,7 +888,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		}
 		break;
 	case CMDQ_OP_CMD_SYNC:
-		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		if (ent->sync.msiaddr)
+			cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+		else
+			cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+		cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
 		return -ENOENT;
@@ -957,21 +980,44 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
+	u32 val = smp_cond_load_acquire(&smmu->sync_count,
+					(int)(VAL - sync_idx) >= 0 ||
+					!ktime_before(ktime_get(), timeout));
+
+	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
 	unsigned long flags;
 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+	bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+		   (smmu->features & ARM_SMMU_FEAT_COHERENCY);
 	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
 	int ret;
 
+	if (msi) {
+		ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr);
+		ent.sync.msiaddr = virt_to_phys(&smmu->sync_count);
+	}
 	arm_smmu_cmdq_build_cmd(cmd, &ent);
 
 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
 	arm_smmu_cmdq_insert_cmd(smmu, cmd);
-	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
+	if (!msi)
+		ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 
+	if (msi)
+		ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
 	if (ret)
 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 }
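To make the wait path in the hunk above concrete, here is a hypothetical
userspace analogue of arm_smmu_sync_poll_msi() (an illustration only, under
the assumption that a C11 acquire load stands in for smp_cond_load_acquire()
and clock_gettime() for ktime; none of this is kernel code):

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static _Atomic uint32_t sync_count;	/* stand-in for smmu->sync_count */

static long elapsed_ns(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - start->tv_sec) * 1000000000L +
	       (now.tv_nsec - start->tv_nsec);
}

/* Spin until the counter passes sync_idx or the timeout expires. */
static int sync_poll(uint32_t sync_idx, long timeout_ns)
{
	struct timespec start;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		uint32_t val = atomic_load_explicit(&sync_count,
						    memory_order_acquire);
		if ((int32_t)(val - sync_idx) >= 0)
			return 0;
		if (elapsed_ns(&start) > timeout_ns)
			return -ETIMEDOUT;
	}
}

int main(void)
{
	/* Pretend the "MSI" write for index 3 has already landed. */
	atomic_store_explicit(&sync_count, 3, memory_order_release);
	printf("wait for 2: %d\n", sync_poll(2, 1000000L));	/* 0 */
	printf("wait for 9: %d\n", sync_poll(9, 1000000L));	/* -ETIMEDOUT */
	return 0;
}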
@@ -2159,6 +2205,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
 
+	atomic_set(&smmu->sync_nr, 0);
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
 		return ret;
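For reference, the field definitions added at the top of the patch pack into
the two 64-bit CMD_SYNC words as sketched below. This is a self-contained
illustration with made-up msidata/msiaddr values (the ULL suffixes replace
the kernel's UL for portability off LP64 targets), not output from real
hardware:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Field definitions mirroring the patch. */
#define CMDQ_OP_CMD_SYNC		0x46ULL
#define CMDQ_SYNC_0_CS_SHIFT		12
#define CMDQ_SYNC_0_CS_IRQ		(1ULL << CMDQ_SYNC_0_CS_SHIFT)
#define CMDQ_SYNC_0_MSH_SHIFT		22
#define CMDQ_SYNC_0_MSH_ISH		(3ULL << CMDQ_SYNC_0_MSH_SHIFT)
#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfULL << CMDQ_SYNC_0_MSIATTR_SHIFT)
#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcULL

int main(void)
{
	uint64_t cmd[2] = { CMDQ_OP_CMD_SYNC, 0 };
	uint32_t msidata = 42;		/* hypothetical sync_nr value */
	uint64_t msiaddr = 0x80001230;	/* hypothetical phys addr of sync_count */

	cmd[0] |= CMDQ_SYNC_0_CS_IRQ;	/* completion reported by memory write */
	cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
	cmd[0] |= (uint64_t)msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
	cmd[1] |= msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;

	printf("cmd[0] = 0x%016" PRIx64 "\n", cmd[0]);
	printf("cmd[1] = 0x%016" PRIx64 "\n", cmd[1]);
	return 0;
}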