summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/iommu/Kconfig1
-rw-r--r--drivers/iommu/intel-iommu.c104
-rw-r--r--drivers/iommu/intel-svm.c291
3 files changed, 396 insertions, 0 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 8d23f5ed8ae2..be18b214c31e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -139,6 +139,7 @@ config INTEL_IOMMU_SVM
139 bool "Support for Shared Virtual Memory with Intel IOMMU" 139 bool "Support for Shared Virtual Memory with Intel IOMMU"
140 depends on INTEL_IOMMU && X86 140 depends on INTEL_IOMMU && X86
141 select PCI_PASID 141 select PCI_PASID
142 select MMU_NOTIFIER
142 help 143 help
143 Shared Virtual Memory (SVM) provides a facility for devices 144 Shared Virtual Memory (SVM) provides a facility for devices
144 to access DMA resources through process address space by 145 to access DMA resources through process address space by
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9995ea84e23a..60b66d27e655 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4929,6 +4929,110 @@ static void intel_iommu_remove_device(struct device *dev)
4929 iommu_device_unlink(iommu->iommu_dev, dev); 4929 iommu_device_unlink(iommu->iommu_dev, dev);
4930} 4930}
4931 4931
4932#ifdef CONFIG_INTEL_IOMMU_SVM
4933int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
4934{
4935 struct device_domain_info *info;
4936 struct context_entry *context;
4937 struct dmar_domain *domain;
4938 unsigned long flags;
4939 u64 ctx_lo;
4940 int ret;
4941
4942 domain = get_valid_domain_for_dev(sdev->dev);
4943 if (!domain)
4944 return -EINVAL;
4945
4946 spin_lock_irqsave(&device_domain_lock, flags);
4947 spin_lock(&iommu->lock);
4948
4949 ret = -EINVAL;
4950 info = sdev->dev->archdata.iommu;
4951 if (!info || !info->pasid_supported)
4952 goto out;
4953
4954 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
4955 if (WARN_ON(!context))
4956 goto out;
4957
4958 ctx_lo = context[0].lo;
4959
4960 sdev->did = domain->iommu_did[iommu->seq_id];
4961 sdev->sid = PCI_DEVID(info->bus, info->devfn);
4962
4963 if (!(ctx_lo & CONTEXT_PASIDE)) {
4964 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
4965 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
4966 wmb();
4967 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
4968 * extended to permit requests-with-PASID if the PASIDE bit
4969 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
4970 * however, the PASIDE bit is ignored and requests-with-PASID
4971 * are unconditionally blocked. Which makes less sense.
4972 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
4973 * "guest mode" translation types depending on whether ATS
4974 * is available or not. Annoyingly, we can't use the new
4975 * modes *unless* PASIDE is set. */
4976 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
4977 ctx_lo &= ~CONTEXT_TT_MASK;
4978 if (info->ats_supported)
4979 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
4980 else
4981 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
4982 }
4983 ctx_lo |= CONTEXT_PASIDE;
4984 context[0].lo = ctx_lo;
4985 wmb();
4986 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
4987 DMA_CCMD_MASK_NOBIT,
4988 DMA_CCMD_DEVICE_INVL);
4989 }
4990
4991 /* Enable PASID support in the device, if it wasn't already */
4992 if (!info->pasid_enabled)
4993 iommu_enable_dev_iotlb(info);
4994
4995 if (info->ats_enabled) {
4996 sdev->dev_iotlb = 1;
4997 sdev->qdep = info->ats_qdep;
4998 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
4999 sdev->qdep = 0;
5000 }
5001 ret = 0;
5002
5003 out:
5004 spin_unlock(&iommu->lock);
5005 spin_unlock_irqrestore(&device_domain_lock, flags);
5006
5007 return ret;
5008}
5009
5010struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5011{
5012 struct intel_iommu *iommu;
5013 u8 bus, devfn;
5014
5015 if (iommu_dummy(dev)) {
5016 dev_warn(dev,
5017 "No IOMMU translation for device; cannot enable SVM\n");
5018 return NULL;
5019 }
5020
5021 iommu = device_to_iommu(dev, &bus, &devfn);
5022 if ((!iommu)) {
5023 dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n");
5024 return NULL;
5025 }
5026
5027 if (!iommu->pasid_table) {
5028 dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5029 return NULL;
5030 }
5031
5032 return iommu;
5033}
5034#endif /* CONFIG_INTEL_IOMMU_SVM */
5035
4932static const struct iommu_ops intel_iommu_ops = { 5036static const struct iommu_ops intel_iommu_ops = {
4933 .capable = intel_iommu_capable, 5037 .capable = intel_iommu_capable,
4934 .domain_alloc = intel_iommu_domain_alloc, 5038 .domain_alloc = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 5b42a95b3f80..82d53e15b865 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -14,6 +14,17 @@
14 */ 14 */
15 15
16#include <linux/intel-iommu.h> 16#include <linux/intel-iommu.h>
17#include <linux/mmu_notifier.h>
18#include <linux/sched.h>
19#include <linux/slab.h>
20#include <linux/intel-svm.h>
21#include <linux/rculist.h>
22#include <linux/pci.h>
23#include <linux/pci-ats.h>
24
25struct pasid_entry {
26 u64 val;
27};
17 28
18int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) 29int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
19{ 30{
@@ -42,6 +53,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
42 iommu->name); 53 iommu->name);
43 } 54 }
44 55
56 idr_init(&iommu->pasid_idr);
57
45 return 0; 58 return 0;
46} 59}
47 60
@@ -61,5 +74,283 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
61 free_pages((unsigned long)iommu->pasid_state_table, order); 74 free_pages((unsigned long)iommu->pasid_state_table, order);
62 iommu->pasid_state_table = NULL; 75 iommu->pasid_state_table = NULL;
63 } 76 }
77 idr_destroy(&iommu->pasid_idr);
64 return 0; 78 return 0;
65} 79}
80
81static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
82 unsigned long address, int pages, int ih)
83{
84 struct qi_desc desc;
85 int mask = ilog2(__roundup_pow_of_two(pages));
86
87 if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) ||
88 mask > cap_max_amask_val(svm->iommu->cap)) {
89 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
90 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
91 desc.high = 0;
92 } else {
93 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
94 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
95 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) |
96 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
97 }
98
99 qi_submit_sync(&desc, svm->iommu);
100
101 if (sdev->dev_iotlb) {
102 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
103 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
104 if (mask) {
105 unsigned long adr, delta;
106
107 /* Least significant zero bits in the address indicate the
108 * range of the request. So mask them out according to the
109 * size. */
110 adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1);
111
112 /* Now ensure that we round down further if the original
113 * request was not aligned w.r.t. its size */
114 delta = address - adr;
115 if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask)))
116 adr &= ~(1 << (VTD_PAGE_SHIFT + mask));
117 desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE;
118 } else {
119 desc.high = QI_DEV_EIOTLB_ADDR(address);
120 }
121 qi_submit_sync(&desc, svm->iommu);
122 }
123}
124
125static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
126 int pages, int ih)
127{
128 struct intel_svm_dev *sdev;
129
130 rcu_read_lock();
131 list_for_each_entry_rcu(sdev, &svm->devs, list)
132 intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
133 rcu_read_unlock();
134}
135
136static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
137 unsigned long address, pte_t pte)
138{
139 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
140
141 intel_flush_svm_range(svm, address, 1, 1);
142}
143
144static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
145 unsigned long address)
146{
147 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
148
149 intel_flush_svm_range(svm, address, 1, 1);
150}
151
152/* Pages have been freed at this point */
153static void intel_invalidate_range(struct mmu_notifier *mn,
154 struct mm_struct *mm,
155 unsigned long start, unsigned long end)
156{
157 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
158
159 intel_flush_svm_range(svm, start,
160 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0);
161}
162
163
164static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev)
165{
166 struct qi_desc desc;
167
168 desc.high = 0;
169 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid);
170
171 qi_submit_sync(&desc, svm->iommu);
172}
173
174static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
175{
176 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
177
178 svm->iommu->pasid_table[svm->pasid].val = 0;
179
180 /* There's no need to do any flush because we can't get here if there
181 * are any devices left anyway. */
182 WARN_ON(!list_empty(&svm->devs));
183}
184
185static const struct mmu_notifier_ops intel_mmuops = {
186 .release = intel_mm_release,
187 .change_pte = intel_change_pte,
188 .invalidate_page = intel_invalidate_page,
189 .invalidate_range = intel_invalidate_range,
190};
191
192static DEFINE_MUTEX(pasid_mutex);
193
194int intel_svm_bind_mm(struct device *dev, int *pasid)
195{
196 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
197 struct intel_svm_dev *sdev;
198 struct intel_svm *svm = NULL;
199 int pasid_max;
200 int ret;
201
202 BUG_ON(pasid && !current->mm);
203
204 if (WARN_ON(!iommu))
205 return -EINVAL;
206
207 if (dev_is_pci(dev)) {
208 pasid_max = pci_max_pasids(to_pci_dev(dev));
209 if (pasid_max < 0)
210 return -EINVAL;
211 } else
212 pasid_max = 1 << 20;
213
214 mutex_lock(&pasid_mutex);
215 if (pasid) {
216 int i;
217
218 idr_for_each_entry(&iommu->pasid_idr, svm, i) {
219 if (svm->mm != current->mm)
220 continue;
221
222 if (svm->pasid >= pasid_max) {
223 dev_warn(dev,
224 "Limited PASID width. Cannot use existing PASID %d\n",
225 svm->pasid);
226 ret = -ENOSPC;
227 goto out;
228 }
229
230 list_for_each_entry(sdev, &svm->devs, list) {
231 if (dev == sdev->dev) {
232 sdev->users++;
233 goto success;
234 }
235 }
236
237 break;
238 }
239 }
240
241 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
242 if (!sdev) {
243 ret = -ENOMEM;
244 goto out;
245 }
246 sdev->dev = dev;
247
248 ret = intel_iommu_enable_pasid(iommu, sdev);
249 if (ret || !pasid) {
250 /* If they don't actually want to assign a PASID, this is
251 * just an enabling check/preparation. */
252 kfree(sdev);
253 goto out;
254 }
255 /* Finish the setup now we know we're keeping it */
256 sdev->users = 1;
257 init_rcu_head(&sdev->rcu);
258
259 if (!svm) {
260 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
261 if (!svm) {
262 ret = -ENOMEM;
263 kfree(sdev);
264 goto out;
265 }
266 svm->iommu = iommu;
267
268 if (pasid_max > 2 << ecap_pss(iommu->ecap))
269 pasid_max = 2 << ecap_pss(iommu->ecap);
270
271 ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1,
272 GFP_KERNEL);
273 if (ret < 0) {
274 kfree(svm);
275 goto out;
276 }
277 svm->pasid = ret;
278 svm->notifier.ops = &intel_mmuops;
279 svm->mm = get_task_mm(current);
280 INIT_LIST_HEAD_RCU(&svm->devs);
281 ret = -ENOMEM;
282 if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) {
283 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
284 kfree(svm);
285 kfree(sdev);
286 goto out;
287 }
288 iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1;
289 wmb();
290 }
291 list_add_rcu(&sdev->list, &svm->devs);
292
293 success:
294 *pasid = svm->pasid;
295 ret = 0;
296 out:
297 mutex_unlock(&pasid_mutex);
298 return ret;
299}
300EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
301
302int intel_svm_unbind_mm(struct device *dev, int pasid)
303{
304 struct intel_svm_dev *sdev;
305 struct intel_iommu *iommu;
306 struct intel_svm *svm;
307 int ret = -EINVAL;
308
309 mutex_lock(&pasid_mutex);
310 iommu = intel_svm_device_to_iommu(dev);
311 if (!iommu || !iommu->pasid_table)
312 goto out;
313
314 svm = idr_find(&iommu->pasid_idr, pasid);
315 if (!svm)
316 goto out;
317
318 list_for_each_entry(sdev, &svm->devs, list) {
319 if (dev == sdev->dev) {
320 ret = 0;
321 sdev->users--;
322 if (!sdev->users) {
323 list_del_rcu(&sdev->list);
324 /* Flush the PASID cache and IOTLB for this device.
325 * Note that we do depend on the hardware *not* using
326 * the PASID any more. Just as we depend on other
327 * devices never using PASIDs that they have no right
328 * to use. We have a *shared* PASID table, because it's
329 * large and has to be physically contiguous. So it's
330 * hard to be as defensive as we might like. */
331 intel_flush_pasid_dev(svm, sdev);
332 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
333 kfree_rcu(sdev, rcu);
334
335 if (list_empty(&svm->devs)) {
336 mmu_notifier_unregister(&svm->notifier, svm->mm);
337
338 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
339 mmput(svm->mm);
340 /* We mandate that no page faults may be outstanding
341 * for the PASID when intel_svm_unbind_mm() is called.
342 * If that is not obeyed, subtle errors will happen.
343 * Let's make them less subtle... */
344 memset(svm, 0x6b, sizeof(*svm));
345 kfree(svm);
346 }
347 }
348 break;
349 }
350 }
351 out:
352 mutex_unlock(&pasid_mutex);
353
354 return ret;
355}
356EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);