author	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-17 14:26:09 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-17 14:26:09 -0400
commit	3a1d5384b7decbff6519daa9c65a35665e227323 (patch)
tree	7442f1b74d452d82d6702f8cd25173cc81c0c634
parent	37d4607ebbbf5d8b74cbcb9434a5ce6897a51864 (diff)
parent	5e663f0410fa2f355042209154029842ba1abd43 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio, vhost updates from Michael Tsirkin:
 "Fixes, features, performance:

   - new iommu device

   - vhost guest memory access using vmap (just meta-data for now)

   - minor fixes"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio-mmio: add error check for platform_get_irq
  scsi: virtio_scsi: Use struct_size() helper
  iommu/virtio: Add event queue
  iommu/virtio: Add probe request
  iommu: Add virtio-iommu driver
  PCI: OF: Initialize dev->fwnode appropriately
  of: Allow the iommu-map property to omit untranslated devices
  dt-bindings: virtio: Add virtio-pci-iommu node
  dt-bindings: virtio-mmio: Add IOMMU description
  vhost: fix clang build warning
  vhost: access vq metadata through kernel virtual address
  vhost: factor out setting vring addr and num
  vhost: introduce helpers to get the size of metadata area
  vhost: rename vq_iotlb_prefetch() to vq_meta_prefetch()
  vhost: fine grain userspace memory accessors
  vhost: generalize adding used elem
-rw-r--r--	Documentation/devicetree/bindings/virtio/iommu.txt	66
-rw-r--r--	Documentation/devicetree/bindings/virtio/mmio.txt	30
-rw-r--r--	MAINTAINERS	7
-rw-r--r--	drivers/iommu/Kconfig	11
-rw-r--r--	drivers/iommu/Makefile	1
-rw-r--r--	drivers/iommu/virtio-iommu.c	1158
-rw-r--r--	drivers/of/base.c	10
-rw-r--r--	drivers/pci/of.c	8
-rw-r--r--	drivers/scsi/virtio_scsi.c	2
-rw-r--r--	drivers/vhost/net.c	4
-rw-r--r--	drivers/vhost/vhost.c	850
-rw-r--r--	drivers/vhost/vhost.h	43
-rw-r--r--	drivers/virtio/virtio_mmio.c	7
-rw-r--r--	include/uapi/linux/virtio_ids.h	1
-rw-r--r--	include/uapi/linux/virtio_iommu.h	161
15 files changed, 2228 insertions(+), 131 deletions(-)
diff --git a/Documentation/devicetree/bindings/virtio/iommu.txt b/Documentation/devicetree/bindings/virtio/iommu.txt
new file mode 100644
index 000000000000..2407fea0651c
--- /dev/null
+++ b/Documentation/devicetree/bindings/virtio/iommu.txt
@@ -0,0 +1,66 @@
+* virtio IOMMU PCI device
+
+When virtio-iommu uses the PCI transport, its programming interface is
+discovered dynamically by the PCI probing infrastructure. However the
+device tree statically describes the relation between IOMMU and DMA
+masters. Therefore, the PCI root complex that hosts the virtio-iommu
+contains a child node representing the IOMMU device explicitly.
+
+Required properties:
+
+- compatible:	Should be "virtio,pci-iommu"
+- reg:		PCI address of the IOMMU. As defined in the PCI Bus
+		Binding reference [1], the reg property is a five-cell
+		address encoded as (phys.hi phys.mid phys.lo size.hi
+		size.lo). phys.hi should contain the device's BDF as
+		0b00000000 bbbbbbbb dddddfff 00000000. The other cells
+		should be zero.
+- #iommu-cells:	Each platform DMA master managed by the IOMMU is assigned
+		an endpoint ID, described by the "iommus" property [2].
+		For virtio-iommu, #iommu-cells must be 1.
+
+Notes:
+
+- DMA from the IOMMU device isn't managed by another IOMMU. Therefore the
+  virtio-iommu node doesn't have an "iommus" property, and is omitted from
+  the iommu-map property of the root complex.
+
+Example:
+
+pcie@10000000 {
+	compatible = "pci-host-ecam-generic";
+	...
+
+	/* The IOMMU programming interface uses slot 00:01.0 */
+	iommu0: iommu@0008 {
+		compatible = "virtio,pci-iommu";
+		reg = <0x00000800 0 0 0 0>;
+		#iommu-cells = <1>;
+	};
+
+	/*
+	 * The IOMMU manages all functions in this PCI domain except
+	 * itself. Omit BDF 00:01.0.
+	 */
+	iommu-map = <0x0 &iommu0 0x0 0x8>
+		    <0x9 &iommu0 0x9 0xfff7>;
+};
+
+pcie@20000000 {
+	compatible = "pci-host-ecam-generic";
+	...
+	/*
+	 * The IOMMU also manages all functions from this domain,
+	 * with endpoint IDs 0x10000 - 0x1ffff
+	 */
+	iommu-map = <0x0 &iommu0 0x10000 0x10000>;
+};
+
+ethernet@fe001000 {
+	...
+	/* The IOMMU manages this platform device with endpoint ID 0x20000 */
+	iommus = <&iommu0 0x20000>;
+};
+
+[1] Documentation/devicetree/bindings/pci/pci.txt
+[2] Documentation/devicetree/bindings/iommu/iommu.txt
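
The reg property in the binding above packs the IOMMU's bus/device/function numbers into phys.hi. A minimal sketch of that packing, included only to make the 0x00000800 value in the example concrete (the helper name is illustrative, not part of the binding or the driver):

	#include <linux/types.h>

	/* phys.hi layout: 0b00000000 bbbbbbbb dddddfff 00000000 */
	static u32 viommu_example_phys_hi(u32 bus, u32 dev, u32 fn)
	{
		return (bus << 16) | (dev << 11) | (fn << 8);
	}

	/* Slot 00:01.0 from the example above: viommu_example_phys_hi(0, 1, 0) == 0x00000800 */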
diff --git a/Documentation/devicetree/bindings/virtio/mmio.txt b/Documentation/devicetree/bindings/virtio/mmio.txt
index 5069c1b8e193..21af30fbb81f 100644
--- a/Documentation/devicetree/bindings/virtio/mmio.txt
+++ b/Documentation/devicetree/bindings/virtio/mmio.txt
@@ -8,10 +8,40 @@ Required properties:
 - reg:		control registers base address and size including configuration space
 - interrupts:	interrupt generated by the device
 
+Required properties for virtio-iommu:
+
+- #iommu-cells:	When the node corresponds to a virtio-iommu device, it is
+		linked to DMA masters using the "iommus" or "iommu-map"
+		properties [1][2]. #iommu-cells specifies the size of the
+		"iommus" property. For virtio-iommu #iommu-cells must be
+		1, each cell describing a single endpoint ID.
+
+Optional properties:
+
+- iommus:	If the device accesses memory through an IOMMU, it should
+		have an "iommus" property [1]. Since virtio-iommu itself
+		does not access memory through an IOMMU, the "virtio,mmio"
+		node cannot have both an "#iommu-cells" and an "iommus"
+		property.
+
 Example:
 
 	virtio_block@3000 {
 		compatible = "virtio,mmio";
 		reg = <0x3000 0x100>;
 		interrupts = <41>;
+
+		/* Device has endpoint ID 23 */
+		iommus = <&viommu 23>
 	}
+
+	viommu: iommu@3100 {
+		compatible = "virtio,mmio";
+		reg = <0x3100 0x100>;
+		interrupts = <42>;
+
+		#iommu-cells = <1>
+	}
+
+[1] Documentation/devicetree/bindings/iommu/iommu.txt
+[2] Documentation/devicetree/bindings/pci/pci-iommu.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index d452d7bbbaad..c04ea10cfb1b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17107,6 +17107,13 @@ S: Maintained
 F:	drivers/virtio/virtio_input.c
 F:	include/uapi/linux/virtio_input.h
 
+VIRTIO IOMMU DRIVER
+M:	Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+L:	virtualization@lists.linux-foundation.org
+S:	Maintained
+F:	drivers/iommu/virtio-iommu.c
+F:	include/uapi/linux/virtio_iommu.h
+
 VIRTUAL BOX GUEST DEVICE DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
 M:	Arnd Bergmann <arnd@arndb.de>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 83664db5221d..e15cdcd8cb3c 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -473,4 +473,15 @@ config HYPERV_IOMMU
 	  Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux
 	  guests to run with x2APIC mode enabled.
 
+config VIRTIO_IOMMU
+	bool "Virtio IOMMU driver"
+	depends on VIRTIO=y
+	depends on ARM64
+	select IOMMU_API
+	select INTERVAL_TREE
+	help
+	  Para-virtualised IOMMU driver with virtio.
+
+	  Say Y here if you intend to run this kernel as a guest.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 8c71a15e986b..f13f36ae1af6 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -33,3 +33,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
 obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
new file mode 100644
index 000000000000..433f4d2ee956
--- /dev/null
+++ b/drivers/iommu/virtio-iommu.c
@@ -0,0 +1,1158 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Virtio driver for the paravirtualized IOMMU
4 *
5 * Copyright (C) 2018 Arm Limited
6 */
7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10#include <linux/amba/bus.h>
11#include <linux/delay.h>
12#include <linux/dma-iommu.h>
13#include <linux/freezer.h>
14#include <linux/interval_tree.h>
15#include <linux/iommu.h>
16#include <linux/module.h>
17#include <linux/of_iommu.h>
18#include <linux/of_platform.h>
19#include <linux/pci.h>
20#include <linux/platform_device.h>
21#include <linux/virtio.h>
22#include <linux/virtio_config.h>
23#include <linux/virtio_ids.h>
24#include <linux/wait.h>
25
26#include <uapi/linux/virtio_iommu.h>
27
28#define MSI_IOVA_BASE 0x8000000
29#define MSI_IOVA_LENGTH 0x100000
30
31#define VIOMMU_REQUEST_VQ 0
32#define VIOMMU_EVENT_VQ 1
33#define VIOMMU_NR_VQS 2
34
35struct viommu_dev {
36 struct iommu_device iommu;
37 struct device *dev;
38 struct virtio_device *vdev;
39
40 struct ida domain_ids;
41
42 struct virtqueue *vqs[VIOMMU_NR_VQS];
43 spinlock_t request_lock;
44 struct list_head requests;
45 void *evts;
46
47 /* Device configuration */
48 struct iommu_domain_geometry geometry;
49 u64 pgsize_bitmap;
50 u8 domain_bits;
51 u32 probe_size;
52};
53
54struct viommu_mapping {
55 phys_addr_t paddr;
56 struct interval_tree_node iova;
57 u32 flags;
58};
59
60struct viommu_domain {
61 struct iommu_domain domain;
62 struct viommu_dev *viommu;
63 struct mutex mutex; /* protects viommu pointer */
64 unsigned int id;
65
66 spinlock_t mappings_lock;
67 struct rb_root_cached mappings;
68
69 unsigned long nr_endpoints;
70};
71
72struct viommu_endpoint {
73 struct device *dev;
74 struct viommu_dev *viommu;
75 struct viommu_domain *vdomain;
76 struct list_head resv_regions;
77};
78
79struct viommu_request {
80 struct list_head list;
81 void *writeback;
82 unsigned int write_offset;
83 unsigned int len;
84 char buf[];
85};
86
87#define VIOMMU_FAULT_RESV_MASK 0xffffff00
88
89struct viommu_event {
90 union {
91 u32 head;
92 struct virtio_iommu_fault fault;
93 };
94};
95
96#define to_viommu_domain(domain) \
97 container_of(domain, struct viommu_domain, domain)
98
99static int viommu_get_req_errno(void *buf, size_t len)
100{
101 struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
102
103 switch (tail->status) {
104 case VIRTIO_IOMMU_S_OK:
105 return 0;
106 case VIRTIO_IOMMU_S_UNSUPP:
107 return -ENOSYS;
108 case VIRTIO_IOMMU_S_INVAL:
109 return -EINVAL;
110 case VIRTIO_IOMMU_S_RANGE:
111 return -ERANGE;
112 case VIRTIO_IOMMU_S_NOENT:
113 return -ENOENT;
114 case VIRTIO_IOMMU_S_FAULT:
115 return -EFAULT;
116 case VIRTIO_IOMMU_S_IOERR:
117 case VIRTIO_IOMMU_S_DEVERR:
118 default:
119 return -EIO;
120 }
121}
122
123static void viommu_set_req_status(void *buf, size_t len, int status)
124{
125 struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);
126
127 tail->status = status;
128}
129
130static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu,
131 struct virtio_iommu_req_head *req,
132 size_t len)
133{
134 size_t tail_size = sizeof(struct virtio_iommu_req_tail);
135
136 if (req->type == VIRTIO_IOMMU_T_PROBE)
137 return len - viommu->probe_size - tail_size;
138
139 return len - tail_size;
140}
141
142/*
143 * __viommu_sync_req - Complete all in-flight requests
144 *
145 * Wait for all added requests to complete. When this function returns, all
146 * requests that were in-flight at the time of the call have completed.
147 */
148static int __viommu_sync_req(struct viommu_dev *viommu)
149{
150 int ret = 0;
151 unsigned int len;
152 size_t write_len;
153 struct viommu_request *req;
154 struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
155
156 assert_spin_locked(&viommu->request_lock);
157
158 virtqueue_kick(vq);
159
160 while (!list_empty(&viommu->requests)) {
161 len = 0;
162 req = virtqueue_get_buf(vq, &len);
163 if (!req)
164 continue;
165
166 if (!len)
167 viommu_set_req_status(req->buf, req->len,
168 VIRTIO_IOMMU_S_IOERR);
169
170 write_len = req->len - req->write_offset;
171 if (req->writeback && len == write_len)
172 memcpy(req->writeback, req->buf + req->write_offset,
173 write_len);
174
175 list_del(&req->list);
176 kfree(req);
177 }
178
179 return ret;
180}
181
182static int viommu_sync_req(struct viommu_dev *viommu)
183{
184 int ret;
185 unsigned long flags;
186
187 spin_lock_irqsave(&viommu->request_lock, flags);
188 ret = __viommu_sync_req(viommu);
189 if (ret)
190 dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
191 spin_unlock_irqrestore(&viommu->request_lock, flags);
192
193 return ret;
194}
195
196/*
197 * __viommu_add_request - Add one request to the queue
198 * @buf: pointer to the request buffer
199 * @len: length of the request buffer
200 * @writeback: copy data back to the buffer when the request completes.
201 *
202 * Add a request to the queue. Only synchronize the queue if it's already full.
203 * Otherwise don't kick the queue nor wait for requests to complete.
204 *
205 * When @writeback is true, data written by the device, including the request
206 * status, is copied into @buf after the request completes. This is unsafe if
207 * the caller allocates @buf on stack and drops the lock between add_req() and
208 * sync_req().
209 *
210 * Return 0 if the request was successfully added to the queue.
211 */
212static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
213 bool writeback)
214{
215 int ret;
216 off_t write_offset;
217 struct viommu_request *req;
218 struct scatterlist top_sg, bottom_sg;
219 struct scatterlist *sg[2] = { &top_sg, &bottom_sg };
220 struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
221
222 assert_spin_locked(&viommu->request_lock);
223
224 write_offset = viommu_get_write_desc_offset(viommu, buf, len);
225 if (write_offset <= 0)
226 return -EINVAL;
227
228 req = kzalloc(sizeof(*req) + len, GFP_ATOMIC);
229 if (!req)
230 return -ENOMEM;
231
232 req->len = len;
233 if (writeback) {
234 req->writeback = buf + write_offset;
235 req->write_offset = write_offset;
236 }
237 memcpy(&req->buf, buf, write_offset);
238
239 sg_init_one(&top_sg, req->buf, write_offset);
240 sg_init_one(&bottom_sg, req->buf + write_offset, len - write_offset);
241
242 ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
243 if (ret == -ENOSPC) {
244 /* If the queue is full, sync and retry */
245 if (!__viommu_sync_req(viommu))
246 ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
247 }
248 if (ret)
249 goto err_free;
250
251 list_add_tail(&req->list, &viommu->requests);
252 return 0;
253
254err_free:
255 kfree(req);
256 return ret;
257}
258
259static int viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len)
260{
261 int ret;
262 unsigned long flags;
263
264 spin_lock_irqsave(&viommu->request_lock, flags);
265 ret = __viommu_add_req(viommu, buf, len, false);
266 if (ret)
267 dev_dbg(viommu->dev, "could not add request: %d\n", ret);
268 spin_unlock_irqrestore(&viommu->request_lock, flags);
269
270 return ret;
271}
272
273/*
274 * Send a request and wait for it to complete. Return the request status (as an
275 * errno)
276 */
277static int viommu_send_req_sync(struct viommu_dev *viommu, void *buf,
278 size_t len)
279{
280 int ret;
281 unsigned long flags;
282
283 spin_lock_irqsave(&viommu->request_lock, flags);
284
285 ret = __viommu_add_req(viommu, buf, len, true);
286 if (ret) {
287 dev_dbg(viommu->dev, "could not add request (%d)\n", ret);
288 goto out_unlock;
289 }
290
291 ret = __viommu_sync_req(viommu);
292 if (ret) {
293 dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
294 /* Fall-through (get the actual request status) */
295 }
296
297 ret = viommu_get_req_errno(buf, len);
298out_unlock:
299 spin_unlock_irqrestore(&viommu->request_lock, flags);
300 return ret;
301}
302
303/*
304 * viommu_add_mapping - add a mapping to the internal tree
305 *
306 * On success, return the new mapping. Otherwise return NULL.
307 */
308static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova,
309 phys_addr_t paddr, size_t size, u32 flags)
310{
311 unsigned long irqflags;
312 struct viommu_mapping *mapping;
313
314 mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC);
315 if (!mapping)
316 return -ENOMEM;
317
318 mapping->paddr = paddr;
319 mapping->iova.start = iova;
320 mapping->iova.last = iova + size - 1;
321 mapping->flags = flags;
322
323 spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
324 interval_tree_insert(&mapping->iova, &vdomain->mappings);
325 spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);
326
327 return 0;
328}
329
330/*
331 * viommu_del_mappings - remove mappings from the internal tree
332 *
333 * @vdomain: the domain
334 * @iova: start of the range
335 * @size: size of the range. A size of 0 corresponds to the entire address
336 * space.
337 *
338 * On success, returns the number of unmapped bytes (>= size)
339 */
340static size_t viommu_del_mappings(struct viommu_domain *vdomain,
341 unsigned long iova, size_t size)
342{
343 size_t unmapped = 0;
344 unsigned long flags;
345 unsigned long last = iova + size - 1;
346 struct viommu_mapping *mapping = NULL;
347 struct interval_tree_node *node, *next;
348
349 spin_lock_irqsave(&vdomain->mappings_lock, flags);
350 next = interval_tree_iter_first(&vdomain->mappings, iova, last);
351 while (next) {
352 node = next;
353 mapping = container_of(node, struct viommu_mapping, iova);
354 next = interval_tree_iter_next(node, iova, last);
355
356 /* Trying to split a mapping? */
357 if (mapping->iova.start < iova)
358 break;
359
360 /*
361 * Virtio-iommu doesn't allow UNMAP to split a mapping created
362 * with a single MAP request, so remove the full mapping.
363 */
364 unmapped += mapping->iova.last - mapping->iova.start + 1;
365
366 interval_tree_remove(node, &vdomain->mappings);
367 kfree(mapping);
368 }
369 spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
370
371 return unmapped;
372}
373
374/*
375 * viommu_replay_mappings - re-send MAP requests
376 *
377 * When reattaching a domain that was previously detached from all endpoints,
378 * mappings were deleted from the device. Re-create the mappings available in
379 * the internal tree.
380 */
381static int viommu_replay_mappings(struct viommu_domain *vdomain)
382{
383 int ret = 0;
384 unsigned long flags;
385 struct viommu_mapping *mapping;
386 struct interval_tree_node *node;
387 struct virtio_iommu_req_map map;
388
389 spin_lock_irqsave(&vdomain->mappings_lock, flags);
390 node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
391 while (node) {
392 mapping = container_of(node, struct viommu_mapping, iova);
393 map = (struct virtio_iommu_req_map) {
394 .head.type = VIRTIO_IOMMU_T_MAP,
395 .domain = cpu_to_le32(vdomain->id),
396 .virt_start = cpu_to_le64(mapping->iova.start),
397 .virt_end = cpu_to_le64(mapping->iova.last),
398 .phys_start = cpu_to_le64(mapping->paddr),
399 .flags = cpu_to_le32(mapping->flags),
400 };
401
402 ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
403 if (ret)
404 break;
405
406 node = interval_tree_iter_next(node, 0, -1UL);
407 }
408 spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
409
410 return ret;
411}
412
413static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
414 struct virtio_iommu_probe_resv_mem *mem,
415 size_t len)
416{
417 size_t size;
418 u64 start64, end64;
419 phys_addr_t start, end;
420 struct iommu_resv_region *region = NULL;
421 unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
422
423 start = start64 = le64_to_cpu(mem->start);
424 end = end64 = le64_to_cpu(mem->end);
425 size = end64 - start64 + 1;
426
427 /* Catch any overflow, including the unlikely end64 - start64 + 1 = 0 */
428 if (start != start64 || end != end64 || size < end64 - start64)
429 return -EOVERFLOW;
430
431 if (len < sizeof(*mem))
432 return -EINVAL;
433
434 switch (mem->subtype) {
435 default:
436 dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
437 mem->subtype);
438 /* Fall-through */
439 case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
440 region = iommu_alloc_resv_region(start, size, 0,
441 IOMMU_RESV_RESERVED);
442 break;
443 case VIRTIO_IOMMU_RESV_MEM_T_MSI:
444 region = iommu_alloc_resv_region(start, size, prot,
445 IOMMU_RESV_MSI);
446 break;
447 }
448 if (!region)
449 return -ENOMEM;
450
451 list_add(&vdev->resv_regions, &region->list);
452 return 0;
453}
454
455static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
456{
457 int ret;
458 u16 type, len;
459 size_t cur = 0;
460 size_t probe_len;
461 struct virtio_iommu_req_probe *probe;
462 struct virtio_iommu_probe_property *prop;
463 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
464 struct viommu_endpoint *vdev = fwspec->iommu_priv;
465
466 if (!fwspec->num_ids)
467 return -EINVAL;
468
469 probe_len = sizeof(*probe) + viommu->probe_size +
470 sizeof(struct virtio_iommu_req_tail);
471 probe = kzalloc(probe_len, GFP_KERNEL);
472 if (!probe)
473 return -ENOMEM;
474
475 probe->head.type = VIRTIO_IOMMU_T_PROBE;
476 /*
477 * For now, assume that properties of an endpoint that outputs multiple
478 * IDs are consistent. Only probe the first one.
479 */
480 probe->endpoint = cpu_to_le32(fwspec->ids[0]);
481
482 ret = viommu_send_req_sync(viommu, probe, probe_len);
483 if (ret)
484 goto out_free;
485
486 prop = (void *)probe->properties;
487 type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
488
489 while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
490 cur < viommu->probe_size) {
491 len = le16_to_cpu(prop->length) + sizeof(*prop);
492
493 switch (type) {
494 case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
495 ret = viommu_add_resv_mem(vdev, (void *)prop, len);
496 break;
497 default:
498 dev_err(dev, "unknown viommu prop 0x%x\n", type);
499 }
500
501 if (ret)
502 dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
503
504 cur += len;
505 if (cur >= viommu->probe_size)
506 break;
507
508 prop = (void *)probe->properties + cur;
509 type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
510 }
511
512out_free:
513 kfree(probe);
514 return ret;
515}
516
517static int viommu_fault_handler(struct viommu_dev *viommu,
518 struct virtio_iommu_fault *fault)
519{
520 char *reason_str;
521
522 u8 reason = fault->reason;
523 u32 flags = le32_to_cpu(fault->flags);
524 u32 endpoint = le32_to_cpu(fault->endpoint);
525 u64 address = le64_to_cpu(fault->address);
526
527 switch (reason) {
528 case VIRTIO_IOMMU_FAULT_R_DOMAIN:
529 reason_str = "domain";
530 break;
531 case VIRTIO_IOMMU_FAULT_R_MAPPING:
532 reason_str = "page";
533 break;
534 case VIRTIO_IOMMU_FAULT_R_UNKNOWN:
535 default:
536 reason_str = "unknown";
537 break;
538 }
539
540 /* TODO: find EP by ID and report_iommu_fault */
541 if (flags & VIRTIO_IOMMU_FAULT_F_ADDRESS)
542 dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx [%s%s%s]\n",
543 reason_str, endpoint, address,
544 flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : "",
545 flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : "",
546 flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : "");
547 else
548 dev_err_ratelimited(viommu->dev, "%s fault from EP %u\n",
549 reason_str, endpoint);
550 return 0;
551}
552
553static void viommu_event_handler(struct virtqueue *vq)
554{
555 int ret;
556 unsigned int len;
557 struct scatterlist sg[1];
558 struct viommu_event *evt;
559 struct viommu_dev *viommu = vq->vdev->priv;
560
561 while ((evt = virtqueue_get_buf(vq, &len)) != NULL) {
562 if (len > sizeof(*evt)) {
563 dev_err(viommu->dev,
564 "invalid event buffer (len %u != %zu)\n",
565 len, sizeof(*evt));
566 } else if (!(evt->head & VIOMMU_FAULT_RESV_MASK)) {
567 viommu_fault_handler(viommu, &evt->fault);
568 }
569
570 sg_init_one(sg, evt, sizeof(*evt));
571 ret = virtqueue_add_inbuf(vq, sg, 1, evt, GFP_ATOMIC);
572 if (ret)
573 dev_err(viommu->dev, "could not add event buffer\n");
574 }
575
576 virtqueue_kick(vq);
577}
578
579/* IOMMU API */
580
581static struct iommu_domain *viommu_domain_alloc(unsigned type)
582{
583 struct viommu_domain *vdomain;
584
585 if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
586 return NULL;
587
588 vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
589 if (!vdomain)
590 return NULL;
591
592 mutex_init(&vdomain->mutex);
593 spin_lock_init(&vdomain->mappings_lock);
594 vdomain->mappings = RB_ROOT_CACHED;
595
596 if (type == IOMMU_DOMAIN_DMA &&
597 iommu_get_dma_cookie(&vdomain->domain)) {
598 kfree(vdomain);
599 return NULL;
600 }
601
602 return &vdomain->domain;
603}
604
605static int viommu_domain_finalise(struct viommu_dev *viommu,
606 struct iommu_domain *domain)
607{
608 int ret;
609 struct viommu_domain *vdomain = to_viommu_domain(domain);
610 unsigned int max_domain = viommu->domain_bits > 31 ? ~0 :
611 (1U << viommu->domain_bits) - 1;
612
613 vdomain->viommu = viommu;
614
615 domain->pgsize_bitmap = viommu->pgsize_bitmap;
616 domain->geometry = viommu->geometry;
617
618 ret = ida_alloc_max(&viommu->domain_ids, max_domain, GFP_KERNEL);
619 if (ret >= 0)
620 vdomain->id = (unsigned int)ret;
621
622 return ret > 0 ? 0 : ret;
623}
624
625static void viommu_domain_free(struct iommu_domain *domain)
626{
627 struct viommu_domain *vdomain = to_viommu_domain(domain);
628
629 iommu_put_dma_cookie(domain);
630
631 /* Free all remaining mappings (size 2^64) */
632 viommu_del_mappings(vdomain, 0, 0);
633
634 if (vdomain->viommu)
635 ida_free(&vdomain->viommu->domain_ids, vdomain->id);
636
637 kfree(vdomain);
638}
639
640static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
641{
642 int i;
643 int ret = 0;
644 struct virtio_iommu_req_attach req;
645 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
646 struct viommu_endpoint *vdev = fwspec->iommu_priv;
647 struct viommu_domain *vdomain = to_viommu_domain(domain);
648
649 mutex_lock(&vdomain->mutex);
650 if (!vdomain->viommu) {
651 /*
652 * Properly initialize the domain now that we know which viommu
653 * owns it.
654 */
655 ret = viommu_domain_finalise(vdev->viommu, domain);
656 } else if (vdomain->viommu != vdev->viommu) {
657 dev_err(dev, "cannot attach to foreign vIOMMU\n");
658 ret = -EXDEV;
659 }
660 mutex_unlock(&vdomain->mutex);
661
662 if (ret)
663 return ret;
664
665 /*
666 * In the virtio-iommu device, when attaching the endpoint to a new
667 * domain, it is detached from the old one and, if as a result the
668 * old domain isn't attached to any endpoint, all mappings are removed
669 * from the old domain and it is freed.
670 *
671 * In the driver the old domain still exists, and its mappings will be
672 * recreated if it gets reattached to an endpoint. Otherwise it will be
673 * freed explicitly.
674 *
675 * vdev->vdomain is protected by group->mutex
676 */
677 if (vdev->vdomain)
678 vdev->vdomain->nr_endpoints--;
679
680 req = (struct virtio_iommu_req_attach) {
681 .head.type = VIRTIO_IOMMU_T_ATTACH,
682 .domain = cpu_to_le32(vdomain->id),
683 };
684
685 for (i = 0; i < fwspec->num_ids; i++) {
686 req.endpoint = cpu_to_le32(fwspec->ids[i]);
687
688 ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
689 if (ret)
690 return ret;
691 }
692
693 if (!vdomain->nr_endpoints) {
694 /*
695 * This endpoint is the first to be attached to the domain.
696 * Replay existing mappings (e.g. SW MSI).
697 */
698 ret = viommu_replay_mappings(vdomain);
699 if (ret)
700 return ret;
701 }
702
703 vdomain->nr_endpoints++;
704 vdev->vdomain = vdomain;
705
706 return 0;
707}
708
709static int viommu_map(struct iommu_domain *domain, unsigned long iova,
710 phys_addr_t paddr, size_t size, int prot)
711{
712 int ret;
713 int flags;
714 struct virtio_iommu_req_map map;
715 struct viommu_domain *vdomain = to_viommu_domain(domain);
716
717 flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) |
718 (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) |
719 (prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0);
720
721 ret = viommu_add_mapping(vdomain, iova, paddr, size, flags);
722 if (ret)
723 return ret;
724
725 map = (struct virtio_iommu_req_map) {
726 .head.type = VIRTIO_IOMMU_T_MAP,
727 .domain = cpu_to_le32(vdomain->id),
728 .virt_start = cpu_to_le64(iova),
729 .phys_start = cpu_to_le64(paddr),
730 .virt_end = cpu_to_le64(iova + size - 1),
731 .flags = cpu_to_le32(flags),
732 };
733
734 if (!vdomain->nr_endpoints)
735 return 0;
736
737 ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
738 if (ret)
739 viommu_del_mappings(vdomain, iova, size);
740
741 return ret;
742}
743
744static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
745 size_t size)
746{
747 int ret = 0;
748 size_t unmapped;
749 struct virtio_iommu_req_unmap unmap;
750 struct viommu_domain *vdomain = to_viommu_domain(domain);
751
752 unmapped = viommu_del_mappings(vdomain, iova, size);
753 if (unmapped < size)
754 return 0;
755
756 /* Device already removed all mappings after detach. */
757 if (!vdomain->nr_endpoints)
758 return unmapped;
759
760 unmap = (struct virtio_iommu_req_unmap) {
761 .head.type = VIRTIO_IOMMU_T_UNMAP,
762 .domain = cpu_to_le32(vdomain->id),
763 .virt_start = cpu_to_le64(iova),
764 .virt_end = cpu_to_le64(iova + unmapped - 1),
765 };
766
767 ret = viommu_add_req(vdomain->viommu, &unmap, sizeof(unmap));
768 return ret ? 0 : unmapped;
769}
770
771static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
772 dma_addr_t iova)
773{
774 u64 paddr = 0;
775 unsigned long flags;
776 struct viommu_mapping *mapping;
777 struct interval_tree_node *node;
778 struct viommu_domain *vdomain = to_viommu_domain(domain);
779
780 spin_lock_irqsave(&vdomain->mappings_lock, flags);
781 node = interval_tree_iter_first(&vdomain->mappings, iova, iova);
782 if (node) {
783 mapping = container_of(node, struct viommu_mapping, iova);
784 paddr = mapping->paddr + (iova - mapping->iova.start);
785 }
786 spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
787
788 return paddr;
789}
790
791static void viommu_iotlb_sync(struct iommu_domain *domain)
792{
793 struct viommu_domain *vdomain = to_viommu_domain(domain);
794
795 viommu_sync_req(vdomain->viommu);
796}
797
798static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
799{
800 struct iommu_resv_region *entry, *new_entry, *msi = NULL;
801 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
802 struct viommu_endpoint *vdev = fwspec->iommu_priv;
803 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
804
805 list_for_each_entry(entry, &vdev->resv_regions, list) {
806 if (entry->type == IOMMU_RESV_MSI)
807 msi = entry;
808
809 new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
810 if (!new_entry)
811 return;
812 list_add_tail(&new_entry->list, head);
813 }
814
815 /*
816 * If the device didn't register any bypass MSI window, add a
817 * software-mapped region.
818 */
819 if (!msi) {
820 msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
821 prot, IOMMU_RESV_SW_MSI);
822 if (!msi)
823 return;
824
825 list_add_tail(&msi->list, head);
826 }
827
828 iommu_dma_get_resv_regions(dev, head);
829}
830
831static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
832{
833 struct iommu_resv_region *entry, *next;
834
835 list_for_each_entry_safe(entry, next, head, list)
836 kfree(entry);
837}
838
839static struct iommu_ops viommu_ops;
840static struct virtio_driver virtio_iommu_drv;
841
842static int viommu_match_node(struct device *dev, const void *data)
843{
844 return dev->parent->fwnode == data;
845}
846
847static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
848{
849 struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL,
850 fwnode, viommu_match_node);
851 put_device(dev);
852
853 return dev ? dev_to_virtio(dev)->priv : NULL;
854}
855
856static int viommu_add_device(struct device *dev)
857{
858 int ret;
859 struct iommu_group *group;
860 struct viommu_endpoint *vdev;
861 struct viommu_dev *viommu = NULL;
862 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
863
864 if (!fwspec || fwspec->ops != &viommu_ops)
865 return -ENODEV;
866
867 viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
868 if (!viommu)
869 return -ENODEV;
870
871 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
872 if (!vdev)
873 return -ENOMEM;
874
875 vdev->dev = dev;
876 vdev->viommu = viommu;
877 INIT_LIST_HEAD(&vdev->resv_regions);
878 fwspec->iommu_priv = vdev;
879
880 if (viommu->probe_size) {
881 /* Get additional information for this endpoint */
882 ret = viommu_probe_endpoint(viommu, dev);
883 if (ret)
884 goto err_free_dev;
885 }
886
887 ret = iommu_device_link(&viommu->iommu, dev);
888 if (ret)
889 goto err_free_dev;
890
891 /*
892 * Last step creates a default domain and attaches to it. Everything
893 * must be ready.
894 */
895 group = iommu_group_get_for_dev(dev);
896 if (IS_ERR(group)) {
897 ret = PTR_ERR(group);
898 goto err_unlink_dev;
899 }
900
901 iommu_group_put(group);
902
903 return PTR_ERR_OR_ZERO(group);
904
905err_unlink_dev:
906 iommu_device_unlink(&viommu->iommu, dev);
907err_free_dev:
908 viommu_put_resv_regions(dev, &vdev->resv_regions);
909 kfree(vdev);
910
911 return ret;
912}
913
914static void viommu_remove_device(struct device *dev)
915{
916 struct viommu_endpoint *vdev;
917 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
918
919 if (!fwspec || fwspec->ops != &viommu_ops)
920 return;
921
922 vdev = fwspec->iommu_priv;
923
924 iommu_group_remove_device(dev);
925 iommu_device_unlink(&vdev->viommu->iommu, dev);
926 viommu_put_resv_regions(dev, &vdev->resv_regions);
927 kfree(vdev);
928}
929
930static struct iommu_group *viommu_device_group(struct device *dev)
931{
932 if (dev_is_pci(dev))
933 return pci_device_group(dev);
934 else
935 return generic_device_group(dev);
936}
937
938static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
939{
940 return iommu_fwspec_add_ids(dev, args->args, 1);
941}
942
943static struct iommu_ops viommu_ops = {
944 .domain_alloc = viommu_domain_alloc,
945 .domain_free = viommu_domain_free,
946 .attach_dev = viommu_attach_dev,
947 .map = viommu_map,
948 .unmap = viommu_unmap,
949 .iova_to_phys = viommu_iova_to_phys,
950 .iotlb_sync = viommu_iotlb_sync,
951 .add_device = viommu_add_device,
952 .remove_device = viommu_remove_device,
953 .device_group = viommu_device_group,
954 .get_resv_regions = viommu_get_resv_regions,
955 .put_resv_regions = viommu_put_resv_regions,
956 .of_xlate = viommu_of_xlate,
957};
958
959static int viommu_init_vqs(struct viommu_dev *viommu)
960{
961 struct virtio_device *vdev = dev_to_virtio(viommu->dev);
962 const char *names[] = { "request", "event" };
963 vq_callback_t *callbacks[] = {
964 NULL, /* No async requests */
965 viommu_event_handler,
966 };
967
968 return virtio_find_vqs(vdev, VIOMMU_NR_VQS, viommu->vqs, callbacks,
969 names, NULL);
970}
971
972static int viommu_fill_evtq(struct viommu_dev *viommu)
973{
974 int i, ret;
975 struct scatterlist sg[1];
976 struct viommu_event *evts;
977 struct virtqueue *vq = viommu->vqs[VIOMMU_EVENT_VQ];
978 size_t nr_evts = vq->num_free;
979
980 viommu->evts = evts = devm_kmalloc_array(viommu->dev, nr_evts,
981 sizeof(*evts), GFP_KERNEL);
982 if (!evts)
983 return -ENOMEM;
984
985 for (i = 0; i < nr_evts; i++) {
986 sg_init_one(sg, &evts[i], sizeof(*evts));
987 ret = virtqueue_add_inbuf(vq, sg, 1, &evts[i], GFP_KERNEL);
988 if (ret)
989 return ret;
990 }
991
992 return 0;
993}
994
995static int viommu_probe(struct virtio_device *vdev)
996{
997 struct device *parent_dev = vdev->dev.parent;
998 struct viommu_dev *viommu = NULL;
999 struct device *dev = &vdev->dev;
1000 u64 input_start = 0;
1001 u64 input_end = -1UL;
1002 int ret;
1003
1004 if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
1005 !virtio_has_feature(vdev, VIRTIO_IOMMU_F_MAP_UNMAP))
1006 return -ENODEV;
1007
1008 viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL);
1009 if (!viommu)
1010 return -ENOMEM;
1011
1012 spin_lock_init(&viommu->request_lock);
1013 ida_init(&viommu->domain_ids);
1014 viommu->dev = dev;
1015 viommu->vdev = vdev;
1016 INIT_LIST_HEAD(&viommu->requests);
1017
1018 ret = viommu_init_vqs(viommu);
1019 if (ret)
1020 return ret;
1021
1022 virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
1023 &viommu->pgsize_bitmap);
1024
1025 if (!viommu->pgsize_bitmap) {
1026 ret = -EINVAL;
1027 goto err_free_vqs;
1028 }
1029
1030 viommu->domain_bits = 32;
1031
1032 /* Optional features */
1033 virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
1034 struct virtio_iommu_config, input_range.start,
1035 &input_start);
1036
1037 virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
1038 struct virtio_iommu_config, input_range.end,
1039 &input_end);
1040
1041 virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS,
1042 struct virtio_iommu_config, domain_bits,
1043 &viommu->domain_bits);
1044
1045 virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
1046 struct virtio_iommu_config, probe_size,
1047 &viommu->probe_size);
1048
1049 viommu->geometry = (struct iommu_domain_geometry) {
1050 .aperture_start = input_start,
1051 .aperture_end = input_end,
1052 .force_aperture = true,
1053 };
1054
1055 viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
1056
1057 virtio_device_ready(vdev);
1058
1059 /* Populate the event queue with buffers */
1060 ret = viommu_fill_evtq(viommu);
1061 if (ret)
1062 goto err_free_vqs;
1063
1064 ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
1065 virtio_bus_name(vdev));
1066 if (ret)
1067 goto err_free_vqs;
1068
1069 iommu_device_set_ops(&viommu->iommu, &viommu_ops);
1070 iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode);
1071
1072 iommu_device_register(&viommu->iommu);
1073
1074#ifdef CONFIG_PCI
1075 if (pci_bus_type.iommu_ops != &viommu_ops) {
1076 pci_request_acs();
1077 ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
1078 if (ret)
1079 goto err_unregister;
1080 }
1081#endif
1082#ifdef CONFIG_ARM_AMBA
1083 if (amba_bustype.iommu_ops != &viommu_ops) {
1084 ret = bus_set_iommu(&amba_bustype, &viommu_ops);
1085 if (ret)
1086 goto err_unregister;
1087 }
1088#endif
1089 if (platform_bus_type.iommu_ops != &viommu_ops) {
1090 ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
1091 if (ret)
1092 goto err_unregister;
1093 }
1094
1095 vdev->priv = viommu;
1096
1097 dev_info(dev, "input address: %u bits\n",
1098 order_base_2(viommu->geometry.aperture_end));
1099 dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);
1100
1101 return 0;
1102
1103err_unregister:
1104 iommu_device_sysfs_remove(&viommu->iommu);
1105 iommu_device_unregister(&viommu->iommu);
1106err_free_vqs:
1107 vdev->config->del_vqs(vdev);
1108
1109 return ret;
1110}
1111
1112static void viommu_remove(struct virtio_device *vdev)
1113{
1114 struct viommu_dev *viommu = vdev->priv;
1115
1116 iommu_device_sysfs_remove(&viommu->iommu);
1117 iommu_device_unregister(&viommu->iommu);
1118
1119 /* Stop all virtqueues */
1120 vdev->config->reset(vdev);
1121 vdev->config->del_vqs(vdev);
1122
1123 dev_info(&vdev->dev, "device removed\n");
1124}
1125
1126static void viommu_config_changed(struct virtio_device *vdev)
1127{
1128 dev_warn(&vdev->dev, "config changed\n");
1129}
1130
1131static unsigned int features[] = {
1132 VIRTIO_IOMMU_F_MAP_UNMAP,
1133 VIRTIO_IOMMU_F_DOMAIN_BITS,
1134 VIRTIO_IOMMU_F_INPUT_RANGE,
1135 VIRTIO_IOMMU_F_PROBE,
1136};
1137
1138static struct virtio_device_id id_table[] = {
1139 { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
1140 { 0 },
1141};
1142
1143static struct virtio_driver virtio_iommu_drv = {
1144 .driver.name = KBUILD_MODNAME,
1145 .driver.owner = THIS_MODULE,
1146 .id_table = id_table,
1147 .feature_table = features,
1148 .feature_table_size = ARRAY_SIZE(features),
1149 .probe = viommu_probe,
1150 .remove = viommu_remove,
1151 .config_changed = viommu_config_changed,
1152};
1153
1154module_virtio_driver(virtio_iommu_drv);
1155
1156MODULE_DESCRIPTION("Virtio IOMMU driver");
1157MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
1158MODULE_LICENSE("GPL v2");
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 20e0e7ee4edf..55e7f5bb0549 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -2294,8 +2294,12 @@ int of_map_rid(struct device_node *np, u32 rid,
 		return 0;
 	}
 
-	pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
-		np, map_name, rid, target && *target ? *target : NULL);
-	return -EFAULT;
+	pr_info("%pOF: no %s translation for rid 0x%x on %pOF\n", np, map_name,
+		rid, target && *target ? *target : NULL);
+
+	/* Bypasses translation */
+	if (id_out)
+		*id_out = rid;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(of_map_rid);
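
With the of_map_rid() change above, a requester ID that has no match in the map is no longer treated as an error; the RID is returned untranslated and the call succeeds. A caller-side sketch of the new behaviour, using the of_map_rid() signature as it stands at this point in the tree (the node pointer and RID value are hypothetical):

	#include <linux/of.h>

	static int demo_map_rid(struct device_node *bridge_np, u32 rid)
	{
		struct device_node *iommu_np = NULL;
		u32 id = 0;
		int err;

		/* If rid is absent from "iommu-map", err is now 0 and id == rid */
		err = of_map_rid(bridge_np, rid, "iommu-map", "iommu-map-mask",
				 &iommu_np, &id);
		return err;
	}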
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 73d5adec0a28..bc7b27a28795 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -22,12 +22,15 @@ void pci_set_of_node(struct pci_dev *dev)
 		return;
 	dev->dev.of_node = of_pci_find_child_device(dev->bus->dev.of_node,
 						    dev->devfn);
+	if (dev->dev.of_node)
+		dev->dev.fwnode = &dev->dev.of_node->fwnode;
 }
 
 void pci_release_of_node(struct pci_dev *dev)
 {
 	of_node_put(dev->dev.of_node);
 	dev->dev.of_node = NULL;
+	dev->dev.fwnode = NULL;
 }
 
 void pci_set_bus_of_node(struct pci_bus *bus)
@@ -41,13 +44,18 @@ void pci_set_bus_of_node(struct pci_bus *bus)
 	if (node && of_property_read_bool(node, "external-facing"))
 		bus->self->untrusted = true;
 	}
+
 	bus->dev.of_node = node;
+
+	if (bus->dev.of_node)
+		bus->dev.fwnode = &bus->dev.of_node->fwnode;
 }
 
 void pci_release_bus_of_node(struct pci_bus *bus)
 {
 	of_node_put(bus->dev.of_node);
 	bus->dev.of_node = NULL;
+	bus->dev.fwnode = NULL;
 }
 
 struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 1705398b026a..297e1076e571 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -792,7 +792,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
 	num_targets = virtscsi_config_get(vdev, max_target) + 1;
 
 	shost = scsi_host_alloc(&virtscsi_host_template,
-			sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
+			struct_size(vscsi, req_vqs, num_queues));
 	if (!shost)
 		return -ENOMEM;
 
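
The virtio_scsi change above replaces an open-coded size calculation with the struct_size() helper from <linux/overflow.h>, which computes the size of a structure plus its trailing flexible array and saturates rather than wrapping on overflow. A standalone sketch of the same pattern (the struct and function below are hypothetical, not the driver's actual types):

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct demo_host {
		unsigned int nqueues;
		struct demo_vq {
			void *ring;
		} vqs[];	/* flexible array member */
	};

	static struct demo_host *demo_alloc(unsigned int num_queues)
	{
		struct demo_host *host;

		/*
		 * struct_size(host, vqs, num_queues) evaluates to
		 * sizeof(*host) + num_queues * sizeof(host->vqs[0]) and
		 * saturates at SIZE_MAX on overflow, so the allocation fails
		 * instead of returning an undersized buffer.
		 */
		host = kzalloc(struct_size(host, vqs, num_queues), GFP_KERNEL);
		if (host)
			host->nqueues = num_queues;
		return host;
	}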
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 247e5585af5d..1a2dd53caade 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -956,7 +956,7 @@ static void handle_tx(struct vhost_net *net)
 	if (!sock)
 		goto out;
 
-	if (!vq_iotlb_prefetch(vq))
+	if (!vq_meta_prefetch(vq))
 		goto out;
 
 	vhost_disable_notify(&net->dev, vq);
@@ -1125,7 +1125,7 @@ static void handle_rx(struct vhost_net *net)
 	if (!sock)
 		goto out;
 
-	if (!vq_iotlb_prefetch(vq))
+	if (!vq_meta_prefetch(vq))
 		goto out;
 
 	vhost_disable_notify(&net->dev, vq);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ff8892c38666..0536f8526359 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -298,6 +298,160 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 		__vhost_vq_meta_reset(d->vqs[i]);
 }
 
301#if VHOST_ARCH_CAN_ACCEL_UACCESS
302static void vhost_map_unprefetch(struct vhost_map *map)
303{
304 kfree(map->pages);
305 map->pages = NULL;
306 map->npages = 0;
307 map->addr = NULL;
308}
309
310static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
311{
312 struct vhost_map *map[VHOST_NUM_ADDRS];
313 int i;
314
315 spin_lock(&vq->mmu_lock);
316 for (i = 0; i < VHOST_NUM_ADDRS; i++) {
317 map[i] = rcu_dereference_protected(vq->maps[i],
318 lockdep_is_held(&vq->mmu_lock));
319 if (map[i])
320 rcu_assign_pointer(vq->maps[i], NULL);
321 }
322 spin_unlock(&vq->mmu_lock);
323
324 synchronize_rcu();
325
326 for (i = 0; i < VHOST_NUM_ADDRS; i++)
327 if (map[i])
328 vhost_map_unprefetch(map[i]);
329
330}
331
332static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
333{
334 int i;
335
336 vhost_uninit_vq_maps(vq);
337 for (i = 0; i < VHOST_NUM_ADDRS; i++)
338 vq->uaddrs[i].size = 0;
339}
340
341static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
342 unsigned long start,
343 unsigned long end)
344{
345 if (unlikely(!uaddr->size))
346 return false;
347
348 return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
349}
350
351static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
352 int index,
353 unsigned long start,
354 unsigned long end)
355{
356 struct vhost_uaddr *uaddr = &vq->uaddrs[index];
357 struct vhost_map *map;
358 int i;
359
360 if (!vhost_map_range_overlap(uaddr, start, end))
361 return;
362
363 spin_lock(&vq->mmu_lock);
364 ++vq->invalidate_count;
365
366 map = rcu_dereference_protected(vq->maps[index],
367 lockdep_is_held(&vq->mmu_lock));
368 if (map) {
369 if (uaddr->write) {
370 for (i = 0; i < map->npages; i++)
371 set_page_dirty(map->pages[i]);
372 }
373 rcu_assign_pointer(vq->maps[index], NULL);
374 }
375 spin_unlock(&vq->mmu_lock);
376
377 if (map) {
378 synchronize_rcu();
379 vhost_map_unprefetch(map);
380 }
381}
382
383static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
384 int index,
385 unsigned long start,
386 unsigned long end)
387{
388 if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
389 return;
390
391 spin_lock(&vq->mmu_lock);
392 --vq->invalidate_count;
393 spin_unlock(&vq->mmu_lock);
394}
395
396static int vhost_invalidate_range_start(struct mmu_notifier *mn,
397 const struct mmu_notifier_range *range)
398{
399 struct vhost_dev *dev = container_of(mn, struct vhost_dev,
400 mmu_notifier);
401 int i, j;
402
403 if (!mmu_notifier_range_blockable(range))
404 return -EAGAIN;
405
406 for (i = 0; i < dev->nvqs; i++) {
407 struct vhost_virtqueue *vq = dev->vqs[i];
408
409 for (j = 0; j < VHOST_NUM_ADDRS; j++)
410 vhost_invalidate_vq_start(vq, j,
411 range->start,
412 range->end);
413 }
414
415 return 0;
416}
417
418static void vhost_invalidate_range_end(struct mmu_notifier *mn,
419 const struct mmu_notifier_range *range)
420{
421 struct vhost_dev *dev = container_of(mn, struct vhost_dev,
422 mmu_notifier);
423 int i, j;
424
425 for (i = 0; i < dev->nvqs; i++) {
426 struct vhost_virtqueue *vq = dev->vqs[i];
427
428 for (j = 0; j < VHOST_NUM_ADDRS; j++)
429 vhost_invalidate_vq_end(vq, j,
430 range->start,
431 range->end);
432 }
433}
434
435static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
436 .invalidate_range_start = vhost_invalidate_range_start,
437 .invalidate_range_end = vhost_invalidate_range_end,
438};
439
440static void vhost_init_maps(struct vhost_dev *dev)
441{
442 struct vhost_virtqueue *vq;
443 int i, j;
444
445 dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
446
447 for (i = 0; i < dev->nvqs; ++i) {
448 vq = dev->vqs[i];
449 for (j = 0; j < VHOST_NUM_ADDRS; j++)
450 RCU_INIT_POINTER(vq->maps[j], NULL);
451 }
452}
453#endif
454
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
@@ -326,7 +480,11 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
+	vq->invalidate_count = 0;
 	__vhost_vq_meta_reset(vq);
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_reset_vq_maps(vq);
+#endif
 }
 
 static int vhost_worker(void *data)
@@ -427,6 +585,32 @@ bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
 
588static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
589 unsigned int num)
590{
591 size_t event __maybe_unused =
592 vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
593
594 return sizeof(*vq->avail) +
595 sizeof(*vq->avail->ring) * num + event;
596}
597
598static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
599 unsigned int num)
600{
601 size_t event __maybe_unused =
602 vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
603
604 return sizeof(*vq->used) +
605 sizeof(*vq->used->ring) * num + event;
606}
607
608static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
609 unsigned int num)
610{
611 return sizeof(*vq->desc) * num;
612}
613
 void vhost_dev_init(struct vhost_dev *dev,
 		    struct vhost_virtqueue **vqs, int nvqs,
 		    int iov_limit, int weight, int byte_weight)
@@ -450,7 +634,9 @@ void vhost_dev_init(struct vhost_dev *dev,
 	INIT_LIST_HEAD(&dev->read_list);
 	INIT_LIST_HEAD(&dev->pending_list);
 	spin_lock_init(&dev->iotlb_lock);
-
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	vhost_init_maps(dev);
+#endif
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
@@ -459,6 +645,7 @@ void vhost_dev_init(struct vhost_dev *dev,
 		vq->heads = NULL;
 		vq->dev = dev;
 		mutex_init(&vq->mutex);
+		spin_lock_init(&vq->mmu_lock);
 		vhost_vq_reset(dev, vq);
 		if (vq->handle_kick)
 			vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -538,7 +725,18 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 	if (err)
 		goto err_cgroup;
 
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
+	if (err)
+		goto err_mmu_notifier;
+#endif
+
 	return 0;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+err_mmu_notifier:
+	vhost_dev_free_iovecs(dev);
+#endif
 err_cgroup:
 	kthread_stop(worker);
 	dev->worker = NULL;
@@ -629,6 +827,107 @@ static void vhost_clear_msg(struct vhost_dev *dev)
 	spin_unlock(&dev->iotlb_lock);
 }
 
830#if VHOST_ARCH_CAN_ACCEL_UACCESS
831static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
832 int index, unsigned long uaddr,
833 size_t size, bool write)
834{
835 struct vhost_uaddr *addr = &vq->uaddrs[index];
836
837 addr->uaddr = uaddr;
838 addr->size = size;
839 addr->write = write;
840}
841
842static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
843{
844 vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
845 (unsigned long)vq->desc,
846 vhost_get_desc_size(vq, vq->num),
847 false);
848 vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
849 (unsigned long)vq->avail,
850 vhost_get_avail_size(vq, vq->num),
851 false);
852 vhost_setup_uaddr(vq, VHOST_ADDR_USED,
853 (unsigned long)vq->used,
854 vhost_get_used_size(vq, vq->num),
855 true);
856}
857
858static int vhost_map_prefetch(struct vhost_virtqueue *vq,
859 int index)
860{
861 struct vhost_map *map;
862 struct vhost_uaddr *uaddr = &vq->uaddrs[index];
863 struct page **pages;
864 int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
865 int npinned;
866 void *vaddr, *v;
867 int err;
868 int i;
869
870 spin_lock(&vq->mmu_lock);
871
872 err = -EFAULT;
873 if (vq->invalidate_count)
874 goto err;
875
876 err = -ENOMEM;
877 map = kmalloc(sizeof(*map), GFP_ATOMIC);
878 if (!map)
879 goto err;
880
881 pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
882 if (!pages)
883 goto err_pages;
884
885	err = -EFAULT;
886 npinned = __get_user_pages_fast(uaddr->uaddr, npages,
887 uaddr->write, pages);
888 if (npinned > 0)
889 release_pages(pages, npinned);
890 if (npinned != npages)
891 goto err_gup;
892
893 for (i = 0; i < npinned; i++)
894 if (PageHighMem(pages[i]))
895 goto err_gup;
896
897 vaddr = v = page_address(pages[0]);
898
899 /* For simplicity, fallback to userspace address if VA is not
900	 * contiguous.
901 */
902 for (i = 1; i < npinned; i++) {
903 v += PAGE_SIZE;
904 if (v != page_address(pages[i]))
905 goto err_gup;
906 }
907
908 map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
909 map->npages = npages;
910 map->pages = pages;
911
912 rcu_assign_pointer(vq->maps[index], map);
913 /* No need for a synchronize_rcu(). This function should be
914 * called by dev->worker so we are serialized with all
915 * readers.
916 */
917 spin_unlock(&vq->mmu_lock);
918
919 return 0;
920
921err_gup:
922 kfree(pages);
923err_pages:
924 kfree(map);
925err:
926 spin_unlock(&vq->mmu_lock);
927 return err;
928}
929#endif
930
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
@@ -658,8 +957,16 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 		kthread_stop(dev->worker);
 		dev->worker = NULL;
 	}
-	if (dev->mm)
+	if (dev->mm) {
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+		mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
+#endif
 		mmput(dev->mm);
+	}
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	for (i = 0; i < dev->nvqs; i++)
+		vhost_uninit_vq_maps(dev->vqs[i]);
+#endif
 	dev->mm = NULL;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -886,6 +1193,113 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
1196static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
1197{
1198#if VHOST_ARCH_CAN_ACCEL_UACCESS
1199 struct vhost_map *map;
1200 struct vring_used *used;
1201
1202 if (!vq->iotlb) {
1203 rcu_read_lock();
1204
1205 map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
1206 if (likely(map)) {
1207 used = map->addr;
1208 *((__virtio16 *)&used->ring[vq->num]) =
1209 cpu_to_vhost16(vq, vq->avail_idx);
1210 rcu_read_unlock();
1211 return 0;
1212 }
1213
1214 rcu_read_unlock();
1215 }
1216#endif
1217
1218 return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
1219 vhost_avail_event(vq));
1220}
1221
1222static inline int vhost_put_used(struct vhost_virtqueue *vq,
1223 struct vring_used_elem *head, int idx,
1224 int count)
1225{
1226#if VHOST_ARCH_CAN_ACCEL_UACCESS
1227 struct vhost_map *map;
1228 struct vring_used *used;
1229 size_t size;
1230
1231 if (!vq->iotlb) {
1232 rcu_read_lock();
1233
1234 map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
1235 if (likely(map)) {
1236 used = map->addr;
1237 size = count * sizeof(*head);
1238 memcpy(used->ring + idx, head, size);
1239 rcu_read_unlock();
1240 return 0;
1241 }
1242
1243 rcu_read_unlock();
1244 }
1245#endif
1246
1247 return vhost_copy_to_user(vq, vq->used->ring + idx, head,
1248 count * sizeof(*head));
1249}
1250
1251static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
1252
1253{
1254#if VHOST_ARCH_CAN_ACCEL_UACCESS
1255 struct vhost_map *map;
1256 struct vring_used *used;
1257
1258 if (!vq->iotlb) {
1259 rcu_read_lock();
1260
1261 map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
1262 if (likely(map)) {
1263 used = map->addr;
1264 used->flags = cpu_to_vhost16(vq, vq->used_flags);
1265 rcu_read_unlock();
1266 return 0;
1267 }
1268
1269 rcu_read_unlock();
1270 }
1271#endif
1272
1273 return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
1274 &vq->used->flags);
1275}
1276
1277static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
1278
1279{
1280#if VHOST_ARCH_CAN_ACCEL_UACCESS
1281 struct vhost_map *map;
1282 struct vring_used *used;
1283
1284 if (!vq->iotlb) {
1285 rcu_read_lock();
1286
1287 map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
1288 if (likely(map)) {
1289 used = map->addr;
1290 used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
1291 rcu_read_unlock();
1292 return 0;
1293 }
1294
1295 rcu_read_unlock();
1296 }
1297#endif
1298
1299 return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
1300 &vq->used->idx);
1301}
1302
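The four vhost_put_*() helpers above are the RCU read side of vq->maps[]. The write side (the prefetch path above and the MMU notifier invalidate callback earlier in this diff) publishes and clears maps under mmu_lock; a simplified outline of that contract, with names shortened and the page unpinning elided:

/* Writer side (sketch): publish a freshly pinned map ... */
static void publish_map(struct vhost_virtqueue *vq, int index,
			struct vhost_map *map)
{
	spin_lock(&vq->mmu_lock);
	rcu_assign_pointer(vq->maps[index], map);
	spin_unlock(&vq->mmu_lock);
}

/* ... and tear it down when the MMU notifier invalidates the range. */
static void invalidate_map(struct vhost_virtqueue *vq, int index)
{
	struct vhost_map *map;

	spin_lock(&vq->mmu_lock);
	map = rcu_dereference_protected(vq->maps[index],
					lockdep_is_held(&vq->mmu_lock));
	rcu_assign_pointer(vq->maps[index], NULL);
	spin_unlock(&vq->mmu_lock);

	if (map) {
		synchronize_rcu();	/* wait out readers of the old map */
		/* set_page_dirty_lock()/put_page() on map->pages, kfree(map) */
	}
}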
889#define vhost_get_user(vq, x, ptr, type) \ 1303#define vhost_get_user(vq, x, ptr, type) \
890({ \ 1304({ \
891 int ret; \ 1305 int ret; \
@@ -924,6 +1338,155 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
924 mutex_unlock(&d->vqs[i]->mutex); 1338 mutex_unlock(&d->vqs[i]->mutex);
925} 1339}
926 1340
1341static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
1342 __virtio16 *idx)
1343{
1344#if VHOST_ARCH_CAN_ACCEL_UACCESS
1345 struct vhost_map *map;
1346 struct vring_avail *avail;
1347
1348 if (!vq->iotlb) {
1349 rcu_read_lock();
1350
1351 map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
1352 if (likely(map)) {
1353 avail = map->addr;
1354 *idx = avail->idx;
1355 rcu_read_unlock();
1356 return 0;
1357 }
1358
1359 rcu_read_unlock();
1360 }
1361#endif
1362
1363 return vhost_get_avail(vq, *idx, &vq->avail->idx);
1364}
1365
1366static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
1367 __virtio16 *head, int idx)
1368{
1369#if VHOST_ARCH_CAN_ACCEL_UACCESS
1370 struct vhost_map *map;
1371 struct vring_avail *avail;
1372
1373 if (!vq->iotlb) {
1374 rcu_read_lock();
1375
1376 map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
1377 if (likely(map)) {
1378 avail = map->addr;
1379 *head = avail->ring[idx & (vq->num - 1)];
1380 rcu_read_unlock();
1381 return 0;
1382 }
1383
1384 rcu_read_unlock();
1385 }
1386#endif
1387
1388 return vhost_get_avail(vq, *head,
1389 &vq->avail->ring[idx & (vq->num - 1)]);
1390}
1391
1392static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
1393 __virtio16 *flags)
1394{
1395#if VHOST_ARCH_CAN_ACCEL_UACCESS
1396 struct vhost_map *map;
1397 struct vring_avail *avail;
1398
1399 if (!vq->iotlb) {
1400 rcu_read_lock();
1401
1402 map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
1403 if (likely(map)) {
1404 avail = map->addr;
1405 *flags = avail->flags;
1406 rcu_read_unlock();
1407 return 0;
1408 }
1409
1410 rcu_read_unlock();
1411 }
1412#endif
1413
1414 return vhost_get_avail(vq, *flags, &vq->avail->flags);
1415}
1416
1417static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
1418 __virtio16 *event)
1419{
1420#if VHOST_ARCH_CAN_ACCEL_UACCESS
1421 struct vhost_map *map;
1422 struct vring_avail *avail;
1423
1424 if (!vq->iotlb) {
1425 rcu_read_lock();
1426 map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
1427 if (likely(map)) {
1428 avail = map->addr;
1429 *event = (__virtio16)avail->ring[vq->num];
1430 rcu_read_unlock();
1431 return 0;
1432 }
1433 rcu_read_unlock();
1434 }
1435#endif
1436
1437 return vhost_get_avail(vq, *event, vhost_used_event(vq));
1438}
1439
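vhost_get_used_event() above and vhost_put_avail_event() earlier reflect the split-ring event-index layout: with VIRTIO_RING_F_EVENT_IDX negotiated, the driver-written used_event word sits right after avail->ring[num] and the device-written avail_event word right after used->ring[num]. Illustrative accessors, assuming the standard structures from <linux/virtio_ring.h>:

/* used_event: written by the driver to throttle interrupts from the device. */
static inline __virtio16 *ring_used_event(struct vring_avail *avail,
					  unsigned int num)
{
	return &avail->ring[num];
}

/* avail_event: written by the device (vhost) to throttle guest kicks. */
static inline __virtio16 *ring_avail_event(struct vring_used *used,
					   unsigned int num)
{
	return (__virtio16 *)&used->ring[num];
}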
1440static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
1441 __virtio16 *idx)
1442{
1443#if VHOST_ARCH_CAN_ACCEL_UACCESS
1444 struct vhost_map *map;
1445 struct vring_used *used;
1446
1447 if (!vq->iotlb) {
1448 rcu_read_lock();
1449
1450 map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
1451 if (likely(map)) {
1452 used = map->addr;
1453 *idx = used->idx;
1454 rcu_read_unlock();
1455 return 0;
1456 }
1457
1458 rcu_read_unlock();
1459 }
1460#endif
1461
1462 return vhost_get_used(vq, *idx, &vq->used->idx);
1463}
1464
1465static inline int vhost_get_desc(struct vhost_virtqueue *vq,
1466 struct vring_desc *desc, int idx)
1467{
1468#if VHOST_ARCH_CAN_ACCEL_UACCESS
1469 struct vhost_map *map;
1470 struct vring_desc *d;
1471
1472 if (!vq->iotlb) {
1473 rcu_read_lock();
1474
1475 map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
1476 if (likely(map)) {
1477 d = map->addr;
1478 *desc = *(d + idx);
1479 rcu_read_unlock();
1480 return 0;
1481 }
1482
1483 rcu_read_unlock();
1484 }
1485#endif
1486
1487 return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
1488}
1489
927static int vhost_new_umem_range(struct vhost_umem *umem, 1490static int vhost_new_umem_range(struct vhost_umem *umem,
928 u64 start, u64 size, u64 end, 1491 u64 start, u64 size, u64 end,
929 u64 userspace_addr, int perm) 1492 u64 userspace_addr, int perm)
@@ -1209,13 +1772,9 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
1209 struct vring_used __user *used) 1772 struct vring_used __user *used)
1210 1773
1211{ 1774{
1212 size_t s __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 1775 return access_ok(desc, vhost_get_desc_size(vq, num)) &&
1213 1776 access_ok(avail, vhost_get_avail_size(vq, num)) &&
1214 return access_ok(desc, num * sizeof *desc) && 1777 access_ok(used, vhost_get_used_size(vq, num));
1215 access_ok(avail,
1216 sizeof *avail + num * sizeof *avail->ring + s) &&
1217 access_ok(used,
1218 sizeof *used + num * sizeof *used->ring + s);
1219} 1778}
1220 1779
1221static void vhost_vq_meta_update(struct vhost_virtqueue *vq, 1780static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
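vq_access_ok() now delegates the size arithmetic to the vhost_get_*_size() helpers introduced earlier in this series. Roughly, they account for the optional two-byte event field when VIRTIO_RING_F_EVENT_IDX is negotiated, along these lines (a sketch, not the verbatim patch code):

static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, unsigned int num)
{
	return sizeof(*vq->desc) * num;
}

static size_t vhost_get_avail_size(struct vhost_virtqueue *vq, unsigned int num)
{
	size_t event = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return sizeof(*vq->avail) + sizeof(*vq->avail->ring) * num + event;
}

static size_t vhost_get_used_size(struct vhost_virtqueue *vq, unsigned int num)
{
	size_t event = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return sizeof(*vq->used) + sizeof(*vq->used->ring) * num + event;
}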
@@ -1265,26 +1824,42 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
1265 return true; 1824 return true;
1266} 1825}
1267 1826
1268int vq_iotlb_prefetch(struct vhost_virtqueue *vq) 1827#if VHOST_ARCH_CAN_ACCEL_UACCESS
1828static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
1829{
1830 struct vhost_map __rcu *map;
1831 int i;
1832
1833 for (i = 0; i < VHOST_NUM_ADDRS; i++) {
1834 rcu_read_lock();
1835 map = rcu_dereference(vq->maps[i]);
1836 rcu_read_unlock();
1837 if (unlikely(!map))
1838 vhost_map_prefetch(vq, i);
1839 }
1840}
1841#endif
1842
1843int vq_meta_prefetch(struct vhost_virtqueue *vq)
1269{ 1844{
1270 size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
1271 unsigned int num = vq->num; 1845 unsigned int num = vq->num;
1272 1846
1273 if (!vq->iotlb) 1847 if (!vq->iotlb) {
1848#if VHOST_ARCH_CAN_ACCEL_UACCESS
1849 vhost_vq_map_prefetch(vq);
1850#endif
1274 return 1; 1851 return 1;
1852 }
1275 1853
1276 return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, 1854 return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
1277 num * sizeof(*vq->desc), VHOST_ADDR_DESC) && 1855 vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
1278 iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, 1856 iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
1279 sizeof *vq->avail + 1857 vhost_get_avail_size(vq, num),
1280 num * sizeof(*vq->avail->ring) + s,
1281 VHOST_ADDR_AVAIL) && 1858 VHOST_ADDR_AVAIL) &&
1282 iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, 1859 iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
1283 sizeof *vq->used + 1860 vhost_get_used_size(vq, num), VHOST_ADDR_USED);
1284 num * sizeof(*vq->used->ring) + s,
1285 VHOST_ADDR_USED);
1286} 1861}
1287EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); 1862EXPORT_SYMBOL_GPL(vq_meta_prefetch);
1288 1863
1289/* Can we log writes? */ 1864/* Can we log writes? */
1290/* Caller should have device mutex but not vq mutex */ 1865/* Caller should have device mutex but not vq mutex */
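Callers invoke vq_meta_prefetch() before draining a ring so that either the IOTLB entries or the vq->maps[] mappings are warm by the time the per-access helpers run; a return of 0 means an IOTLB miss is pending and processing should be retried after userspace services it. A hedged sketch of a call site, modelled loosely on handle_tx() in drivers/vhost/net.c:

static void handle_ring(struct vhost_virtqueue *vq)
{
	mutex_lock(&vq->mutex);

	if (!vq_meta_prefetch(vq))
		goto out;	/* IOTLB update pending, come back later */

	/* ... vhost_get_vq_desc() / vhost_add_used_and_signal_n() loop ... */
out:
	mutex_unlock(&vq->mutex);
}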
@@ -1299,13 +1874,10 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok);
1299static bool vq_log_access_ok(struct vhost_virtqueue *vq, 1874static bool vq_log_access_ok(struct vhost_virtqueue *vq,
1300 void __user *log_base) 1875 void __user *log_base)
1301{ 1876{
1302 size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
1303
1304 return vq_memory_access_ok(log_base, vq->umem, 1877 return vq_memory_access_ok(log_base, vq->umem,
1305 vhost_has_feature(vq, VHOST_F_LOG_ALL)) && 1878 vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
1306 (!vq->log_used || log_access_ok(log_base, vq->log_addr, 1879 (!vq->log_used || log_access_ok(log_base, vq->log_addr,
1307 sizeof *vq->used + 1880 vhost_get_used_size(vq, vq->num)));
1308 vq->num * sizeof *vq->used->ring + s));
1309} 1881}
1310 1882
1311/* Can we start vq? */ 1883/* Can we start vq? */
@@ -1405,6 +1977,121 @@ err:
1405 return -EFAULT; 1977 return -EFAULT;
1406} 1978}
1407 1979
1980static long vhost_vring_set_num(struct vhost_dev *d,
1981 struct vhost_virtqueue *vq,
1982 void __user *argp)
1983{
1984 struct vhost_vring_state s;
1985
1986 /* Resizing ring with an active backend?
1987 * You don't want to do that. */
1988 if (vq->private_data)
1989 return -EBUSY;
1990
1991 if (copy_from_user(&s, argp, sizeof s))
1992 return -EFAULT;
1993
1994 if (!s.num || s.num > 0xffff || (s.num & (s.num - 1)))
1995 return -EINVAL;
1996 vq->num = s.num;
1997
1998 return 0;
1999}
2000
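The validation above accepts only non-zero ring sizes up to 0xffff that are powers of two; num & (num - 1) clears the lowest set bit, so it is zero exactly for powers of two. The same check as a standalone predicate (hypothetical helper, for illustration only):

/* E.g. 256 is accepted; 0, 300 and 0x10000 are rejected with -EINVAL. */
static bool vhost_vring_num_ok(unsigned int num)
{
	return num && num <= 0xffff && !(num & (num - 1));
}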
2001static long vhost_vring_set_addr(struct vhost_dev *d,
2002 struct vhost_virtqueue *vq,
2003 void __user *argp)
2004{
2005 struct vhost_vring_addr a;
2006
2007 if (copy_from_user(&a, argp, sizeof a))
2008 return -EFAULT;
2009 if (a.flags & ~(0x1 << VHOST_VRING_F_LOG))
2010 return -EOPNOTSUPP;
2011
2012 /* For 32-bit kernels, verify that the top 32 bits of the
2013 user data are set to zero. */
2014 if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
2015 (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
2016 (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr)
2017 return -EFAULT;
2018
2019 /* Make sure it's safe to cast pointers to vring types. */
2020 BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
2021 BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
2022 if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
2023 (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
2024 (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1)))
2025 return -EINVAL;
2026
2027 /* We only verify access here if a backend is configured.
2028 * If it is not, we don't, as the size might not have been
2029 * set up yet. We will verify when the backend is configured. */
2030 if (vq->private_data) {
2031 if (!vq_access_ok(vq, vq->num,
2032 (void __user *)(unsigned long)a.desc_user_addr,
2033 (void __user *)(unsigned long)a.avail_user_addr,
2034 (void __user *)(unsigned long)a.used_user_addr))
2035 return -EINVAL;
2036
2037 /* Also validate log access for used ring if enabled. */
2038 if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
2039 !log_access_ok(vq->log_base, a.log_guest_addr,
2040 sizeof *vq->used +
2041 vq->num * sizeof *vq->used->ring))
2042 return -EINVAL;
2043 }
2044
2045 vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
2046 vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
2047 vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
2048 vq->log_addr = a.log_guest_addr;
2049 vq->used = (void __user *)(unsigned long)a.used_user_addr;
2050
2051 return 0;
2052}
2053
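The (u64)(unsigned long) comparisons above catch addresses that would be silently truncated on a 32-bit kernel, where unsigned long is only 32 bits wide; on 64-bit kernels the round trip preserves the value and the check always passes. As an isolated predicate (illustrative name):

/* E.g. 0x100000000ULL round-trips to 0 on 32-bit and is rejected (-EFAULT). */
static bool uaddr_fits_native_pointer(u64 uaddr)
{
	return (u64)(unsigned long)uaddr == uaddr;
}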
2054static long vhost_vring_set_num_addr(struct vhost_dev *d,
2055 struct vhost_virtqueue *vq,
2056 unsigned int ioctl,
2057 void __user *argp)
2058{
2059 long r;
2060
2061 mutex_lock(&vq->mutex);
2062
2063#if VHOST_ARCH_CAN_ACCEL_UACCESS
2064 /* Unregister the MMU notifier so that the invalidation
2065 * callback can access vq->uaddrs[] without holding a lock.
2066 */
2067 if (d->mm)
2068 mmu_notifier_unregister(&d->mmu_notifier, d->mm);
2069
2070 vhost_uninit_vq_maps(vq);
2071#endif
2072
2073 switch (ioctl) {
2074 case VHOST_SET_VRING_NUM:
2075 r = vhost_vring_set_num(d, vq, argp);
2076 break;
2077 case VHOST_SET_VRING_ADDR:
2078 r = vhost_vring_set_addr(d, vq, argp);
2079 break;
2080 default:
2081 BUG();
2082 }
2083
2084#if VHOST_ARCH_CAN_ACCEL_UACCESS
2085 vhost_setup_vq_uaddr(vq);
2086
2087 if (d->mm)
2088 mmu_notifier_register(&d->mmu_notifier, d->mm);
2089#endif
2090
2091 mutex_unlock(&vq->mutex);
2092
2093 return r;
2094}
1408long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) 2095long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
1409{ 2096{
1410 struct file *eventfp, *filep = NULL; 2097 struct file *eventfp, *filep = NULL;
@@ -1414,7 +2101,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
1414 struct vhost_virtqueue *vq; 2101 struct vhost_virtqueue *vq;
1415 struct vhost_vring_state s; 2102 struct vhost_vring_state s;
1416 struct vhost_vring_file f; 2103 struct vhost_vring_file f;
1417 struct vhost_vring_addr a;
1418 u32 idx; 2104 u32 idx;
1419 long r; 2105 long r;
1420 2106
@@ -1427,26 +2113,14 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
1427 idx = array_index_nospec(idx, d->nvqs); 2113 idx = array_index_nospec(idx, d->nvqs);
1428 vq = d->vqs[idx]; 2114 vq = d->vqs[idx];
1429 2115
2116 if (ioctl == VHOST_SET_VRING_NUM ||
2117 ioctl == VHOST_SET_VRING_ADDR) {
2118 return vhost_vring_set_num_addr(d, vq, ioctl, argp);
2119 }
2120
1430 mutex_lock(&vq->mutex); 2121 mutex_lock(&vq->mutex);
1431 2122
1432 switch (ioctl) { 2123 switch (ioctl) {
1433 case VHOST_SET_VRING_NUM:
1434 /* Resizing ring with an active backend?
1435 * You don't want to do that. */
1436 if (vq->private_data) {
1437 r = -EBUSY;
1438 break;
1439 }
1440 if (copy_from_user(&s, argp, sizeof s)) {
1441 r = -EFAULT;
1442 break;
1443 }
1444 if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
1445 r = -EINVAL;
1446 break;
1447 }
1448 vq->num = s.num;
1449 break;
1450 case VHOST_SET_VRING_BASE: 2124 case VHOST_SET_VRING_BASE:
1451 /* Moving base with an active backend? 2125 /* Moving base with an active backend?
1452 * You don't want to do that. */ 2126 * You don't want to do that. */
@@ -1472,62 +2146,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
1472 if (copy_to_user(argp, &s, sizeof s)) 2146 if (copy_to_user(argp, &s, sizeof s))
1473 r = -EFAULT; 2147 r = -EFAULT;
1474 break; 2148 break;
1475 case VHOST_SET_VRING_ADDR:
1476 if (copy_from_user(&a, argp, sizeof a)) {
1477 r = -EFAULT;
1478 break;
1479 }
1480 if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
1481 r = -EOPNOTSUPP;
1482 break;
1483 }
1484 /* For 32bit, verify that the top 32bits of the user
1485 data are set to zero. */
1486 if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
1487 (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
1488 (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
1489 r = -EFAULT;
1490 break;
1491 }
1492
1493 /* Make sure it's safe to cast pointers to vring types. */
1494 BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
1495 BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
1496 if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
1497 (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
1498 (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) {
1499 r = -EINVAL;
1500 break;
1501 }
1502
1503 /* We only verify access here if backend is configured.
1504 * If it is not, we don't as size might not have been setup.
1505 * We will verify when backend is configured. */
1506 if (vq->private_data) {
1507 if (!vq_access_ok(vq, vq->num,
1508 (void __user *)(unsigned long)a.desc_user_addr,
1509 (void __user *)(unsigned long)a.avail_user_addr,
1510 (void __user *)(unsigned long)a.used_user_addr)) {
1511 r = -EINVAL;
1512 break;
1513 }
1514
1515 /* Also validate log access for used ring if enabled. */
1516 if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
1517 !log_access_ok(vq->log_base, a.log_guest_addr,
1518 sizeof *vq->used +
1519 vq->num * sizeof *vq->used->ring)) {
1520 r = -EINVAL;
1521 break;
1522 }
1523 }
1524
1525 vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
1526 vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
1527 vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
1528 vq->log_addr = a.log_guest_addr;
1529 vq->used = (void __user *)(unsigned long)a.used_user_addr;
1530 break;
1531 case VHOST_SET_VRING_KICK: 2149 case VHOST_SET_VRING_KICK:
1532 if (copy_from_user(&f, argp, sizeof f)) { 2150 if (copy_from_user(&f, argp, sizeof f)) {
1533 r = -EFAULT; 2151 r = -EFAULT;
@@ -1861,8 +2479,7 @@ EXPORT_SYMBOL_GPL(vhost_log_write);
1861static int vhost_update_used_flags(struct vhost_virtqueue *vq) 2479static int vhost_update_used_flags(struct vhost_virtqueue *vq)
1862{ 2480{
1863 void __user *used; 2481 void __user *used;
1864 if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), 2482 if (vhost_put_used_flags(vq))
1865 &vq->used->flags) < 0)
1866 return -EFAULT; 2483 return -EFAULT;
1867 if (unlikely(vq->log_used)) { 2484 if (unlikely(vq->log_used)) {
1868 /* Make sure the flag is seen before log. */ 2485 /* Make sure the flag is seen before log. */
@@ -1879,8 +2496,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
1879 2496
1880static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) 2497static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
1881{ 2498{
1882 if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), 2499 if (vhost_put_avail_event(vq))
1883 vhost_avail_event(vq)))
1884 return -EFAULT; 2500 return -EFAULT;
1885 if (unlikely(vq->log_used)) { 2501 if (unlikely(vq->log_used)) {
1886 void __user *used; 2502 void __user *used;
@@ -1916,7 +2532,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
1916 r = -EFAULT; 2532 r = -EFAULT;
1917 goto err; 2533 goto err;
1918 } 2534 }
1919 r = vhost_get_used(vq, last_used_idx, &vq->used->idx); 2535 r = vhost_get_used_idx(vq, &last_used_idx);
1920 if (r) { 2536 if (r) {
1921 vq_err(vq, "Can't access used idx at %p\n", 2537 vq_err(vq, "Can't access used idx at %p\n",
1922 &vq->used->idx); 2538 &vq->used->idx);
@@ -2115,7 +2731,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
2115 last_avail_idx = vq->last_avail_idx; 2731 last_avail_idx = vq->last_avail_idx;
2116 2732
2117 if (vq->avail_idx == vq->last_avail_idx) { 2733 if (vq->avail_idx == vq->last_avail_idx) {
2118 if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { 2734 if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
2119 vq_err(vq, "Failed to access avail idx at %p\n", 2735 vq_err(vq, "Failed to access avail idx at %p\n",
2120 &vq->avail->idx); 2736 &vq->avail->idx);
2121 return -EFAULT; 2737 return -EFAULT;
@@ -2142,8 +2758,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
2142 2758
2143 /* Grab the next descriptor number they're advertising, and increment 2759 /* Grab the next descriptor number they're advertising, and increment
2144 * the index we've seen. */ 2760 * the index we've seen. */
2145 if (unlikely(vhost_get_avail(vq, ring_head, 2761 if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
2146 &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
2147 vq_err(vq, "Failed to read head: idx %d address %p\n", 2762 vq_err(vq, "Failed to read head: idx %d address %p\n",
2148 last_avail_idx, 2763 last_avail_idx,
2149 &vq->avail->ring[last_avail_idx % vq->num]); 2764 &vq->avail->ring[last_avail_idx % vq->num]);
@@ -2178,8 +2793,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
2178 i, vq->num, head); 2793 i, vq->num, head);
2179 return -EINVAL; 2794 return -EINVAL;
2180 } 2795 }
2181 ret = vhost_copy_from_user(vq, &desc, vq->desc + i, 2796 ret = vhost_get_desc(vq, &desc, i);
2182 sizeof desc);
2183 if (unlikely(ret)) { 2797 if (unlikely(ret)) {
2184 vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", 2798 vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
2185 i, vq->desc + i); 2799 i, vq->desc + i);
@@ -2272,16 +2886,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
2272 2886
2273 start = vq->last_used_idx & (vq->num - 1); 2887 start = vq->last_used_idx & (vq->num - 1);
2274 used = vq->used->ring + start; 2888 used = vq->used->ring + start;
2275 if (count == 1) { 2889 if (vhost_put_used(vq, heads, start, count)) {
2276 if (vhost_put_user(vq, heads[0].id, &used->id)) {
2277 vq_err(vq, "Failed to write used id");
2278 return -EFAULT;
2279 }
2280 if (vhost_put_user(vq, heads[0].len, &used->len)) {
2281 vq_err(vq, "Failed to write used len");
2282 return -EFAULT;
2283 }
2284 } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) {
2285 vq_err(vq, "Failed to write used"); 2890 vq_err(vq, "Failed to write used");
2286 return -EFAULT; 2891 return -EFAULT;
2287 } 2892 }
@@ -2323,8 +2928,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
2323 2928
2324 /* Make sure buffer is written before we update index. */ 2929 /* Make sure buffer is written before we update index. */
2325 smp_wmb(); 2930 smp_wmb();
2326 if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), 2931 if (vhost_put_used_idx(vq)) {
2327 &vq->used->idx)) {
2328 vq_err(vq, "Failed to increment used idx"); 2932 vq_err(vq, "Failed to increment used idx");
2329 return -EFAULT; 2933 return -EFAULT;
2330 } 2934 }
@@ -2357,7 +2961,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
2357 2961
2358 if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 2962 if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
2359 __virtio16 flags; 2963 __virtio16 flags;
2360 if (vhost_get_avail(vq, flags, &vq->avail->flags)) { 2964 if (vhost_get_avail_flags(vq, &flags)) {
2361 vq_err(vq, "Failed to get flags"); 2965 vq_err(vq, "Failed to get flags");
2362 return true; 2966 return true;
2363 } 2967 }
@@ -2371,7 +2975,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
2371 if (unlikely(!v)) 2975 if (unlikely(!v))
2372 return true; 2976 return true;
2373 2977
2374 if (vhost_get_avail(vq, event, vhost_used_event(vq))) { 2978 if (vhost_get_used_event(vq, &event)) {
2375 vq_err(vq, "Failed to get used event idx"); 2979 vq_err(vq, "Failed to get used event idx");
2376 return true; 2980 return true;
2377 } 2981 }
@@ -2416,7 +3020,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
2416 if (vq->avail_idx != vq->last_avail_idx) 3020 if (vq->avail_idx != vq->last_avail_idx)
2417 return false; 3021 return false;
2418 3022
2419 r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); 3023 r = vhost_get_avail_idx(vq, &avail_idx);
2420 if (unlikely(r)) 3024 if (unlikely(r))
2421 return false; 3025 return false;
2422 vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 3026 vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
@@ -2452,7 +3056,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
2452 /* They could have slipped one in as we were doing that: make 3056 /* They could have slipped one in as we were doing that: make
2453 * sure it's written, then check again. */ 3057 * sure it's written, then check again. */
2454 smp_mb(); 3058 smp_mb();
2455 r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); 3059 r = vhost_get_avail_idx(vq, &avail_idx);
2456 if (r) { 3060 if (r) {
2457 vq_err(vq, "Failed to check avail idx at %p: %d\n", 3061 vq_err(vq, "Failed to check avail idx at %p: %d\n",
2458 &vq->avail->idx, r); 3062 &vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 27a78a9b8cc7..819296332913 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -12,6 +12,9 @@
12#include <linux/virtio_config.h> 12#include <linux/virtio_config.h>
13#include <linux/virtio_ring.h> 13#include <linux/virtio_ring.h>
14#include <linux/atomic.h> 14#include <linux/atomic.h>
15#include <linux/pagemap.h>
16#include <linux/mmu_notifier.h>
17#include <asm/cacheflush.h>
15 18
16struct vhost_work; 19struct vhost_work;
17typedef void (*vhost_work_fn_t)(struct vhost_work *work); 20typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -80,6 +83,24 @@ enum vhost_uaddr_type {
80 VHOST_NUM_ADDRS = 3, 83 VHOST_NUM_ADDRS = 3,
81}; 84};
82 85
86struct vhost_map {
87 int npages;
88 void *addr;
89 struct page **pages;
90};
91
92struct vhost_uaddr {
93 unsigned long uaddr;
94 size_t size;
95 bool write;
96};
97
98#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
99#define VHOST_ARCH_CAN_ACCEL_UACCESS 1
100#else
101#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
102#endif
103
83/* The virtqueue structure describes a queue attached to a device. */ 104/* The virtqueue structure describes a queue attached to a device. */
84struct vhost_virtqueue { 105struct vhost_virtqueue {
85 struct vhost_dev *dev; 106 struct vhost_dev *dev;
@@ -90,7 +111,22 @@ struct vhost_virtqueue {
90 struct vring_desc __user *desc; 111 struct vring_desc __user *desc;
91 struct vring_avail __user *avail; 112 struct vring_avail __user *avail;
92 struct vring_used __user *used; 113 struct vring_used __user *used;
114
115#if VHOST_ARCH_CAN_ACCEL_UACCESS
116 /* Read by memory accessors, modified by metadata
117 * prefetching, the MMU notifier and vring ioctl().
118 * Synchronized through mmu_lock (writers) and RCU (writers
119 * and readers).
120 */
121 struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
122 /* Read by MMU notifier, modified by vring ioctl(),
123 * synchronized through MMU notifier
124 * registering/unregistering.
125 */
126 struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
127#endif
93 const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; 128 const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
129
94 struct file *kick; 130 struct file *kick;
95 struct eventfd_ctx *call_ctx; 131 struct eventfd_ctx *call_ctx;
96 struct eventfd_ctx *error_ctx; 132 struct eventfd_ctx *error_ctx;
@@ -145,6 +181,8 @@ struct vhost_virtqueue {
145 bool user_be; 181 bool user_be;
146#endif 182#endif
147 u32 busyloop_timeout; 183 u32 busyloop_timeout;
184 spinlock_t mmu_lock;
185 int invalidate_count;
148}; 186};
149 187
150struct vhost_msg_node { 188struct vhost_msg_node {
@@ -158,6 +196,9 @@ struct vhost_msg_node {
158 196
159struct vhost_dev { 197struct vhost_dev {
160 struct mm_struct *mm; 198 struct mm_struct *mm;
199#ifdef CONFIG_MMU_NOTIFIER
200 struct mmu_notifier mmu_notifier;
201#endif
161 struct mutex mutex; 202 struct mutex mutex;
162 struct vhost_virtqueue **vqs; 203 struct vhost_virtqueue **vqs;
163 int nvqs; 204 int nvqs;
@@ -212,7 +253,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
212int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, 253int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
213 unsigned int log_num, u64 len, 254 unsigned int log_num, u64 len,
214 struct iovec *iov, int count); 255 struct iovec *iov, int count);
215int vq_iotlb_prefetch(struct vhost_virtqueue *vq); 256int vq_meta_prefetch(struct vhost_virtqueue *vq);
216 257
217struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); 258struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
218void vhost_enqueue_msg(struct vhost_dev *dev, 259void vhost_enqueue_msg(struct vhost_dev *dev,
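For context, vq->uaddrs[] is filled from the vring pointers and the metadata size helpers whenever the addresses change, so the MMU notifier can match invalidation ranges against each area; only the used ring is written by vhost, hence write is true only for VHOST_ADDR_USED. A rough sketch of that setup (vhost_setup_vq_uaddr() is defined earlier in this diff; the body below is an approximation, not the patch code):

static void setup_vq_uaddrs_sketch(struct vhost_virtqueue *vq)
{
	spin_lock(&vq->mmu_lock);

	vq->uaddrs[VHOST_ADDR_DESC] = (struct vhost_uaddr) {
		.uaddr = (unsigned long)vq->desc,
		.size  = vhost_get_desc_size(vq, vq->num),
		.write = false,
	};
	vq->uaddrs[VHOST_ADDR_AVAIL] = (struct vhost_uaddr) {
		.uaddr = (unsigned long)vq->avail,
		.size  = vhost_get_avail_size(vq, vq->num),
		.write = false,
	};
	vq->uaddrs[VHOST_ADDR_USED] = (struct vhost_uaddr) {
		.uaddr = (unsigned long)vq->used,
		.size  = vhost_get_used_size(vq, vq->num),
		.write = true,
	};

	spin_unlock(&vq->mmu_lock);
}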
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f363fbeb5ab0..e09edb5c5e06 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -463,9 +463,14 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
463 struct irq_affinity *desc) 463 struct irq_affinity *desc)
464{ 464{
465 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 465 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
466 unsigned int irq = platform_get_irq(vm_dev->pdev, 0); 466 int irq = platform_get_irq(vm_dev->pdev, 0);
467 int i, err, queue_idx = 0; 467 int i, err, queue_idx = 0;
468 468
469 if (irq < 0) {
470 dev_err(&vdev->dev, "Cannot get IRQ resource\n");
471 return irq;
472 }
473
469 err = request_irq(irq, vm_interrupt, IRQF_SHARED, 474 err = request_irq(irq, vm_interrupt, IRQF_SHARED,
470 dev_name(&vdev->dev), vm_dev); 475 dev_name(&vdev->dev), vm_dev);
471 if (err) 476 if (err)
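The virtio-mmio fix above works because platform_get_irq() returns a negative errno on failure; storing it in an unsigned int would turn that into a huge bogus IRQ number and the error would go unnoticed until request_irq() misbehaved. The canonical pattern, as a generic sketch (handler and names are made up):

#include <linux/interrupt.h>
#include <linux/platform_device.h>

static irqreturn_t example_interrupt(int irq, void *opaque)
{
	return IRQ_HANDLED;
}

static int example_request_irq(struct platform_device *pdev)
{
	int irq = platform_get_irq(pdev, 0);	/* negative errno on failure */

	if (irq < 0)
		return irq;	/* e.g. -ENXIO; never hand it to request_irq() */

	return request_irq(irq, example_interrupt, IRQF_SHARED,
			   dev_name(&pdev->dev), pdev);
}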
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 6d5c3b2d4f4d..cfe47c5d9a56 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -43,5 +43,6 @@
43#define VIRTIO_ID_INPUT 18 /* virtio input */ 43#define VIRTIO_ID_INPUT 18 /* virtio input */
44#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ 44#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
45#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ 45#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
46#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
46 47
47#endif /* _LINUX_VIRTIO_IDS_H */ 48#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
new file mode 100644
index 000000000000..ba1b460c9944
--- /dev/null
+++ b/include/uapi/linux/virtio_iommu.h
@@ -0,0 +1,161 @@
1/* SPDX-License-Identifier: BSD-3-Clause */
2/*
3 * Virtio-iommu definition v0.9
4 *
5 * Copyright (C) 2018 Arm Ltd.
6 */
7#ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
8#define _UAPI_LINUX_VIRTIO_IOMMU_H
9
10#include <linux/types.h>
11
12/* Feature bits */
13#define VIRTIO_IOMMU_F_INPUT_RANGE 0
14#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
15#define VIRTIO_IOMMU_F_MAP_UNMAP 2
16#define VIRTIO_IOMMU_F_BYPASS 3
17#define VIRTIO_IOMMU_F_PROBE 4
18
19struct virtio_iommu_range {
20 __u64 start;
21 __u64 end;
22};
23
24struct virtio_iommu_config {
25 /* Supported page sizes */
26 __u64 page_size_mask;
27 /* Supported IOVA range */
28 struct virtio_iommu_range input_range;
29 /* Max domain ID size */
30 __u8 domain_bits;
31 __u8 padding[3];
32 /* Probe buffer size */
33 __u32 probe_size;
34};
35
36/* Request types */
37#define VIRTIO_IOMMU_T_ATTACH 0x01
38#define VIRTIO_IOMMU_T_DETACH 0x02
39#define VIRTIO_IOMMU_T_MAP 0x03
40#define VIRTIO_IOMMU_T_UNMAP 0x04
41#define VIRTIO_IOMMU_T_PROBE 0x05
42
43/* Status types */
44#define VIRTIO_IOMMU_S_OK 0x00
45#define VIRTIO_IOMMU_S_IOERR 0x01
46#define VIRTIO_IOMMU_S_UNSUPP 0x02
47#define VIRTIO_IOMMU_S_DEVERR 0x03
48#define VIRTIO_IOMMU_S_INVAL 0x04
49#define VIRTIO_IOMMU_S_RANGE 0x05
50#define VIRTIO_IOMMU_S_NOENT 0x06
51#define VIRTIO_IOMMU_S_FAULT 0x07
52
53struct virtio_iommu_req_head {
54 __u8 type;
55 __u8 reserved[3];
56};
57
58struct virtio_iommu_req_tail {
59 __u8 status;
60 __u8 reserved[3];
61};
62
63struct virtio_iommu_req_attach {
64 struct virtio_iommu_req_head head;
65 __le32 domain;
66 __le32 endpoint;
67 __u8 reserved[8];
68 struct virtio_iommu_req_tail tail;
69};
70
71struct virtio_iommu_req_detach {
72 struct virtio_iommu_req_head head;
73 __le32 domain;
74 __le32 endpoint;
75 __u8 reserved[8];
76 struct virtio_iommu_req_tail tail;
77};
78
79#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
80#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
81#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2)
82#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 3)
83
84#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \
85 VIRTIO_IOMMU_MAP_F_WRITE | \
86 VIRTIO_IOMMU_MAP_F_EXEC | \
87 VIRTIO_IOMMU_MAP_F_MMIO)
88
89struct virtio_iommu_req_map {
90 struct virtio_iommu_req_head head;
91 __le32 domain;
92 __le64 virt_start;
93 __le64 virt_end;
94 __le64 phys_start;
95 __le32 flags;
96 struct virtio_iommu_req_tail tail;
97};
98
99struct virtio_iommu_req_unmap {
100 struct virtio_iommu_req_head head;
101 __le32 domain;
102 __le64 virt_start;
103 __le64 virt_end;
104 __u8 reserved[4];
105 struct virtio_iommu_req_tail tail;
106};
107
108#define VIRTIO_IOMMU_PROBE_T_NONE 0
109#define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
110
111#define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
112
113struct virtio_iommu_probe_property {
114 __le16 type;
115 __le16 length;
116};
117
118#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
119#define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
120
121struct virtio_iommu_probe_resv_mem {
122 struct virtio_iommu_probe_property head;
123 __u8 subtype;
124 __u8 reserved[3];
125 __le64 start;
126 __le64 end;
127};
128
129struct virtio_iommu_req_probe {
130 struct virtio_iommu_req_head head;
131 __le32 endpoint;
132 __u8 reserved[64];
133
134 __u8 properties[];
135
136 /*
137 * Tail follows the variable-length properties array. No padding,
138 * property lengths are all aligned on 8 bytes.
139 */
140};
141
142/* Fault types */
143#define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0
144#define VIRTIO_IOMMU_FAULT_R_DOMAIN 1
145#define VIRTIO_IOMMU_FAULT_R_MAPPING 2
146
147#define VIRTIO_IOMMU_FAULT_F_READ (1 << 0)
148#define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1)
149#define VIRTIO_IOMMU_FAULT_F_EXEC (1 << 2)
150#define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8)
151
152struct virtio_iommu_fault {
153 __u8 reason;
154 __u8 reserved[3];
155 __le32 flags;
156 __le32 endpoint;
157 __u8 reserved2[4];
158 __le64 address;
159};
160
161#endif
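To show how the structures above fit together, here is a hedged sketch of filling a MAP request for one IOVA range; the helper name is made up, the transport side (virtqueue setup, scatterlists, waiting for the reply) is omitted, and virt_end is inclusive as in the definition above:

#include <linux/kernel.h>
#include <linux/string.h>
#include <uapi/linux/virtio_iommu.h>

static void viommu_fill_map_req(struct virtio_iommu_req_map *req, u32 domain,
				u64 iova, u64 paddr, u64 size)
{
	memset(req, 0, sizeof(*req));
	req->head.type	= VIRTIO_IOMMU_T_MAP;		/* __u8, no byte swap */
	req->domain	= cpu_to_le32(domain);
	req->virt_start	= cpu_to_le64(iova);
	req->virt_end	= cpu_to_le64(iova + size - 1);	/* last byte, inclusive */
	req->phys_start	= cpu_to_le64(paddr);
	req->flags	= cpu_to_le32(VIRTIO_IOMMU_MAP_F_READ |
				      VIRTIO_IOMMU_MAP_F_WRITE);
	/* The device fills req->tail.status on completion, e.g.
	 * VIRTIO_IOMMU_S_OK or VIRTIO_IOMMU_S_RANGE. */
}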