author     Linus Torvalds <torvalds@linux-foundation.org>   2019-07-17 14:26:09 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-07-17 14:26:09 -0400
commit     3a1d5384b7decbff6519daa9c65a35665e227323 (patch)
tree       7442f1b74d452d82d6702f8cd25173cc81c0c634
parent     37d4607ebbbf5d8b74cbcb9434a5ce6897a51864 (diff)
parent     5e663f0410fa2f355042209154029842ba1abd43 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio, vhost updates from Michael Tsirkin:
"Fixes, features, performance:
- new iommu device
- vhost guest memory access using vmap (just meta-data for now)
- minor fixes"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
virtio-mmio: add error check for platform_get_irq
scsi: virtio_scsi: Use struct_size() helper
iommu/virtio: Add event queue
iommu/virtio: Add probe request
iommu: Add virtio-iommu driver
PCI: OF: Initialize dev->fwnode appropriately
of: Allow the iommu-map property to omit untranslated devices
dt-bindings: virtio: Add virtio-pci-iommu node
dt-bindings: virtio-mmio: Add IOMMU description
vhost: fix clang build warning
vhost: access vq metadata through kernel virtual address
vhost: factor out setting vring addr and num
vhost: introduce helpers to get the size of metadata area
vhost: rename vq_iotlb_prefetch() to vq_meta_prefetch()
vhost: fine grain userspace memory accessors
vhost: generalize adding used elem
-rw-r--r--  Documentation/devicetree/bindings/virtio/iommu.txt  |   66
-rw-r--r--  Documentation/devicetree/bindings/virtio/mmio.txt   |   30
-rw-r--r--  MAINTAINERS                                         |    7
-rw-r--r--  drivers/iommu/Kconfig                               |   11
-rw-r--r--  drivers/iommu/Makefile                              |    1
-rw-r--r--  drivers/iommu/virtio-iommu.c                        | 1158
-rw-r--r--  drivers/of/base.c                                   |   10
-rw-r--r--  drivers/pci/of.c                                    |    8
-rw-r--r--  drivers/scsi/virtio_scsi.c                          |    2
-rw-r--r--  drivers/vhost/net.c                                 |    4
-rw-r--r--  drivers/vhost/vhost.c                               |  850
-rw-r--r--  drivers/vhost/vhost.h                               |   43
-rw-r--r--  drivers/virtio/virtio_mmio.c                        |    7
-rw-r--r--  include/uapi/linux/virtio_ids.h                     |    1
-rw-r--r--  include/uapi/linux/virtio_iommu.h                   |  161
15 files changed, 2228 insertions(+), 131 deletions(-)
diff --git a/Documentation/devicetree/bindings/virtio/iommu.txt b/Documentation/devicetree/bindings/virtio/iommu.txt
new file mode 100644
index 000000000000..2407fea0651c
--- /dev/null
+++ b/Documentation/devicetree/bindings/virtio/iommu.txt
@@ -0,0 +1,66 @@
1 | * virtio IOMMU PCI device | ||
2 | |||
3 | When virtio-iommu uses the PCI transport, its programming interface is | ||
4 | discovered dynamically by the PCI probing infrastructure. However the | ||
5 | device tree statically describes the relation between IOMMU and DMA | ||
6 | masters. Therefore, the PCI root complex that hosts the virtio-iommu | ||
7 | contains a child node representing the IOMMU device explicitly. | ||
8 | |||
9 | Required properties: | ||
10 | |||
11 | - compatible: Should be "virtio,pci-iommu" | ||
12 | - reg: PCI address of the IOMMU. As defined in the PCI Bus | ||
13 | Binding reference [1], the reg property is a five-cell | ||
14 | address encoded as (phys.hi phys.mid phys.lo size.hi | ||
15 | size.lo). phys.hi should contain the device's BDF as | ||
16 | 0b00000000 bbbbbbbb dddddfff 00000000. The other cells | ||
17 | should be zero. | ||
18 | - #iommu-cells: Each platform DMA master managed by the IOMMU is assigned | ||
19 | an endpoint ID, described by the "iommus" property [2]. | ||
20 | For virtio-iommu, #iommu-cells must be 1. | ||
21 | |||
22 | Notes: | ||
23 | |||
24 | - DMA from the IOMMU device isn't managed by another IOMMU. Therefore the | ||
25 | virtio-iommu node doesn't have an "iommus" property, and is omitted from | ||
26 | the iommu-map property of the root complex. | ||
27 | |||
28 | Example: | ||
29 | |||
30 | pcie@10000000 { | ||
31 | compatible = "pci-host-ecam-generic"; | ||
32 | ... | ||
33 | |||
34 | /* The IOMMU programming interface uses slot 00:01.0 */ | ||
35 | iommu0: iommu@0008 { | ||
36 | compatible = "virtio,pci-iommu"; | ||
37 | reg = <0x00000800 0 0 0 0>; | ||
38 | #iommu-cells = <1>; | ||
39 | }; | ||
40 | |||
41 | /* | ||
42 | * The IOMMU manages all functions in this PCI domain except | ||
43 | * itself. Omit BDF 00:01.0. | ||
44 | */ | ||
45 | iommu-map = <0x0 &iommu0 0x0 0x8> | ||
46 | <0x9 &iommu0 0x9 0xfff7>; | ||
47 | }; | ||
48 | |||
49 | pcie@20000000 { | ||
50 | compatible = "pci-host-ecam-generic"; | ||
51 | ... | ||
52 | /* | ||
53 | * The IOMMU also manages all functions from this domain, | ||
54 | * with endpoint IDs 0x10000 - 0x1ffff | ||
55 | */ | ||
56 | iommu-map = <0x0 &iommu0 0x10000 0x10000>; | ||
57 | }; | ||
58 | |||
59 | ethernet@fe001000 { | ||
60 | ... | ||
61 | /* The IOMMU manages this platform device with endpoint ID 0x20000 */ | ||
62 | iommus = <&iommu0 0x20000>; | ||
63 | }; | ||
64 | |||
65 | [1] Documentation/devicetree/bindings/pci/pci.txt | ||
66 | [2] Documentation/devicetree/bindings/iommu/iommu.txt | ||
diff --git a/Documentation/devicetree/bindings/virtio/mmio.txt b/Documentation/devicetree/bindings/virtio/mmio.txt
index 5069c1b8e193..21af30fbb81f 100644
--- a/Documentation/devicetree/bindings/virtio/mmio.txt
+++ b/Documentation/devicetree/bindings/virtio/mmio.txt
@@ -8,10 +8,40 @@ Required properties:
8 | - reg: control registers base address and size including configuration space | 8 | - reg: control registers base address and size including configuration space |
9 | - interrupts: interrupt generated by the device | 9 | - interrupts: interrupt generated by the device |
10 | 10 | ||
11 | Required properties for virtio-iommu: | ||
12 | |||
13 | - #iommu-cells: When the node corresponds to a virtio-iommu device, it is | ||
14 | linked to DMA masters using the "iommus" or "iommu-map" | ||
15 | properties [1][2]. #iommu-cells specifies the size of the | ||
16 | "iommus" property. For virtio-iommu #iommu-cells must be | ||
17 | 1, each cell describing a single endpoint ID. | ||
18 | |||
19 | Optional properties: | ||
20 | |||
21 | - iommus: If the device accesses memory through an IOMMU, it should | ||
22 | have an "iommus" property [1]. Since virtio-iommu itself | ||
23 | does not access memory through an IOMMU, the "virtio,mmio" | ||
24 | node cannot have both an "#iommu-cells" and an "iommus" | ||
25 | property. | ||
26 | |||
11 | Example: | 27 | Example: |
12 | 28 | ||
13 | virtio_block@3000 { | 29 | virtio_block@3000 { |
14 | compatible = "virtio,mmio"; | 30 | compatible = "virtio,mmio"; |
15 | reg = <0x3000 0x100>; | 31 | reg = <0x3000 0x100>; |
16 | interrupts = <41>; | 32 | interrupts = <41>; |
33 | |||
34 | /* Device has endpoint ID 23 */ | ||
35 | iommus = <&viommu 23> | ||
17 | } | 36 | } |
37 | |||
38 | viommu: iommu@3100 { | ||
39 | compatible = "virtio,mmio"; | ||
40 | reg = <0x3100 0x100>; | ||
41 | interrupts = <42>; | ||
42 | |||
43 | #iommu-cells = <1> | ||
44 | } | ||
45 | |||
46 | [1] Documentation/devicetree/bindings/iommu/iommu.txt | ||
47 | [2] Documentation/devicetree/bindings/pci/pci-iommu.txt | ||
diff --git a/MAINTAINERS b/MAINTAINERS
index d452d7bbbaad..c04ea10cfb1b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17107,6 +17107,13 @@ S: Maintained
17107 | F: drivers/virtio/virtio_input.c | 17107 | F: drivers/virtio/virtio_input.c |
17108 | F: include/uapi/linux/virtio_input.h | 17108 | F: include/uapi/linux/virtio_input.h |
17109 | 17109 | ||
17110 | VIRTIO IOMMU DRIVER | ||
17111 | M: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> | ||
17112 | L: virtualization@lists.linux-foundation.org | ||
17113 | S: Maintained | ||
17114 | F: drivers/iommu/virtio-iommu.c | ||
17115 | F: include/uapi/linux/virtio_iommu.h | ||
17116 | |||
17110 | VIRTUAL BOX GUEST DEVICE DRIVER | 17117 | VIRTUAL BOX GUEST DEVICE DRIVER |
17111 | M: Hans de Goede <hdegoede@redhat.com> | 17118 | M: Hans de Goede <hdegoede@redhat.com> |
17112 | M: Arnd Bergmann <arnd@arndb.de> | 17119 | M: Arnd Bergmann <arnd@arndb.de> |
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 83664db5221d..e15cdcd8cb3c 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -473,4 +473,15 @@ config HYPERV_IOMMU
473 | Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux | 473 | Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux |
474 | guests to run with x2APIC mode enabled. | 474 | guests to run with x2APIC mode enabled. |
475 | 475 | ||
476 | config VIRTIO_IOMMU | ||
477 | bool "Virtio IOMMU driver" | ||
478 | depends on VIRTIO=y | ||
479 | depends on ARM64 | ||
480 | select IOMMU_API | ||
481 | select INTERVAL_TREE | ||
482 | help | ||
483 | Para-virtualised IOMMU driver with virtio. | ||
484 | |||
485 | Say Y here if you intend to run this kernel as a guest. | ||
486 | |||
476 | endif # IOMMU_SUPPORT | 487 | endif # IOMMU_SUPPORT |
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 8c71a15e986b..f13f36ae1af6 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -33,3 +33,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
33 | obj-$(CONFIG_S390_IOMMU) += s390-iommu.o | 33 | obj-$(CONFIG_S390_IOMMU) += s390-iommu.o |
34 | obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o | 34 | obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o |
35 | obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o | 35 | obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o |
36 | obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o | ||
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
new file mode 100644
index 000000000000..433f4d2ee956
--- /dev/null
+++ b/drivers/iommu/virtio-iommu.c
@@ -0,0 +1,1158 @@
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Virtio driver for the paravirtualized IOMMU | ||
4 | * | ||
5 | * Copyright (C) 2018 Arm Limited | ||
6 | */ | ||
7 | |||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
9 | |||
10 | #include <linux/amba/bus.h> | ||
11 | #include <linux/delay.h> | ||
12 | #include <linux/dma-iommu.h> | ||
13 | #include <linux/freezer.h> | ||
14 | #include <linux/interval_tree.h> | ||
15 | #include <linux/iommu.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/of_iommu.h> | ||
18 | #include <linux/of_platform.h> | ||
19 | #include <linux/pci.h> | ||
20 | #include <linux/platform_device.h> | ||
21 | #include <linux/virtio.h> | ||
22 | #include <linux/virtio_config.h> | ||
23 | #include <linux/virtio_ids.h> | ||
24 | #include <linux/wait.h> | ||
25 | |||
26 | #include <uapi/linux/virtio_iommu.h> | ||
27 | |||
28 | #define MSI_IOVA_BASE 0x8000000 | ||
29 | #define MSI_IOVA_LENGTH 0x100000 | ||
30 | |||
31 | #define VIOMMU_REQUEST_VQ 0 | ||
32 | #define VIOMMU_EVENT_VQ 1 | ||
33 | #define VIOMMU_NR_VQS 2 | ||
34 | |||
35 | struct viommu_dev { | ||
36 | struct iommu_device iommu; | ||
37 | struct device *dev; | ||
38 | struct virtio_device *vdev; | ||
39 | |||
40 | struct ida domain_ids; | ||
41 | |||
42 | struct virtqueue *vqs[VIOMMU_NR_VQS]; | ||
43 | spinlock_t request_lock; | ||
44 | struct list_head requests; | ||
45 | void *evts; | ||
46 | |||
47 | /* Device configuration */ | ||
48 | struct iommu_domain_geometry geometry; | ||
49 | u64 pgsize_bitmap; | ||
50 | u8 domain_bits; | ||
51 | u32 probe_size; | ||
52 | }; | ||
53 | |||
54 | struct viommu_mapping { | ||
55 | phys_addr_t paddr; | ||
56 | struct interval_tree_node iova; | ||
57 | u32 flags; | ||
58 | }; | ||
59 | |||
60 | struct viommu_domain { | ||
61 | struct iommu_domain domain; | ||
62 | struct viommu_dev *viommu; | ||
63 | struct mutex mutex; /* protects viommu pointer */ | ||
64 | unsigned int id; | ||
65 | |||
66 | spinlock_t mappings_lock; | ||
67 | struct rb_root_cached mappings; | ||
68 | |||
69 | unsigned long nr_endpoints; | ||
70 | }; | ||
71 | |||
72 | struct viommu_endpoint { | ||
73 | struct device *dev; | ||
74 | struct viommu_dev *viommu; | ||
75 | struct viommu_domain *vdomain; | ||
76 | struct list_head resv_regions; | ||
77 | }; | ||
78 | |||
79 | struct viommu_request { | ||
80 | struct list_head list; | ||
81 | void *writeback; | ||
82 | unsigned int write_offset; | ||
83 | unsigned int len; | ||
84 | char buf[]; | ||
85 | }; | ||
86 | |||
87 | #define VIOMMU_FAULT_RESV_MASK 0xffffff00 | ||
88 | |||
89 | struct viommu_event { | ||
90 | union { | ||
91 | u32 head; | ||
92 | struct virtio_iommu_fault fault; | ||
93 | }; | ||
94 | }; | ||
95 | |||
96 | #define to_viommu_domain(domain) \ | ||
97 | container_of(domain, struct viommu_domain, domain) | ||
98 | |||
99 | static int viommu_get_req_errno(void *buf, size_t len) | ||
100 | { | ||
101 | struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail); | ||
102 | |||
103 | switch (tail->status) { | ||
104 | case VIRTIO_IOMMU_S_OK: | ||
105 | return 0; | ||
106 | case VIRTIO_IOMMU_S_UNSUPP: | ||
107 | return -ENOSYS; | ||
108 | case VIRTIO_IOMMU_S_INVAL: | ||
109 | return -EINVAL; | ||
110 | case VIRTIO_IOMMU_S_RANGE: | ||
111 | return -ERANGE; | ||
112 | case VIRTIO_IOMMU_S_NOENT: | ||
113 | return -ENOENT; | ||
114 | case VIRTIO_IOMMU_S_FAULT: | ||
115 | return -EFAULT; | ||
116 | case VIRTIO_IOMMU_S_IOERR: | ||
117 | case VIRTIO_IOMMU_S_DEVERR: | ||
118 | default: | ||
119 | return -EIO; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | static void viommu_set_req_status(void *buf, size_t len, int status) | ||
124 | { | ||
125 | struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail); | ||
126 | |||
127 | tail->status = status; | ||
128 | } | ||
129 | |||
130 | static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu, | ||
131 | struct virtio_iommu_req_head *req, | ||
132 | size_t len) | ||
133 | { | ||
134 | size_t tail_size = sizeof(struct virtio_iommu_req_tail); | ||
135 | |||
136 | if (req->type == VIRTIO_IOMMU_T_PROBE) | ||
137 | return len - viommu->probe_size - tail_size; | ||
138 | |||
139 | return len - tail_size; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * __viommu_sync_req - Complete all in-flight requests | ||
144 | * | ||
145 | * Wait for all added requests to complete. When this function returns, all | ||
146 | * requests that were in-flight at the time of the call have completed. | ||
147 | */ | ||
148 | static int __viommu_sync_req(struct viommu_dev *viommu) | ||
149 | { | ||
150 | int ret = 0; | ||
151 | unsigned int len; | ||
152 | size_t write_len; | ||
153 | struct viommu_request *req; | ||
154 | struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ]; | ||
155 | |||
156 | assert_spin_locked(&viommu->request_lock); | ||
157 | |||
158 | virtqueue_kick(vq); | ||
159 | |||
160 | while (!list_empty(&viommu->requests)) { | ||
161 | len = 0; | ||
162 | req = virtqueue_get_buf(vq, &len); | ||
163 | if (!req) | ||
164 | continue; | ||
165 | |||
166 | if (!len) | ||
167 | viommu_set_req_status(req->buf, req->len, | ||
168 | VIRTIO_IOMMU_S_IOERR); | ||
169 | |||
170 | write_len = req->len - req->write_offset; | ||
171 | if (req->writeback && len == write_len) | ||
172 | memcpy(req->writeback, req->buf + req->write_offset, | ||
173 | write_len); | ||
174 | |||
175 | list_del(&req->list); | ||
176 | kfree(req); | ||
177 | } | ||
178 | |||
179 | return ret; | ||
180 | } | ||
181 | |||
182 | static int viommu_sync_req(struct viommu_dev *viommu) | ||
183 | { | ||
184 | int ret; | ||
185 | unsigned long flags; | ||
186 | |||
187 | spin_lock_irqsave(&viommu->request_lock, flags); | ||
188 | ret = __viommu_sync_req(viommu); | ||
189 | if (ret) | ||
190 | dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret); | ||
191 | spin_unlock_irqrestore(&viommu->request_lock, flags); | ||
192 | |||
193 | return ret; | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * __viommu_add_req - Add one request to the queue | ||
198 | * @buf: pointer to the request buffer | ||
199 | * @len: length of the request buffer | ||
200 | * @writeback: copy data back to the buffer when the request completes. | ||
201 | * | ||
202 | * Add a request to the queue. Only synchronize the queue if it's already full. | ||
203 | * Otherwise don't kick the queue nor wait for requests to complete. | ||
204 | * | ||
205 | * When @writeback is true, data written by the device, including the request | ||
206 | * status, is copied into @buf after the request completes. This is unsafe if | ||
207 | * the caller allocates @buf on stack and drops the lock between add_req() and | ||
208 | * sync_req(). | ||
209 | * | ||
210 | * Return 0 if the request was successfully added to the queue. | ||
211 | */ | ||
212 | static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len, | ||
213 | bool writeback) | ||
214 | { | ||
215 | int ret; | ||
216 | off_t write_offset; | ||
217 | struct viommu_request *req; | ||
218 | struct scatterlist top_sg, bottom_sg; | ||
219 | struct scatterlist *sg[2] = { &top_sg, &bottom_sg }; | ||
220 | struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ]; | ||
221 | |||
222 | assert_spin_locked(&viommu->request_lock); | ||
223 | |||
224 | write_offset = viommu_get_write_desc_offset(viommu, buf, len); | ||
225 | if (write_offset <= 0) | ||
226 | return -EINVAL; | ||
227 | |||
228 | req = kzalloc(sizeof(*req) + len, GFP_ATOMIC); | ||
229 | if (!req) | ||
230 | return -ENOMEM; | ||
231 | |||
232 | req->len = len; | ||
233 | if (writeback) { | ||
234 | req->writeback = buf + write_offset; | ||
235 | req->write_offset = write_offset; | ||
236 | } | ||
237 | memcpy(&req->buf, buf, write_offset); | ||
238 | |||
239 | sg_init_one(&top_sg, req->buf, write_offset); | ||
240 | sg_init_one(&bottom_sg, req->buf + write_offset, len - write_offset); | ||
241 | |||
242 | ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC); | ||
243 | if (ret == -ENOSPC) { | ||
244 | /* If the queue is full, sync and retry */ | ||
245 | if (!__viommu_sync_req(viommu)) | ||
246 | ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC); | ||
247 | } | ||
248 | if (ret) | ||
249 | goto err_free; | ||
250 | |||
251 | list_add_tail(&req->list, &viommu->requests); | ||
252 | return 0; | ||
253 | |||
254 | err_free: | ||
255 | kfree(req); | ||
256 | return ret; | ||
257 | } | ||
258 | |||
259 | static int viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len) | ||
260 | { | ||
261 | int ret; | ||
262 | unsigned long flags; | ||
263 | |||
264 | spin_lock_irqsave(&viommu->request_lock, flags); | ||
265 | ret = __viommu_add_req(viommu, buf, len, false); | ||
266 | if (ret) | ||
267 | dev_dbg(viommu->dev, "could not add request: %d\n", ret); | ||
268 | spin_unlock_irqrestore(&viommu->request_lock, flags); | ||
269 | |||
270 | return ret; | ||
271 | } | ||
272 | |||
273 | /* | ||
274 | * Send a request and wait for it to complete. Return the request status (as an | ||
275 | * errno) | ||
276 | */ | ||
277 | static int viommu_send_req_sync(struct viommu_dev *viommu, void *buf, | ||
278 | size_t len) | ||
279 | { | ||
280 | int ret; | ||
281 | unsigned long flags; | ||
282 | |||
283 | spin_lock_irqsave(&viommu->request_lock, flags); | ||
284 | |||
285 | ret = __viommu_add_req(viommu, buf, len, true); | ||
286 | if (ret) { | ||
287 | dev_dbg(viommu->dev, "could not add request (%d)\n", ret); | ||
288 | goto out_unlock; | ||
289 | } | ||
290 | |||
291 | ret = __viommu_sync_req(viommu); | ||
292 | if (ret) { | ||
293 | dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret); | ||
294 | /* Fall-through (get the actual request status) */ | ||
295 | } | ||
296 | |||
297 | ret = viommu_get_req_errno(buf, len); | ||
298 | out_unlock: | ||
299 | spin_unlock_irqrestore(&viommu->request_lock, flags); | ||
300 | return ret; | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * viommu_add_mapping - add a mapping to the internal tree | ||
305 | * | ||
306 | * On success, return 0. Otherwise return -ENOMEM. | ||
307 | */ | ||
308 | static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova, | ||
309 | phys_addr_t paddr, size_t size, u32 flags) | ||
310 | { | ||
311 | unsigned long irqflags; | ||
312 | struct viommu_mapping *mapping; | ||
313 | |||
314 | mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC); | ||
315 | if (!mapping) | ||
316 | return -ENOMEM; | ||
317 | |||
318 | mapping->paddr = paddr; | ||
319 | mapping->iova.start = iova; | ||
320 | mapping->iova.last = iova + size - 1; | ||
321 | mapping->flags = flags; | ||
322 | |||
323 | spin_lock_irqsave(&vdomain->mappings_lock, irqflags); | ||
324 | interval_tree_insert(&mapping->iova, &vdomain->mappings); | ||
325 | spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags); | ||
326 | |||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * viommu_del_mappings - remove mappings from the internal tree | ||
332 | * | ||
333 | * @vdomain: the domain | ||
334 | * @iova: start of the range | ||
335 | * @size: size of the range. A size of 0 corresponds to the entire address | ||
336 | * space. | ||
337 | * | ||
338 | * On success, returns the number of unmapped bytes (>= size) | ||
339 | */ | ||
340 | static size_t viommu_del_mappings(struct viommu_domain *vdomain, | ||
341 | unsigned long iova, size_t size) | ||
342 | { | ||
343 | size_t unmapped = 0; | ||
344 | unsigned long flags; | ||
345 | unsigned long last = iova + size - 1; | ||
346 | struct viommu_mapping *mapping = NULL; | ||
347 | struct interval_tree_node *node, *next; | ||
348 | |||
349 | spin_lock_irqsave(&vdomain->mappings_lock, flags); | ||
350 | next = interval_tree_iter_first(&vdomain->mappings, iova, last); | ||
351 | while (next) { | ||
352 | node = next; | ||
353 | mapping = container_of(node, struct viommu_mapping, iova); | ||
354 | next = interval_tree_iter_next(node, iova, last); | ||
355 | |||
356 | /* Trying to split a mapping? */ | ||
357 | if (mapping->iova.start < iova) | ||
358 | break; | ||
359 | |||
360 | /* | ||
361 | * Virtio-iommu doesn't allow UNMAP to split a mapping created | ||
362 | * with a single MAP request, so remove the full mapping. | ||
363 | */ | ||
364 | unmapped += mapping->iova.last - mapping->iova.start + 1; | ||
365 | |||
366 | interval_tree_remove(node, &vdomain->mappings); | ||
367 | kfree(mapping); | ||
368 | } | ||
369 | spin_unlock_irqrestore(&vdomain->mappings_lock, flags); | ||
370 | |||
371 | return unmapped; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * viommu_replay_mappings - re-send MAP requests | ||
376 | * | ||
377 | * When reattaching a domain that was previously detached from all endpoints, | ||
378 | * mappings were deleted from the device. Re-create the mappings available in | ||
379 | * the internal tree. | ||
380 | */ | ||
381 | static int viommu_replay_mappings(struct viommu_domain *vdomain) | ||
382 | { | ||
383 | int ret = 0; | ||
384 | unsigned long flags; | ||
385 | struct viommu_mapping *mapping; | ||
386 | struct interval_tree_node *node; | ||
387 | struct virtio_iommu_req_map map; | ||
388 | |||
389 | spin_lock_irqsave(&vdomain->mappings_lock, flags); | ||
390 | node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL); | ||
391 | while (node) { | ||
392 | mapping = container_of(node, struct viommu_mapping, iova); | ||
393 | map = (struct virtio_iommu_req_map) { | ||
394 | .head.type = VIRTIO_IOMMU_T_MAP, | ||
395 | .domain = cpu_to_le32(vdomain->id), | ||
396 | .virt_start = cpu_to_le64(mapping->iova.start), | ||
397 | .virt_end = cpu_to_le64(mapping->iova.last), | ||
398 | .phys_start = cpu_to_le64(mapping->paddr), | ||
399 | .flags = cpu_to_le32(mapping->flags), | ||
400 | }; | ||
401 | |||
402 | ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); | ||
403 | if (ret) | ||
404 | break; | ||
405 | |||
406 | node = interval_tree_iter_next(node, 0, -1UL); | ||
407 | } | ||
408 | spin_unlock_irqrestore(&vdomain->mappings_lock, flags); | ||
409 | |||
410 | return ret; | ||
411 | } | ||
412 | |||
413 | static int viommu_add_resv_mem(struct viommu_endpoint *vdev, | ||
414 | struct virtio_iommu_probe_resv_mem *mem, | ||
415 | size_t len) | ||
416 | { | ||
417 | size_t size; | ||
418 | u64 start64, end64; | ||
419 | phys_addr_t start, end; | ||
420 | struct iommu_resv_region *region = NULL; | ||
421 | unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; | ||
422 | |||
423 | start = start64 = le64_to_cpu(mem->start); | ||
424 | end = end64 = le64_to_cpu(mem->end); | ||
425 | size = end64 - start64 + 1; | ||
426 | |||
427 | /* Catch any overflow, including the unlikely end64 - start64 + 1 = 0 */ | ||
428 | if (start != start64 || end != end64 || size < end64 - start64) | ||
429 | return -EOVERFLOW; | ||
430 | |||
431 | if (len < sizeof(*mem)) | ||
432 | return -EINVAL; | ||
433 | |||
434 | switch (mem->subtype) { | ||
435 | default: | ||
436 | dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n", | ||
437 | mem->subtype); | ||
438 | /* Fall-through */ | ||
439 | case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: | ||
440 | region = iommu_alloc_resv_region(start, size, 0, | ||
441 | IOMMU_RESV_RESERVED); | ||
442 | break; | ||
443 | case VIRTIO_IOMMU_RESV_MEM_T_MSI: | ||
444 | region = iommu_alloc_resv_region(start, size, prot, | ||
445 | IOMMU_RESV_MSI); | ||
446 | break; | ||
447 | } | ||
448 | if (!region) | ||
449 | return -ENOMEM; | ||
450 | |||
451 | list_add(&vdev->resv_regions, ®ion->list); | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev) | ||
456 | { | ||
457 | int ret; | ||
458 | u16 type, len; | ||
459 | size_t cur = 0; | ||
460 | size_t probe_len; | ||
461 | struct virtio_iommu_req_probe *probe; | ||
462 | struct virtio_iommu_probe_property *prop; | ||
463 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | ||
464 | struct viommu_endpoint *vdev = fwspec->iommu_priv; | ||
465 | |||
466 | if (!fwspec->num_ids) | ||
467 | return -EINVAL; | ||
468 | |||
469 | probe_len = sizeof(*probe) + viommu->probe_size + | ||
470 | sizeof(struct virtio_iommu_req_tail); | ||
471 | probe = kzalloc(probe_len, GFP_KERNEL); | ||
472 | if (!probe) | ||
473 | return -ENOMEM; | ||
474 | |||
475 | probe->head.type = VIRTIO_IOMMU_T_PROBE; | ||
476 | /* | ||
477 | * For now, assume that properties of an endpoint that outputs multiple | ||
478 | * IDs are consistent. Only probe the first one. | ||
479 | */ | ||
480 | probe->endpoint = cpu_to_le32(fwspec->ids[0]); | ||
481 | |||
482 | ret = viommu_send_req_sync(viommu, probe, probe_len); | ||
483 | if (ret) | ||
484 | goto out_free; | ||
485 | |||
486 | prop = (void *)probe->properties; | ||
487 | type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK; | ||
488 | |||
489 | while (type != VIRTIO_IOMMU_PROBE_T_NONE && | ||
490 | cur < viommu->probe_size) { | ||
491 | len = le16_to_cpu(prop->length) + sizeof(*prop); | ||
492 | |||
493 | switch (type) { | ||
494 | case VIRTIO_IOMMU_PROBE_T_RESV_MEM: | ||
495 | ret = viommu_add_resv_mem(vdev, (void *)prop, len); | ||
496 | break; | ||
497 | default: | ||
498 | dev_err(dev, "unknown viommu prop 0x%x\n", type); | ||
499 | } | ||
500 | |||
501 | if (ret) | ||
502 | dev_err(dev, "failed to parse viommu prop 0x%x\n", type); | ||
503 | |||
504 | cur += len; | ||
505 | if (cur >= viommu->probe_size) | ||
506 | break; | ||
507 | |||
508 | prop = (void *)probe->properties + cur; | ||
509 | type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK; | ||
510 | } | ||
511 | |||
512 | out_free: | ||
513 | kfree(probe); | ||
514 | return ret; | ||
515 | } | ||
516 | |||
517 | static int viommu_fault_handler(struct viommu_dev *viommu, | ||
518 | struct virtio_iommu_fault *fault) | ||
519 | { | ||
520 | char *reason_str; | ||
521 | |||
522 | u8 reason = fault->reason; | ||
523 | u32 flags = le32_to_cpu(fault->flags); | ||
524 | u32 endpoint = le32_to_cpu(fault->endpoint); | ||
525 | u64 address = le64_to_cpu(fault->address); | ||
526 | |||
527 | switch (reason) { | ||
528 | case VIRTIO_IOMMU_FAULT_R_DOMAIN: | ||
529 | reason_str = "domain"; | ||
530 | break; | ||
531 | case VIRTIO_IOMMU_FAULT_R_MAPPING: | ||
532 | reason_str = "page"; | ||
533 | break; | ||
534 | case VIRTIO_IOMMU_FAULT_R_UNKNOWN: | ||
535 | default: | ||
536 | reason_str = "unknown"; | ||
537 | break; | ||
538 | } | ||
539 | |||
540 | /* TODO: find EP by ID and report_iommu_fault */ | ||
541 | if (flags & VIRTIO_IOMMU_FAULT_F_ADDRESS) | ||
542 | dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx [%s%s%s]\n", | ||
543 | reason_str, endpoint, address, | ||
544 | flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : "", | ||
545 | flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : "", | ||
546 | flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : ""); | ||
547 | else | ||
548 | dev_err_ratelimited(viommu->dev, "%s fault from EP %u\n", | ||
549 | reason_str, endpoint); | ||
550 | return 0; | ||
551 | } | ||
552 | |||
553 | static void viommu_event_handler(struct virtqueue *vq) | ||
554 | { | ||
555 | int ret; | ||
556 | unsigned int len; | ||
557 | struct scatterlist sg[1]; | ||
558 | struct viommu_event *evt; | ||
559 | struct viommu_dev *viommu = vq->vdev->priv; | ||
560 | |||
561 | while ((evt = virtqueue_get_buf(vq, &len)) != NULL) { | ||
562 | if (len > sizeof(*evt)) { | ||
563 | dev_err(viommu->dev, | ||
564 | "invalid event buffer (len %u != %zu)\n", | ||
565 | len, sizeof(*evt)); | ||
566 | } else if (!(evt->head & VIOMMU_FAULT_RESV_MASK)) { | ||
567 | viommu_fault_handler(viommu, &evt->fault); | ||
568 | } | ||
569 | |||
570 | sg_init_one(sg, evt, sizeof(*evt)); | ||
571 | ret = virtqueue_add_inbuf(vq, sg, 1, evt, GFP_ATOMIC); | ||
572 | if (ret) | ||
573 | dev_err(viommu->dev, "could not add event buffer\n"); | ||
574 | } | ||
575 | |||
576 | virtqueue_kick(vq); | ||
577 | } | ||
578 | |||
579 | /* IOMMU API */ | ||
580 | |||
581 | static struct iommu_domain *viommu_domain_alloc(unsigned type) | ||
582 | { | ||
583 | struct viommu_domain *vdomain; | ||
584 | |||
585 | if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) | ||
586 | return NULL; | ||
587 | |||
588 | vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL); | ||
589 | if (!vdomain) | ||
590 | return NULL; | ||
591 | |||
592 | mutex_init(&vdomain->mutex); | ||
593 | spin_lock_init(&vdomain->mappings_lock); | ||
594 | vdomain->mappings = RB_ROOT_CACHED; | ||
595 | |||
596 | if (type == IOMMU_DOMAIN_DMA && | ||
597 | iommu_get_dma_cookie(&vdomain->domain)) { | ||
598 | kfree(vdomain); | ||
599 | return NULL; | ||
600 | } | ||
601 | |||
602 | return &vdomain->domain; | ||
603 | } | ||
604 | |||
605 | static int viommu_domain_finalise(struct viommu_dev *viommu, | ||
606 | struct iommu_domain *domain) | ||
607 | { | ||
608 | int ret; | ||
609 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
610 | unsigned int max_domain = viommu->domain_bits > 31 ? ~0 : | ||
611 | (1U << viommu->domain_bits) - 1; | ||
612 | |||
613 | vdomain->viommu = viommu; | ||
614 | |||
615 | domain->pgsize_bitmap = viommu->pgsize_bitmap; | ||
616 | domain->geometry = viommu->geometry; | ||
617 | |||
618 | ret = ida_alloc_max(&viommu->domain_ids, max_domain, GFP_KERNEL); | ||
619 | if (ret >= 0) | ||
620 | vdomain->id = (unsigned int)ret; | ||
621 | |||
622 | return ret > 0 ? 0 : ret; | ||
623 | } | ||
624 | |||
625 | static void viommu_domain_free(struct iommu_domain *domain) | ||
626 | { | ||
627 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
628 | |||
629 | iommu_put_dma_cookie(domain); | ||
630 | |||
631 | /* Free all remaining mappings (size 2^64) */ | ||
632 | viommu_del_mappings(vdomain, 0, 0); | ||
633 | |||
634 | if (vdomain->viommu) | ||
635 | ida_free(&vdomain->viommu->domain_ids, vdomain->id); | ||
636 | |||
637 | kfree(vdomain); | ||
638 | } | ||
639 | |||
640 | static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) | ||
641 | { | ||
642 | int i; | ||
643 | int ret = 0; | ||
644 | struct virtio_iommu_req_attach req; | ||
645 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | ||
646 | struct viommu_endpoint *vdev = fwspec->iommu_priv; | ||
647 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
648 | |||
649 | mutex_lock(&vdomain->mutex); | ||
650 | if (!vdomain->viommu) { | ||
651 | /* | ||
652 | * Properly initialize the domain now that we know which viommu | ||
653 | * owns it. | ||
654 | */ | ||
655 | ret = viommu_domain_finalise(vdev->viommu, domain); | ||
656 | } else if (vdomain->viommu != vdev->viommu) { | ||
657 | dev_err(dev, "cannot attach to foreign vIOMMU\n"); | ||
658 | ret = -EXDEV; | ||
659 | } | ||
660 | mutex_unlock(&vdomain->mutex); | ||
661 | |||
662 | if (ret) | ||
663 | return ret; | ||
664 | |||
665 | /* | ||
666 | * In the virtio-iommu device, when attaching the endpoint to a new | ||
667 | * domain, it is detached from the old one and, if as a result the | ||
668 | * old domain isn't attached to any endpoint, all mappings are removed | ||
669 | * from the old domain and it is freed. | ||
670 | * | ||
671 | * In the driver the old domain still exists, and its mappings will be | ||
672 | * recreated if it gets reattached to an endpoint. Otherwise it will be | ||
673 | * freed explicitly. | ||
674 | * | ||
675 | * vdev->vdomain is protected by group->mutex | ||
676 | */ | ||
677 | if (vdev->vdomain) | ||
678 | vdev->vdomain->nr_endpoints--; | ||
679 | |||
680 | req = (struct virtio_iommu_req_attach) { | ||
681 | .head.type = VIRTIO_IOMMU_T_ATTACH, | ||
682 | .domain = cpu_to_le32(vdomain->id), | ||
683 | }; | ||
684 | |||
685 | for (i = 0; i < fwspec->num_ids; i++) { | ||
686 | req.endpoint = cpu_to_le32(fwspec->ids[i]); | ||
687 | |||
688 | ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req)); | ||
689 | if (ret) | ||
690 | return ret; | ||
691 | } | ||
692 | |||
693 | if (!vdomain->nr_endpoints) { | ||
694 | /* | ||
695 | * This endpoint is the first to be attached to the domain. | ||
696 | * Replay existing mappings (e.g. SW MSI). | ||
697 | */ | ||
698 | ret = viommu_replay_mappings(vdomain); | ||
699 | if (ret) | ||
700 | return ret; | ||
701 | } | ||
702 | |||
703 | vdomain->nr_endpoints++; | ||
704 | vdev->vdomain = vdomain; | ||
705 | |||
706 | return 0; | ||
707 | } | ||
708 | |||
709 | static int viommu_map(struct iommu_domain *domain, unsigned long iova, | ||
710 | phys_addr_t paddr, size_t size, int prot) | ||
711 | { | ||
712 | int ret; | ||
713 | int flags; | ||
714 | struct virtio_iommu_req_map map; | ||
715 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
716 | |||
717 | flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) | | ||
718 | (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) | | ||
719 | (prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0); | ||
720 | |||
721 | ret = viommu_add_mapping(vdomain, iova, paddr, size, flags); | ||
722 | if (ret) | ||
723 | return ret; | ||
724 | |||
725 | map = (struct virtio_iommu_req_map) { | ||
726 | .head.type = VIRTIO_IOMMU_T_MAP, | ||
727 | .domain = cpu_to_le32(vdomain->id), | ||
728 | .virt_start = cpu_to_le64(iova), | ||
729 | .phys_start = cpu_to_le64(paddr), | ||
730 | .virt_end = cpu_to_le64(iova + size - 1), | ||
731 | .flags = cpu_to_le32(flags), | ||
732 | }; | ||
733 | |||
734 | if (!vdomain->nr_endpoints) | ||
735 | return 0; | ||
736 | |||
737 | ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); | ||
738 | if (ret) | ||
739 | viommu_del_mappings(vdomain, iova, size); | ||
740 | |||
741 | return ret; | ||
742 | } | ||
743 | |||
744 | static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova, | ||
745 | size_t size) | ||
746 | { | ||
747 | int ret = 0; | ||
748 | size_t unmapped; | ||
749 | struct virtio_iommu_req_unmap unmap; | ||
750 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
751 | |||
752 | unmapped = viommu_del_mappings(vdomain, iova, size); | ||
753 | if (unmapped < size) | ||
754 | return 0; | ||
755 | |||
756 | /* Device already removed all mappings after detach. */ | ||
757 | if (!vdomain->nr_endpoints) | ||
758 | return unmapped; | ||
759 | |||
760 | unmap = (struct virtio_iommu_req_unmap) { | ||
761 | .head.type = VIRTIO_IOMMU_T_UNMAP, | ||
762 | .domain = cpu_to_le32(vdomain->id), | ||
763 | .virt_start = cpu_to_le64(iova), | ||
764 | .virt_end = cpu_to_le64(iova + unmapped - 1), | ||
765 | }; | ||
766 | |||
767 | ret = viommu_add_req(vdomain->viommu, &unmap, sizeof(unmap)); | ||
768 | return ret ? 0 : unmapped; | ||
769 | } | ||
770 | |||
771 | static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain, | ||
772 | dma_addr_t iova) | ||
773 | { | ||
774 | u64 paddr = 0; | ||
775 | unsigned long flags; | ||
776 | struct viommu_mapping *mapping; | ||
777 | struct interval_tree_node *node; | ||
778 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
779 | |||
780 | spin_lock_irqsave(&vdomain->mappings_lock, flags); | ||
781 | node = interval_tree_iter_first(&vdomain->mappings, iova, iova); | ||
782 | if (node) { | ||
783 | mapping = container_of(node, struct viommu_mapping, iova); | ||
784 | paddr = mapping->paddr + (iova - mapping->iova.start); | ||
785 | } | ||
786 | spin_unlock_irqrestore(&vdomain->mappings_lock, flags); | ||
787 | |||
788 | return paddr; | ||
789 | } | ||
790 | |||
791 | static void viommu_iotlb_sync(struct iommu_domain *domain) | ||
792 | { | ||
793 | struct viommu_domain *vdomain = to_viommu_domain(domain); | ||
794 | |||
795 | viommu_sync_req(vdomain->viommu); | ||
796 | } | ||
797 | |||
798 | static void viommu_get_resv_regions(struct device *dev, struct list_head *head) | ||
799 | { | ||
800 | struct iommu_resv_region *entry, *new_entry, *msi = NULL; | ||
801 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | ||
802 | struct viommu_endpoint *vdev = fwspec->iommu_priv; | ||
803 | int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; | ||
804 | |||
805 | list_for_each_entry(entry, &vdev->resv_regions, list) { | ||
806 | if (entry->type == IOMMU_RESV_MSI) | ||
807 | msi = entry; | ||
808 | |||
809 | new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL); | ||
810 | if (!new_entry) | ||
811 | return; | ||
812 | list_add_tail(&new_entry->list, head); | ||
813 | } | ||
814 | |||
815 | /* | ||
816 | * If the device didn't register any bypass MSI window, add a | ||
817 | * software-mapped region. | ||
818 | */ | ||
819 | if (!msi) { | ||
820 | msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, | ||
821 | prot, IOMMU_RESV_SW_MSI); | ||
822 | if (!msi) | ||
823 | return; | ||
824 | |||
825 | list_add_tail(&msi->list, head); | ||
826 | } | ||
827 | |||
828 | iommu_dma_get_resv_regions(dev, head); | ||
829 | } | ||
830 | |||
831 | static void viommu_put_resv_regions(struct device *dev, struct list_head *head) | ||
832 | { | ||
833 | struct iommu_resv_region *entry, *next; | ||
834 | |||
835 | list_for_each_entry_safe(entry, next, head, list) | ||
836 | kfree(entry); | ||
837 | } | ||
838 | |||
839 | static struct iommu_ops viommu_ops; | ||
840 | static struct virtio_driver virtio_iommu_drv; | ||
841 | |||
842 | static int viommu_match_node(struct device *dev, const void *data) | ||
843 | { | ||
844 | return dev->parent->fwnode == data; | ||
845 | } | ||
846 | |||
847 | static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode) | ||
848 | { | ||
849 | struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL, | ||
850 | fwnode, viommu_match_node); | ||
851 | put_device(dev); | ||
852 | |||
853 | return dev ? dev_to_virtio(dev)->priv : NULL; | ||
854 | } | ||
855 | |||
856 | static int viommu_add_device(struct device *dev) | ||
857 | { | ||
858 | int ret; | ||
859 | struct iommu_group *group; | ||
860 | struct viommu_endpoint *vdev; | ||
861 | struct viommu_dev *viommu = NULL; | ||
862 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | ||
863 | |||
864 | if (!fwspec || fwspec->ops != &viommu_ops) | ||
865 | return -ENODEV; | ||
866 | |||
867 | viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode); | ||
868 | if (!viommu) | ||
869 | return -ENODEV; | ||
870 | |||
871 | vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); | ||
872 | if (!vdev) | ||
873 | return -ENOMEM; | ||
874 | |||
875 | vdev->dev = dev; | ||
876 | vdev->viommu = viommu; | ||
877 | INIT_LIST_HEAD(&vdev->resv_regions); | ||
878 | fwspec->iommu_priv = vdev; | ||
879 | |||
880 | if (viommu->probe_size) { | ||
881 | /* Get additional information for this endpoint */ | ||
882 | ret = viommu_probe_endpoint(viommu, dev); | ||
883 | if (ret) | ||
884 | goto err_free_dev; | ||
885 | } | ||
886 | |||
887 | ret = iommu_device_link(&viommu->iommu, dev); | ||
888 | if (ret) | ||
889 | goto err_free_dev; | ||
890 | |||
891 | /* | ||
892 | * Last step creates a default domain and attaches to it. Everything | ||
893 | * must be ready. | ||
894 | */ | ||
895 | group = iommu_group_get_for_dev(dev); | ||
896 | if (IS_ERR(group)) { | ||
897 | ret = PTR_ERR(group); | ||
898 | goto err_unlink_dev; | ||
899 | } | ||
900 | |||
901 | iommu_group_put(group); | ||
902 | |||
903 | return PTR_ERR_OR_ZERO(group); | ||
904 | |||
905 | err_unlink_dev: | ||
906 | iommu_device_unlink(&viommu->iommu, dev); | ||
907 | err_free_dev: | ||
908 | viommu_put_resv_regions(dev, &vdev->resv_regions); | ||
909 | kfree(vdev); | ||
910 | |||
911 | return ret; | ||
912 | } | ||
913 | |||
914 | static void viommu_remove_device(struct device *dev) | ||
915 | { | ||
916 | struct viommu_endpoint *vdev; | ||
917 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); | ||
918 | |||
919 | if (!fwspec || fwspec->ops != &viommu_ops) | ||
920 | return; | ||
921 | |||
922 | vdev = fwspec->iommu_priv; | ||
923 | |||
924 | iommu_group_remove_device(dev); | ||
925 | iommu_device_unlink(&vdev->viommu->iommu, dev); | ||
926 | viommu_put_resv_regions(dev, &vdev->resv_regions); | ||
927 | kfree(vdev); | ||
928 | } | ||
929 | |||
930 | static struct iommu_group *viommu_device_group(struct device *dev) | ||
931 | { | ||
932 | if (dev_is_pci(dev)) | ||
933 | return pci_device_group(dev); | ||
934 | else | ||
935 | return generic_device_group(dev); | ||
936 | } | ||
937 | |||
938 | static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args) | ||
939 | { | ||
940 | return iommu_fwspec_add_ids(dev, args->args, 1); | ||
941 | } | ||
942 | |||
943 | static struct iommu_ops viommu_ops = { | ||
944 | .domain_alloc = viommu_domain_alloc, | ||
945 | .domain_free = viommu_domain_free, | ||
946 | .attach_dev = viommu_attach_dev, | ||
947 | .map = viommu_map, | ||
948 | .unmap = viommu_unmap, | ||
949 | .iova_to_phys = viommu_iova_to_phys, | ||
950 | .iotlb_sync = viommu_iotlb_sync, | ||
951 | .add_device = viommu_add_device, | ||
952 | .remove_device = viommu_remove_device, | ||
953 | .device_group = viommu_device_group, | ||
954 | .get_resv_regions = viommu_get_resv_regions, | ||
955 | .put_resv_regions = viommu_put_resv_regions, | ||
956 | .of_xlate = viommu_of_xlate, | ||
957 | }; | ||
958 | |||
959 | static int viommu_init_vqs(struct viommu_dev *viommu) | ||
960 | { | ||
961 | struct virtio_device *vdev = dev_to_virtio(viommu->dev); | ||
962 | const char *names[] = { "request", "event" }; | ||
963 | vq_callback_t *callbacks[] = { | ||
964 | NULL, /* No async requests */ | ||
965 | viommu_event_handler, | ||
966 | }; | ||
967 | |||
968 | return virtio_find_vqs(vdev, VIOMMU_NR_VQS, viommu->vqs, callbacks, | ||
969 | names, NULL); | ||
970 | } | ||
971 | |||
972 | static int viommu_fill_evtq(struct viommu_dev *viommu) | ||
973 | { | ||
974 | int i, ret; | ||
975 | struct scatterlist sg[1]; | ||
976 | struct viommu_event *evts; | ||
977 | struct virtqueue *vq = viommu->vqs[VIOMMU_EVENT_VQ]; | ||
978 | size_t nr_evts = vq->num_free; | ||
979 | |||
980 | viommu->evts = evts = devm_kmalloc_array(viommu->dev, nr_evts, | ||
981 | sizeof(*evts), GFP_KERNEL); | ||
982 | if (!evts) | ||
983 | return -ENOMEM; | ||
984 | |||
985 | for (i = 0; i < nr_evts; i++) { | ||
986 | sg_init_one(sg, &evts[i], sizeof(*evts)); | ||
987 | ret = virtqueue_add_inbuf(vq, sg, 1, &evts[i], GFP_KERNEL); | ||
988 | if (ret) | ||
989 | return ret; | ||
990 | } | ||
991 | |||
992 | return 0; | ||
993 | } | ||
994 | |||
995 | static int viommu_probe(struct virtio_device *vdev) | ||
996 | { | ||
997 | struct device *parent_dev = vdev->dev.parent; | ||
998 | struct viommu_dev *viommu = NULL; | ||
999 | struct device *dev = &vdev->dev; | ||
1000 | u64 input_start = 0; | ||
1001 | u64 input_end = -1UL; | ||
1002 | int ret; | ||
1003 | |||
1004 | if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || | ||
1005 | !virtio_has_feature(vdev, VIRTIO_IOMMU_F_MAP_UNMAP)) | ||
1006 | return -ENODEV; | ||
1007 | |||
1008 | viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL); | ||
1009 | if (!viommu) | ||
1010 | return -ENOMEM; | ||
1011 | |||
1012 | spin_lock_init(&viommu->request_lock); | ||
1013 | ida_init(&viommu->domain_ids); | ||
1014 | viommu->dev = dev; | ||
1015 | viommu->vdev = vdev; | ||
1016 | INIT_LIST_HEAD(&viommu->requests); | ||
1017 | |||
1018 | ret = viommu_init_vqs(viommu); | ||
1019 | if (ret) | ||
1020 | return ret; | ||
1021 | |||
1022 | virtio_cread(vdev, struct virtio_iommu_config, page_size_mask, | ||
1023 | &viommu->pgsize_bitmap); | ||
1024 | |||
1025 | if (!viommu->pgsize_bitmap) { | ||
1026 | ret = -EINVAL; | ||
1027 | goto err_free_vqs; | ||
1028 | } | ||
1029 | |||
1030 | viommu->domain_bits = 32; | ||
1031 | |||
1032 | /* Optional features */ | ||
1033 | virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, | ||
1034 | struct virtio_iommu_config, input_range.start, | ||
1035 | &input_start); | ||
1036 | |||
1037 | virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, | ||
1038 | struct virtio_iommu_config, input_range.end, | ||
1039 | &input_end); | ||
1040 | |||
1041 | virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS, | ||
1042 | struct virtio_iommu_config, domain_bits, | ||
1043 | &viommu->domain_bits); | ||
1044 | |||
1045 | virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE, | ||
1046 | struct virtio_iommu_config, probe_size, | ||
1047 | &viommu->probe_size); | ||
1048 | |||
1049 | viommu->geometry = (struct iommu_domain_geometry) { | ||
1050 | .aperture_start = input_start, | ||
1051 | .aperture_end = input_end, | ||
1052 | .force_aperture = true, | ||
1053 | }; | ||
1054 | |||
1055 | viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap; | ||
1056 | |||
1057 | virtio_device_ready(vdev); | ||
1058 | |||
1059 | /* Populate the event queue with buffers */ | ||
1060 | ret = viommu_fill_evtq(viommu); | ||
1061 | if (ret) | ||
1062 | goto err_free_vqs; | ||
1063 | |||
1064 | ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s", | ||
1065 | virtio_bus_name(vdev)); | ||
1066 | if (ret) | ||
1067 | goto err_free_vqs; | ||
1068 | |||
1069 | iommu_device_set_ops(&viommu->iommu, &viommu_ops); | ||
1070 | iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode); | ||
1071 | |||
1072 | iommu_device_register(&viommu->iommu); | ||
1073 | |||
1074 | #ifdef CONFIG_PCI | ||
1075 | if (pci_bus_type.iommu_ops != &viommu_ops) { | ||
1076 | pci_request_acs(); | ||
1077 | ret = bus_set_iommu(&pci_bus_type, &viommu_ops); | ||
1078 | if (ret) | ||
1079 | goto err_unregister; | ||
1080 | } | ||
1081 | #endif | ||
1082 | #ifdef CONFIG_ARM_AMBA | ||
1083 | if (amba_bustype.iommu_ops != &viommu_ops) { | ||
1084 | ret = bus_set_iommu(&amba_bustype, &viommu_ops); | ||
1085 | if (ret) | ||
1086 | goto err_unregister; | ||
1087 | } | ||
1088 | #endif | ||
1089 | if (platform_bus_type.iommu_ops != &viommu_ops) { | ||
1090 | ret = bus_set_iommu(&platform_bus_type, &viommu_ops); | ||
1091 | if (ret) | ||
1092 | goto err_unregister; | ||
1093 | } | ||
1094 | |||
1095 | vdev->priv = viommu; | ||
1096 | |||
1097 | dev_info(dev, "input address: %u bits\n", | ||
1098 | order_base_2(viommu->geometry.aperture_end)); | ||
1099 | dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap); | ||
1100 | |||
1101 | return 0; | ||
1102 | |||
1103 | err_unregister: | ||
1104 | iommu_device_sysfs_remove(&viommu->iommu); | ||
1105 | iommu_device_unregister(&viommu->iommu); | ||
1106 | err_free_vqs: | ||
1107 | vdev->config->del_vqs(vdev); | ||
1108 | |||
1109 | return ret; | ||
1110 | } | ||
1111 | |||
1112 | static void viommu_remove(struct virtio_device *vdev) | ||
1113 | { | ||
1114 | struct viommu_dev *viommu = vdev->priv; | ||
1115 | |||
1116 | iommu_device_sysfs_remove(&viommu->iommu); | ||
1117 | iommu_device_unregister(&viommu->iommu); | ||
1118 | |||
1119 | /* Stop all virtqueues */ | ||
1120 | vdev->config->reset(vdev); | ||
1121 | vdev->config->del_vqs(vdev); | ||
1122 | |||
1123 | dev_info(&vdev->dev, "device removed\n"); | ||
1124 | } | ||
1125 | |||
1126 | static void viommu_config_changed(struct virtio_device *vdev) | ||
1127 | { | ||
1128 | dev_warn(&vdev->dev, "config changed\n"); | ||
1129 | } | ||
1130 | |||
1131 | static unsigned int features[] = { | ||
1132 | VIRTIO_IOMMU_F_MAP_UNMAP, | ||
1133 | VIRTIO_IOMMU_F_DOMAIN_BITS, | ||
1134 | VIRTIO_IOMMU_F_INPUT_RANGE, | ||
1135 | VIRTIO_IOMMU_F_PROBE, | ||
1136 | }; | ||
1137 | |||
1138 | static struct virtio_device_id id_table[] = { | ||
1139 | { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID }, | ||
1140 | { 0 }, | ||
1141 | }; | ||
1142 | |||
1143 | static struct virtio_driver virtio_iommu_drv = { | ||
1144 | .driver.name = KBUILD_MODNAME, | ||
1145 | .driver.owner = THIS_MODULE, | ||
1146 | .id_table = id_table, | ||
1147 | .feature_table = features, | ||
1148 | .feature_table_size = ARRAY_SIZE(features), | ||
1149 | .probe = viommu_probe, | ||
1150 | .remove = viommu_remove, | ||
1151 | .config_changed = viommu_config_changed, | ||
1152 | }; | ||
1153 | |||
1154 | module_virtio_driver(virtio_iommu_drv); | ||
1155 | |||
1156 | MODULE_DESCRIPTION("Virtio IOMMU driver"); | ||
1157 | MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>"); | ||
1158 | MODULE_LICENSE("GPL v2"); | ||
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 20e0e7ee4edf..55e7f5bb0549 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -2294,8 +2294,12 @@ int of_map_rid(struct device_node *np, u32 rid,
2294 | return 0; | 2294 | return 0; |
2295 | } | 2295 | } |
2296 | 2296 | ||
2297 | pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n", | 2297 | pr_info("%pOF: no %s translation for rid 0x%x on %pOF\n", np, map_name, |
2298 | np, map_name, rid, target && *target ? *target : NULL); | 2298 | rid, target && *target ? *target : NULL); |
2299 | return -EFAULT; | 2299 | |
2300 | /* Bypasses translation */ | ||
2301 | if (id_out) | ||
2302 | *id_out = rid; | ||
2303 | return 0; | ||
2300 | } | 2304 | } |
2301 | EXPORT_SYMBOL_GPL(of_map_rid); | 2305 | EXPORT_SYMBOL_GPL(of_map_rid); |
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 73d5adec0a28..bc7b27a28795 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -22,12 +22,15 @@ void pci_set_of_node(struct pci_dev *dev)
22 | return; | 22 | return; |
23 | dev->dev.of_node = of_pci_find_child_device(dev->bus->dev.of_node, | 23 | dev->dev.of_node = of_pci_find_child_device(dev->bus->dev.of_node, |
24 | dev->devfn); | 24 | dev->devfn); |
25 | if (dev->dev.of_node) | ||
26 | dev->dev.fwnode = &dev->dev.of_node->fwnode; | ||
25 | } | 27 | } |
26 | 28 | ||
27 | void pci_release_of_node(struct pci_dev *dev) | 29 | void pci_release_of_node(struct pci_dev *dev) |
28 | { | 30 | { |
29 | of_node_put(dev->dev.of_node); | 31 | of_node_put(dev->dev.of_node); |
30 | dev->dev.of_node = NULL; | 32 | dev->dev.of_node = NULL; |
33 | dev->dev.fwnode = NULL; | ||
31 | } | 34 | } |
32 | 35 | ||
33 | void pci_set_bus_of_node(struct pci_bus *bus) | 36 | void pci_set_bus_of_node(struct pci_bus *bus) |
@@ -41,13 +44,18 @@ void pci_set_bus_of_node(struct pci_bus *bus)
41 | if (node && of_property_read_bool(node, "external-facing")) | 44 | if (node && of_property_read_bool(node, "external-facing")) |
42 | bus->self->untrusted = true; | 45 | bus->self->untrusted = true; |
43 | } | 46 | } |
47 | |||
44 | bus->dev.of_node = node; | 48 | bus->dev.of_node = node; |
49 | |||
50 | if (bus->dev.of_node) | ||
51 | bus->dev.fwnode = &bus->dev.of_node->fwnode; | ||
45 | } | 52 | } |
46 | 53 | ||
47 | void pci_release_bus_of_node(struct pci_bus *bus) | 54 | void pci_release_bus_of_node(struct pci_bus *bus) |
48 | { | 55 | { |
49 | of_node_put(bus->dev.of_node); | 56 | of_node_put(bus->dev.of_node); |
50 | bus->dev.of_node = NULL; | 57 | bus->dev.of_node = NULL; |
58 | bus->dev.fwnode = NULL; | ||
51 | } | 59 | } |
52 | 60 | ||
53 | struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) | 61 | struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) |
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 1705398b026a..297e1076e571 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -792,7 +792,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
792 | num_targets = virtscsi_config_get(vdev, max_target) + 1; | 792 | num_targets = virtscsi_config_get(vdev, max_target) + 1; |
793 | 793 | ||
794 | shost = scsi_host_alloc(&virtscsi_host_template, | 794 | shost = scsi_host_alloc(&virtscsi_host_template, |
795 | sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues); | 795 | struct_size(vscsi, req_vqs, num_queues)); |
796 | if (!shost) | 796 | if (!shost) |
797 | return -ENOMEM; | 797 | return -ENOMEM; |
798 | 798 | ||
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 247e5585af5d..1a2dd53caade 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -956,7 +956,7 @@ static void handle_tx(struct vhost_net *net)
956 | if (!sock) | 956 | if (!sock) |
957 | goto out; | 957 | goto out; |
958 | 958 | ||
959 | if (!vq_iotlb_prefetch(vq)) | 959 | if (!vq_meta_prefetch(vq)) |
960 | goto out; | 960 | goto out; |
961 | 961 | ||
962 | vhost_disable_notify(&net->dev, vq); | 962 | vhost_disable_notify(&net->dev, vq); |
@@ -1125,7 +1125,7 @@ static void handle_rx(struct vhost_net *net)
1125 | if (!sock) | 1125 | if (!sock) |
1126 | goto out; | 1126 | goto out; |
1127 | 1127 | ||
1128 | if (!vq_iotlb_prefetch(vq)) | 1128 | if (!vq_meta_prefetch(vq)) |
1129 | goto out; | 1129 | goto out; |
1130 | 1130 | ||
1131 | vhost_disable_notify(&net->dev, vq); | 1131 | vhost_disable_notify(&net->dev, vq); |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ff8892c38666..0536f8526359 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -298,6 +298,160 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
298 | __vhost_vq_meta_reset(d->vqs[i]); | 298 | __vhost_vq_meta_reset(d->vqs[i]); |
299 | } | 299 | } |
300 | 300 | ||
301 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
302 | static void vhost_map_unprefetch(struct vhost_map *map) | ||
303 | { | ||
304 | kfree(map->pages); | ||
305 | map->pages = NULL; | ||
306 | map->npages = 0; | ||
307 | map->addr = NULL; | ||
308 | } | ||
309 | |||
310 | static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) | ||
311 | { | ||
312 | struct vhost_map *map[VHOST_NUM_ADDRS]; | ||
313 | int i; | ||
314 | |||
315 | spin_lock(&vq->mmu_lock); | ||
316 | for (i = 0; i < VHOST_NUM_ADDRS; i++) { | ||
317 | map[i] = rcu_dereference_protected(vq->maps[i], | ||
318 | lockdep_is_held(&vq->mmu_lock)); | ||
319 | if (map[i]) | ||
320 | rcu_assign_pointer(vq->maps[i], NULL); | ||
321 | } | ||
322 | spin_unlock(&vq->mmu_lock); | ||
323 | |||
324 | synchronize_rcu(); | ||
325 | |||
326 | for (i = 0; i < VHOST_NUM_ADDRS; i++) | ||
327 | if (map[i]) | ||
328 | vhost_map_unprefetch(map[i]); | ||
329 | |||
330 | } | ||
331 | |||
332 | static void vhost_reset_vq_maps(struct vhost_virtqueue *vq) | ||
333 | { | ||
334 | int i; | ||
335 | |||
336 | vhost_uninit_vq_maps(vq); | ||
337 | for (i = 0; i < VHOST_NUM_ADDRS; i++) | ||
338 | vq->uaddrs[i].size = 0; | ||
339 | } | ||
340 | |||
341 | static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr, | ||
342 | unsigned long start, | ||
343 | unsigned long end) | ||
344 | { | ||
345 | if (unlikely(!uaddr->size)) | ||
346 | return false; | ||
347 | |||
348 | return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size); | ||
349 | } | ||
350 | |||
351 | static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, | ||
352 | int index, | ||
353 | unsigned long start, | ||
354 | unsigned long end) | ||
355 | { | ||
356 | struct vhost_uaddr *uaddr = &vq->uaddrs[index]; | ||
357 | struct vhost_map *map; | ||
358 | int i; | ||
359 | |||
360 | if (!vhost_map_range_overlap(uaddr, start, end)) | ||
361 | return; | ||
362 | |||
363 | spin_lock(&vq->mmu_lock); | ||
364 | ++vq->invalidate_count; | ||
365 | |||
366 | map = rcu_dereference_protected(vq->maps[index], | ||
367 | lockdep_is_held(&vq->mmu_lock)); | ||
368 | if (map) { | ||
369 | if (uaddr->write) { | ||
370 | for (i = 0; i < map->npages; i++) | ||
371 | set_page_dirty(map->pages[i]); | ||
372 | } | ||
373 | rcu_assign_pointer(vq->maps[index], NULL); | ||
374 | } | ||
375 | spin_unlock(&vq->mmu_lock); | ||
376 | |||
377 | if (map) { | ||
378 | synchronize_rcu(); | ||
379 | vhost_map_unprefetch(map); | ||
380 | } | ||
381 | } | ||
382 | |||
383 | static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq, | ||
384 | int index, | ||
385 | unsigned long start, | ||
386 | unsigned long end) | ||
387 | { | ||
388 | if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end)) | ||
389 | return; | ||
390 | |||
391 | spin_lock(&vq->mmu_lock); | ||
392 | --vq->invalidate_count; | ||
393 | spin_unlock(&vq->mmu_lock); | ||
394 | } | ||
395 | |||
396 | static int vhost_invalidate_range_start(struct mmu_notifier *mn, | ||
397 | const struct mmu_notifier_range *range) | ||
398 | { | ||
399 | struct vhost_dev *dev = container_of(mn, struct vhost_dev, | ||
400 | mmu_notifier); | ||
401 | int i, j; | ||
402 | |||
403 | if (!mmu_notifier_range_blockable(range)) | ||
404 | return -EAGAIN; | ||
405 | |||
406 | for (i = 0; i < dev->nvqs; i++) { | ||
407 | struct vhost_virtqueue *vq = dev->vqs[i]; | ||
408 | |||
409 | for (j = 0; j < VHOST_NUM_ADDRS; j++) | ||
410 | vhost_invalidate_vq_start(vq, j, | ||
411 | range->start, | ||
412 | range->end); | ||
413 | } | ||
414 | |||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | static void vhost_invalidate_range_end(struct mmu_notifier *mn, | ||
419 | const struct mmu_notifier_range *range) | ||
420 | { | ||
421 | struct vhost_dev *dev = container_of(mn, struct vhost_dev, | ||
422 | mmu_notifier); | ||
423 | int i, j; | ||
424 | |||
425 | for (i = 0; i < dev->nvqs; i++) { | ||
426 | struct vhost_virtqueue *vq = dev->vqs[i]; | ||
427 | |||
428 | for (j = 0; j < VHOST_NUM_ADDRS; j++) | ||
429 | vhost_invalidate_vq_end(vq, j, | ||
430 | range->start, | ||
431 | range->end); | ||
432 | } | ||
433 | } | ||
434 | |||
435 | static const struct mmu_notifier_ops vhost_mmu_notifier_ops = { | ||
436 | .invalidate_range_start = vhost_invalidate_range_start, | ||
437 | .invalidate_range_end = vhost_invalidate_range_end, | ||
438 | }; | ||
439 | |||
440 | static void vhost_init_maps(struct vhost_dev *dev) | ||
441 | { | ||
442 | struct vhost_virtqueue *vq; | ||
443 | int i, j; | ||
444 | |||
445 | dev->mmu_notifier.ops = &vhost_mmu_notifier_ops; | ||
446 | |||
447 | for (i = 0; i < dev->nvqs; ++i) { | ||
448 | vq = dev->vqs[i]; | ||
449 | for (j = 0; j < VHOST_NUM_ADDRS; j++) | ||
450 | RCU_INIT_POINTER(vq->maps[j], NULL); | ||
451 | } | ||
452 | } | ||
453 | #endif | ||
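For illustration, a minimal userspace sketch of the inclusive-endpoint overlap test that vhost_map_range_overlap() performs above; the helper name and the sample addresses below are invented for the sketch and are not part of the patch.

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the check in vhost_map_range_overlap(): a zero-sized region
 * never overlaps; otherwise the two ranges intersect unless one ends
 * before the other begins (both endpoints treated as inclusive). */
static bool range_overlap(unsigned long uaddr, unsigned long size,
			  unsigned long start, unsigned long end)
{
	if (!size)
		return false;

	return !(end < uaddr || start > uaddr - 1 + size);
}

int main(void)
{
	/* Example: a 4 KiB metadata region at 0x10000. */
	printf("%d\n", range_overlap(0x10000, 0x1000, 0xf000, 0x10000));  /* 1 */
	printf("%d\n", range_overlap(0x10000, 0x1000, 0x11000, 0x12000)); /* 0 */
	return 0;
}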
454 | |||
301 | static void vhost_vq_reset(struct vhost_dev *dev, | 455 | static void vhost_vq_reset(struct vhost_dev *dev, |
302 | struct vhost_virtqueue *vq) | 456 | struct vhost_virtqueue *vq) |
303 | { | 457 | { |
@@ -326,7 +480,11 @@ static void vhost_vq_reset(struct vhost_dev *dev, | |||
326 | vq->busyloop_timeout = 0; | 480 | vq->busyloop_timeout = 0; |
327 | vq->umem = NULL; | 481 | vq->umem = NULL; |
328 | vq->iotlb = NULL; | 482 | vq->iotlb = NULL; |
483 | vq->invalidate_count = 0; | ||
329 | __vhost_vq_meta_reset(vq); | 484 | __vhost_vq_meta_reset(vq); |
485 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
486 | vhost_reset_vq_maps(vq); | ||
487 | #endif | ||
330 | } | 488 | } |
331 | 489 | ||
332 | static int vhost_worker(void *data) | 490 | static int vhost_worker(void *data) |
@@ -427,6 +585,32 @@ bool vhost_exceeds_weight(struct vhost_virtqueue *vq, | |||
427 | } | 585 | } |
428 | EXPORT_SYMBOL_GPL(vhost_exceeds_weight); | 586 | EXPORT_SYMBOL_GPL(vhost_exceeds_weight); |
429 | 587 | ||
588 | static size_t vhost_get_avail_size(struct vhost_virtqueue *vq, | ||
589 | unsigned int num) | ||
590 | { | ||
591 | size_t event __maybe_unused = | ||
592 | vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | ||
593 | |||
594 | return sizeof(*vq->avail) + | ||
595 | sizeof(*vq->avail->ring) * num + event; | ||
596 | } | ||
597 | |||
598 | static size_t vhost_get_used_size(struct vhost_virtqueue *vq, | ||
599 | unsigned int num) | ||
600 | { | ||
601 | size_t event __maybe_unused = | ||
602 | vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | ||
603 | |||
604 | return sizeof(*vq->used) + | ||
605 | sizeof(*vq->used->ring) * num + event; | ||
606 | } | ||
607 | |||
608 | static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, | ||
609 | unsigned int num) | ||
610 | { | ||
611 | return sizeof(*vq->desc) * num; | ||
612 | } | ||
613 | |||
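As a quick sanity check on the three size helpers above, here is a small standalone sketch computing the same metadata sizes for a 256-entry split ring with VIRTIO_RING_F_EVENT_IDX negotiated. The struct layouts below are local stand-ins following the standard virtio split-ring format; vhost.c uses the real definitions from the uapi virtio_ring.h header.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the split-ring layouts (assumed per the virtio spec). */
struct vring_desc      { uint64_t addr; uint32_t len; uint16_t flags; uint16_t next; };
struct vring_avail     { uint16_t flags; uint16_t idx; uint16_t ring[]; };
struct vring_used_elem { uint32_t id; uint32_t len; };
struct vring_used      { uint16_t flags; uint16_t idx; struct vring_used_elem ring[]; };

int main(void)
{
	unsigned int num = 256;   /* ring size */
	size_t event = 2;         /* VIRTIO_RING_F_EVENT_IDX negotiated */

	printf("desc:  %zu\n", sizeof(struct vring_desc) * num);              /* 4096 */
	printf("avail: %zu\n", sizeof(struct vring_avail) +
			       sizeof(uint16_t) * num + event);               /* 518 */
	printf("used:  %zu\n", sizeof(struct vring_used) +
			       sizeof(struct vring_used_elem) * num + event); /* 2054 */
	return 0;
}

The two extra bytes for the event index appear only when the feature bit is negotiated, which is exactly what the conditional event term in the helpers above encodes.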
430 | void vhost_dev_init(struct vhost_dev *dev, | 614 | void vhost_dev_init(struct vhost_dev *dev, |
431 | struct vhost_virtqueue **vqs, int nvqs, | 615 | struct vhost_virtqueue **vqs, int nvqs, |
432 | int iov_limit, int weight, int byte_weight) | 616 | int iov_limit, int weight, int byte_weight) |
@@ -450,7 +634,9 @@ void vhost_dev_init(struct vhost_dev *dev, | |||
450 | INIT_LIST_HEAD(&dev->read_list); | 634 | INIT_LIST_HEAD(&dev->read_list); |
451 | INIT_LIST_HEAD(&dev->pending_list); | 635 | INIT_LIST_HEAD(&dev->pending_list); |
452 | spin_lock_init(&dev->iotlb_lock); | 636 | spin_lock_init(&dev->iotlb_lock); |
453 | 637 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | |
638 | vhost_init_maps(dev); | ||
639 | #endif | ||
454 | 640 | ||
455 | for (i = 0; i < dev->nvqs; ++i) { | 641 | for (i = 0; i < dev->nvqs; ++i) { |
456 | vq = dev->vqs[i]; | 642 | vq = dev->vqs[i]; |
@@ -459,6 +645,7 @@ void vhost_dev_init(struct vhost_dev *dev, | |||
459 | vq->heads = NULL; | 645 | vq->heads = NULL; |
460 | vq->dev = dev; | 646 | vq->dev = dev; |
461 | mutex_init(&vq->mutex); | 647 | mutex_init(&vq->mutex); |
648 | spin_lock_init(&vq->mmu_lock); | ||
462 | vhost_vq_reset(dev, vq); | 649 | vhost_vq_reset(dev, vq); |
463 | if (vq->handle_kick) | 650 | if (vq->handle_kick) |
464 | vhost_poll_init(&vq->poll, vq->handle_kick, | 651 | vhost_poll_init(&vq->poll, vq->handle_kick, |
@@ -538,7 +725,18 @@ long vhost_dev_set_owner(struct vhost_dev *dev) | |||
538 | if (err) | 725 | if (err) |
539 | goto err_cgroup; | 726 | goto err_cgroup; |
540 | 727 | ||
728 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
729 | err = mmu_notifier_register(&dev->mmu_notifier, dev->mm); | ||
730 | if (err) | ||
731 | goto err_mmu_notifier; | ||
732 | #endif | ||
733 | |||
541 | return 0; | 734 | return 0; |
735 | |||
736 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
737 | err_mmu_notifier: | ||
738 | vhost_dev_free_iovecs(dev); | ||
739 | #endif | ||
542 | err_cgroup: | 740 | err_cgroup: |
543 | kthread_stop(worker); | 741 | kthread_stop(worker); |
544 | dev->worker = NULL; | 742 | dev->worker = NULL; |
@@ -629,6 +827,107 @@ static void vhost_clear_msg(struct vhost_dev *dev) | |||
629 | spin_unlock(&dev->iotlb_lock); | 827 | spin_unlock(&dev->iotlb_lock); |
630 | } | 828 | } |
631 | 829 | ||
830 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
831 | static void vhost_setup_uaddr(struct vhost_virtqueue *vq, | ||
832 | int index, unsigned long uaddr, | ||
833 | size_t size, bool write) | ||
834 | { | ||
835 | struct vhost_uaddr *addr = &vq->uaddrs[index]; | ||
836 | |||
837 | addr->uaddr = uaddr; | ||
838 | addr->size = size; | ||
839 | addr->write = write; | ||
840 | } | ||
841 | |||
842 | static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq) | ||
843 | { | ||
844 | vhost_setup_uaddr(vq, VHOST_ADDR_DESC, | ||
845 | (unsigned long)vq->desc, | ||
846 | vhost_get_desc_size(vq, vq->num), | ||
847 | false); | ||
848 | vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL, | ||
849 | (unsigned long)vq->avail, | ||
850 | vhost_get_avail_size(vq, vq->num), | ||
851 | false); | ||
852 | vhost_setup_uaddr(vq, VHOST_ADDR_USED, | ||
853 | (unsigned long)vq->used, | ||
854 | vhost_get_used_size(vq, vq->num), | ||
855 | true); | ||
856 | } | ||
857 | |||
858 | static int vhost_map_prefetch(struct vhost_virtqueue *vq, | ||
859 | int index) | ||
860 | { | ||
861 | struct vhost_map *map; | ||
862 | struct vhost_uaddr *uaddr = &vq->uaddrs[index]; | ||
863 | struct page **pages; | ||
864 | int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE); | ||
865 | int npinned; | ||
866 | void *vaddr, *v; | ||
867 | int err; | ||
868 | int i; | ||
869 | |||
870 | spin_lock(&vq->mmu_lock); | ||
871 | |||
872 | err = -EFAULT; | ||
873 | if (vq->invalidate_count) | ||
874 | goto err; | ||
875 | |||
876 | err = -ENOMEM; | ||
877 | map = kmalloc(sizeof(*map), GFP_ATOMIC); | ||
878 | if (!map) | ||
879 | goto err; | ||
880 | |||
881 | pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC); | ||
882 | if (!pages) | ||
883 | goto err_pages; | ||
884 | |||
885 | err = -EFAULT; | ||
886 | npinned = __get_user_pages_fast(uaddr->uaddr, npages, | ||
887 | uaddr->write, pages); | ||
888 | if (npinned > 0) | ||
889 | release_pages(pages, npinned); | ||
890 | if (npinned != npages) | ||
891 | goto err_gup; | ||
892 | |||
893 | for (i = 0; i < npinned; i++) | ||
894 | if (PageHighMem(pages[i])) | ||
895 | goto err_gup; | ||
896 | |||
897 | vaddr = v = page_address(pages[0]); | ||
898 | |||
899 | /* For simplicity, fall back to the userspace address if the kernel VA is not | ||
900 | * contiguous. | ||
901 | */ | ||
902 | for (i = 1; i < npinned; i++) { | ||
903 | v += PAGE_SIZE; | ||
904 | if (v != page_address(pages[i])) | ||
905 | goto err_gup; | ||
906 | } | ||
907 | |||
908 | map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1)); | ||
909 | map->npages = npages; | ||
910 | map->pages = pages; | ||
911 | |||
912 | rcu_assign_pointer(vq->maps[index], map); | ||
913 | /* No need for a synchronize_rcu(). This function should be | ||
914 | * called by dev->worker so we are serialized with all | ||
915 | * readers. | ||
916 | */ | ||
917 | spin_unlock(&vq->mmu_lock); | ||
918 | |||
919 | return 0; | ||
920 | |||
921 | err_gup: | ||
922 | kfree(pages); | ||
923 | err_pages: | ||
924 | kfree(map); | ||
925 | err: | ||
926 | spin_unlock(&vq->mmu_lock); | ||
927 | return err; | ||
928 | } | ||
929 | #endif | ||
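vhost_map_prefetch() only publishes a mapping when no MMU invalidation is in flight, which is what the invalidate_count check guards. Below is a minimal userspace model of that interlock, assuming a pthread mutex in place of mmu_lock, a heap allocation in place of the pinned pages, and -1 in place of -EFAULT; unlike the real code, the model frees the mapping under the lock instead of after synchronize_rcu().

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static int invalidate_count;
static void *map;	/* stands in for vq->maps[index] */

static int map_prefetch(void)
{
	int err = 0;

	pthread_mutex_lock(&mmu_lock);
	if (invalidate_count) {
		/* An invalidation is in flight: do not publish a mapping. */
		err = -1;
	} else if (!map) {
		map = malloc(64);	/* stands in for the pinned pages */
	}
	pthread_mutex_unlock(&mmu_lock);
	return err;
}

static void invalidate_start(void)
{
	pthread_mutex_lock(&mmu_lock);
	invalidate_count++;
	free(map);	/* real code: drop lock, synchronize_rcu(), then free */
	map = NULL;
	pthread_mutex_unlock(&mmu_lock);
}

static void invalidate_end(void)
{
	pthread_mutex_lock(&mmu_lock);
	invalidate_count--;
	pthread_mutex_unlock(&mmu_lock);
}

int main(void)
{
	invalidate_start();
	printf("prefetch during invalidation: %d\n", map_prefetch()); /* -1 */
	invalidate_end();
	printf("prefetch afterwards:          %d\n", map_prefetch()); /* 0 */
	free(map);
	return 0;
}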
930 | |||
632 | void vhost_dev_cleanup(struct vhost_dev *dev) | 931 | void vhost_dev_cleanup(struct vhost_dev *dev) |
633 | { | 932 | { |
634 | int i; | 933 | int i; |
@@ -658,8 +957,16 @@ void vhost_dev_cleanup(struct vhost_dev *dev) | |||
658 | kthread_stop(dev->worker); | 957 | kthread_stop(dev->worker); |
659 | dev->worker = NULL; | 958 | dev->worker = NULL; |
660 | } | 959 | } |
661 | if (dev->mm) | 960 | if (dev->mm) { |
961 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
962 | mmu_notifier_unregister(&dev->mmu_notifier, dev->mm); | ||
963 | #endif | ||
662 | mmput(dev->mm); | 964 | mmput(dev->mm); |
965 | } | ||
966 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
967 | for (i = 0; i < dev->nvqs; i++) | ||
968 | vhost_uninit_vq_maps(dev->vqs[i]); | ||
969 | #endif | ||
663 | dev->mm = NULL; | 970 | dev->mm = NULL; |
664 | } | 971 | } |
665 | EXPORT_SYMBOL_GPL(vhost_dev_cleanup); | 972 | EXPORT_SYMBOL_GPL(vhost_dev_cleanup); |
@@ -886,6 +1193,113 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, | |||
886 | ret; \ | 1193 | ret; \ |
887 | }) | 1194 | }) |
888 | 1195 | ||
1196 | static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) | ||
1197 | { | ||
1198 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1199 | struct vhost_map *map; | ||
1200 | struct vring_used *used; | ||
1201 | |||
1202 | if (!vq->iotlb) { | ||
1203 | rcu_read_lock(); | ||
1204 | |||
1205 | map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); | ||
1206 | if (likely(map)) { | ||
1207 | used = map->addr; | ||
1208 | *((__virtio16 *)&used->ring[vq->num]) = | ||
1209 | cpu_to_vhost16(vq, vq->avail_idx); | ||
1210 | rcu_read_unlock(); | ||
1211 | return 0; | ||
1212 | } | ||
1213 | |||
1214 | rcu_read_unlock(); | ||
1215 | } | ||
1216 | #endif | ||
1217 | |||
1218 | return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), | ||
1219 | vhost_avail_event(vq)); | ||
1220 | } | ||
1221 | |||
1222 | static inline int vhost_put_used(struct vhost_virtqueue *vq, | ||
1223 | struct vring_used_elem *head, int idx, | ||
1224 | int count) | ||
1225 | { | ||
1226 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1227 | struct vhost_map *map; | ||
1228 | struct vring_used *used; | ||
1229 | size_t size; | ||
1230 | |||
1231 | if (!vq->iotlb) { | ||
1232 | rcu_read_lock(); | ||
1233 | |||
1234 | map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); | ||
1235 | if (likely(map)) { | ||
1236 | used = map->addr; | ||
1237 | size = count * sizeof(*head); | ||
1238 | memcpy(used->ring + idx, head, size); | ||
1239 | rcu_read_unlock(); | ||
1240 | return 0; | ||
1241 | } | ||
1242 | |||
1243 | rcu_read_unlock(); | ||
1244 | } | ||
1245 | #endif | ||
1246 | |||
1247 | return vhost_copy_to_user(vq, vq->used->ring + idx, head, | ||
1248 | count * sizeof(*head)); | ||
1249 | } | ||
1250 | |||
1251 | static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) | ||
1252 | |||
1253 | { | ||
1254 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1255 | struct vhost_map *map; | ||
1256 | struct vring_used *used; | ||
1257 | |||
1258 | if (!vq->iotlb) { | ||
1259 | rcu_read_lock(); | ||
1260 | |||
1261 | map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); | ||
1262 | if (likely(map)) { | ||
1263 | used = map->addr; | ||
1264 | used->flags = cpu_to_vhost16(vq, vq->used_flags); | ||
1265 | rcu_read_unlock(); | ||
1266 | return 0; | ||
1267 | } | ||
1268 | |||
1269 | rcu_read_unlock(); | ||
1270 | } | ||
1271 | #endif | ||
1272 | |||
1273 | return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), | ||
1274 | &vq->used->flags); | ||
1275 | } | ||
1276 | |||
1277 | static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) | ||
1278 | |||
1279 | { | ||
1280 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1281 | struct vhost_map *map; | ||
1282 | struct vring_used *used; | ||
1283 | |||
1284 | if (!vq->iotlb) { | ||
1285 | rcu_read_lock(); | ||
1286 | |||
1287 | map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); | ||
1288 | if (likely(map)) { | ||
1289 | used = map->addr; | ||
1290 | used->idx = cpu_to_vhost16(vq, vq->last_used_idx); | ||
1291 | rcu_read_unlock(); | ||
1292 | return 0; | ||
1293 | } | ||
1294 | |||
1295 | rcu_read_unlock(); | ||
1296 | } | ||
1297 | #endif | ||
1298 | |||
1299 | return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), | ||
1300 | &vq->used->idx); | ||
1301 | } | ||
1302 | |||
889 | #define vhost_get_user(vq, x, ptr, type) \ | 1303 | #define vhost_get_user(vq, x, ptr, type) \ |
890 | ({ \ | 1304 | ({ \ |
891 | int ret; \ | 1305 | int ret; \ |
@@ -924,6 +1338,155 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d) | |||
924 | mutex_unlock(&d->vqs[i]->mutex); | 1338 | mutex_unlock(&d->vqs[i]->mutex); |
925 | } | 1339 | } |
926 | 1340 | ||
1341 | static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, | ||
1342 | __virtio16 *idx) | ||
1343 | { | ||
1344 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1345 | struct vhost_map *map; | ||
1346 | struct vring_avail *avail; | ||
1347 | |||
1348 | if (!vq->iotlb) { | ||
1349 | rcu_read_lock(); | ||
1350 | |||
1351 | map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); | ||
1352 | if (likely(map)) { | ||
1353 | avail = map->addr; | ||
1354 | *idx = avail->idx; | ||
1355 | rcu_read_unlock(); | ||
1356 | return 0; | ||
1357 | } | ||
1358 | |||
1359 | rcu_read_unlock(); | ||
1360 | } | ||
1361 | #endif | ||
1362 | |||
1363 | return vhost_get_avail(vq, *idx, &vq->avail->idx); | ||
1364 | } | ||
1365 | |||
1366 | static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, | ||
1367 | __virtio16 *head, int idx) | ||
1368 | { | ||
1369 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1370 | struct vhost_map *map; | ||
1371 | struct vring_avail *avail; | ||
1372 | |||
1373 | if (!vq->iotlb) { | ||
1374 | rcu_read_lock(); | ||
1375 | |||
1376 | map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); | ||
1377 | if (likely(map)) { | ||
1378 | avail = map->addr; | ||
1379 | *head = avail->ring[idx & (vq->num - 1)]; | ||
1380 | rcu_read_unlock(); | ||
1381 | return 0; | ||
1382 | } | ||
1383 | |||
1384 | rcu_read_unlock(); | ||
1385 | } | ||
1386 | #endif | ||
1387 | |||
1388 | return vhost_get_avail(vq, *head, | ||
1389 | &vq->avail->ring[idx & (vq->num - 1)]); | ||
1390 | } | ||
1391 | |||
1392 | static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, | ||
1393 | __virtio16 *flags) | ||
1394 | { | ||
1395 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1396 | struct vhost_map *map; | ||
1397 | struct vring_avail *avail; | ||
1398 | |||
1399 | if (!vq->iotlb) { | ||
1400 | rcu_read_lock(); | ||
1401 | |||
1402 | map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); | ||
1403 | if (likely(map)) { | ||
1404 | avail = map->addr; | ||
1405 | *flags = avail->flags; | ||
1406 | rcu_read_unlock(); | ||
1407 | return 0; | ||
1408 | } | ||
1409 | |||
1410 | rcu_read_unlock(); | ||
1411 | } | ||
1412 | #endif | ||
1413 | |||
1414 | return vhost_get_avail(vq, *flags, &vq->avail->flags); | ||
1415 | } | ||
1416 | |||
1417 | static inline int vhost_get_used_event(struct vhost_virtqueue *vq, | ||
1418 | __virtio16 *event) | ||
1419 | { | ||
1420 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1421 | struct vhost_map *map; | ||
1422 | struct vring_avail *avail; | ||
1423 | |||
1424 | if (!vq->iotlb) { | ||
1425 | rcu_read_lock(); | ||
1426 | map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); | ||
1427 | if (likely(map)) { | ||
1428 | avail = map->addr; | ||
1429 | *event = (__virtio16)avail->ring[vq->num]; | ||
1430 | rcu_read_unlock(); | ||
1431 | return 0; | ||
1432 | } | ||
1433 | rcu_read_unlock(); | ||
1434 | } | ||
1435 | #endif | ||
1436 | |||
1437 | return vhost_get_avail(vq, *event, vhost_used_event(vq)); | ||
1438 | } | ||
1439 | |||
1440 | static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, | ||
1441 | __virtio16 *idx) | ||
1442 | { | ||
1443 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1444 | struct vhost_map *map; | ||
1445 | struct vring_used *used; | ||
1446 | |||
1447 | if (!vq->iotlb) { | ||
1448 | rcu_read_lock(); | ||
1449 | |||
1450 | map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); | ||
1451 | if (likely(map)) { | ||
1452 | used = map->addr; | ||
1453 | *idx = used->idx; | ||
1454 | rcu_read_unlock(); | ||
1455 | return 0; | ||
1456 | } | ||
1457 | |||
1458 | rcu_read_unlock(); | ||
1459 | } | ||
1460 | #endif | ||
1461 | |||
1462 | return vhost_get_used(vq, *idx, &vq->used->idx); | ||
1463 | } | ||
1464 | |||
1465 | static inline int vhost_get_desc(struct vhost_virtqueue *vq, | ||
1466 | struct vring_desc *desc, int idx) | ||
1467 | { | ||
1468 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1469 | struct vhost_map *map; | ||
1470 | struct vring_desc *d; | ||
1471 | |||
1472 | if (!vq->iotlb) { | ||
1473 | rcu_read_lock(); | ||
1474 | |||
1475 | map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]); | ||
1476 | if (likely(map)) { | ||
1477 | d = map->addr; | ||
1478 | *desc = *(d + idx); | ||
1479 | rcu_read_unlock(); | ||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1483 | rcu_read_unlock(); | ||
1484 | } | ||
1485 | #endif | ||
1486 | |||
1487 | return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); | ||
1488 | } | ||
1489 | |||
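All of the vhost_get_*() and vhost_put_*() helpers above share the same shape: try the published kernel mapping first, otherwise fall back to the userspace-copy path. Here is a hedged userspace model of that shape, where a C11 acquire/release pointer stands in for the RCU-protected vq->maps[] entry and slow_get_idx() stands in for the uaccess fallback; all names are invented for the sketch.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic(uint16_t *) published_idx;	/* stands in for vq->maps[VHOST_ADDR_AVAIL] */

/* Slow path: stands in for the vhost_get_avail()/copy_from_user() route. */
static int slow_get_idx(uint16_t *out)
{
	*out = 42;
	return 0;
}

static int get_avail_idx(uint16_t *out)
{
	/* Fast path: direct load through the published kernel mapping. */
	uint16_t *p = atomic_load_explicit(&published_idx, memory_order_acquire);

	if (p) {
		*out = *p;
		return 0;
	}
	return slow_get_idx(out);	/* fall back to uaccess */
}

int main(void)
{
	static uint16_t idx = 7;
	uint16_t v;

	get_avail_idx(&v);
	printf("before publish: %u\n", v);	/* 42, via the slow path */

	atomic_store_explicit(&published_idx, &idx, memory_order_release);
	get_avail_idx(&v);
	printf("after publish:  %u\n", v);	/* 7, via the fast path */
	return 0;
}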
927 | static int vhost_new_umem_range(struct vhost_umem *umem, | 1490 | static int vhost_new_umem_range(struct vhost_umem *umem, |
928 | u64 start, u64 size, u64 end, | 1491 | u64 start, u64 size, u64 end, |
929 | u64 userspace_addr, int perm) | 1492 | u64 userspace_addr, int perm) |
@@ -1209,13 +1772,9 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, | |||
1209 | struct vring_used __user *used) | 1772 | struct vring_used __user *used) |
1210 | 1773 | ||
1211 | { | 1774 | { |
1212 | size_t s __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | 1775 | return access_ok(desc, vhost_get_desc_size(vq, num)) && |
1213 | 1776 | access_ok(avail, vhost_get_avail_size(vq, num)) && | |
1214 | return access_ok(desc, num * sizeof *desc) && | 1777 | access_ok(used, vhost_get_used_size(vq, num)); |
1215 | access_ok(avail, | ||
1216 | sizeof *avail + num * sizeof *avail->ring + s) && | ||
1217 | access_ok(used, | ||
1218 | sizeof *used + num * sizeof *used->ring + s); | ||
1219 | } | 1778 | } |
1220 | 1779 | ||
1221 | static void vhost_vq_meta_update(struct vhost_virtqueue *vq, | 1780 | static void vhost_vq_meta_update(struct vhost_virtqueue *vq, |
@@ -1265,26 +1824,42 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq, | |||
1265 | return true; | 1824 | return true; |
1266 | } | 1825 | } |
1267 | 1826 | ||
1268 | int vq_iotlb_prefetch(struct vhost_virtqueue *vq) | 1827 | #if VHOST_ARCH_CAN_ACCEL_UACCESS |
1828 | static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq) | ||
1829 | { | ||
1830 | struct vhost_map __rcu *map; | ||
1831 | int i; | ||
1832 | |||
1833 | for (i = 0; i < VHOST_NUM_ADDRS; i++) { | ||
1834 | rcu_read_lock(); | ||
1835 | map = rcu_dereference(vq->maps[i]); | ||
1836 | rcu_read_unlock(); | ||
1837 | if (unlikely(!map)) | ||
1838 | vhost_map_prefetch(vq, i); | ||
1839 | } | ||
1840 | } | ||
1841 | #endif | ||
1842 | |||
1843 | int vq_meta_prefetch(struct vhost_virtqueue *vq) | ||
1269 | { | 1844 | { |
1270 | size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | ||
1271 | unsigned int num = vq->num; | 1845 | unsigned int num = vq->num; |
1272 | 1846 | ||
1273 | if (!vq->iotlb) | 1847 | if (!vq->iotlb) { |
1848 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
1849 | vhost_vq_map_prefetch(vq); | ||
1850 | #endif | ||
1274 | return 1; | 1851 | return 1; |
1852 | } | ||
1275 | 1853 | ||
1276 | return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, | 1854 | return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, |
1277 | num * sizeof(*vq->desc), VHOST_ADDR_DESC) && | 1855 | vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && |
1278 | iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, | 1856 | iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, |
1279 | sizeof *vq->avail + | 1857 | vhost_get_avail_size(vq, num), |
1280 | num * sizeof(*vq->avail->ring) + s, | ||
1281 | VHOST_ADDR_AVAIL) && | 1858 | VHOST_ADDR_AVAIL) && |
1282 | iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, | 1859 | iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, |
1283 | sizeof *vq->used + | 1860 | vhost_get_used_size(vq, num), VHOST_ADDR_USED); |
1284 | num * sizeof(*vq->used->ring) + s, | ||
1285 | VHOST_ADDR_USED); | ||
1286 | } | 1861 | } |
1287 | EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); | 1862 | EXPORT_SYMBOL_GPL(vq_meta_prefetch); |
1288 | 1863 | ||
1289 | /* Can we log writes? */ | 1864 | /* Can we log writes? */ |
1290 | /* Caller should have device mutex but not vq mutex */ | 1865 | /* Caller should have device mutex but not vq mutex */ |
@@ -1299,13 +1874,10 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok); | |||
1299 | static bool vq_log_access_ok(struct vhost_virtqueue *vq, | 1874 | static bool vq_log_access_ok(struct vhost_virtqueue *vq, |
1300 | void __user *log_base) | 1875 | void __user *log_base) |
1301 | { | 1876 | { |
1302 | size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; | ||
1303 | |||
1304 | return vq_memory_access_ok(log_base, vq->umem, | 1877 | return vq_memory_access_ok(log_base, vq->umem, |
1305 | vhost_has_feature(vq, VHOST_F_LOG_ALL)) && | 1878 | vhost_has_feature(vq, VHOST_F_LOG_ALL)) && |
1306 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, | 1879 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
1307 | sizeof *vq->used + | 1880 | vhost_get_used_size(vq, vq->num))); |
1308 | vq->num * sizeof *vq->used->ring + s)); | ||
1309 | } | 1881 | } |
1310 | 1882 | ||
1311 | /* Can we start vq? */ | 1883 | /* Can we start vq? */ |
@@ -1405,6 +1977,121 @@ err: | |||
1405 | return -EFAULT; | 1977 | return -EFAULT; |
1406 | } | 1978 | } |
1407 | 1979 | ||
1980 | static long vhost_vring_set_num(struct vhost_dev *d, | ||
1981 | struct vhost_virtqueue *vq, | ||
1982 | void __user *argp) | ||
1983 | { | ||
1984 | struct vhost_vring_state s; | ||
1985 | |||
1986 | /* Resizing ring with an active backend? | ||
1987 | * You don't want to do that. */ | ||
1988 | if (vq->private_data) | ||
1989 | return -EBUSY; | ||
1990 | |||
1991 | if (copy_from_user(&s, argp, sizeof s)) | ||
1992 | return -EFAULT; | ||
1993 | |||
1994 | if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) | ||
1995 | return -EINVAL; | ||
1996 | vq->num = s.num; | ||
1997 | |||
1998 | return 0; | ||
1999 | } | ||
2000 | |||
2001 | static long vhost_vring_set_addr(struct vhost_dev *d, | ||
2002 | struct vhost_virtqueue *vq, | ||
2003 | void __user *argp) | ||
2004 | { | ||
2005 | struct vhost_vring_addr a; | ||
2006 | |||
2007 | if (copy_from_user(&a, argp, sizeof a)) | ||
2008 | return -EFAULT; | ||
2009 | if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) | ||
2010 | return -EOPNOTSUPP; | ||
2011 | |||
2012 | /* For 32bit, verify that the top 32bits of the user | ||
2013 | data are set to zero. */ | ||
2014 | if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || | ||
2015 | (u64)(unsigned long)a.used_user_addr != a.used_user_addr || | ||
2016 | (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) | ||
2017 | return -EFAULT; | ||
2018 | |||
2019 | /* Make sure it's safe to cast pointers to vring types. */ | ||
2020 | BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); | ||
2021 | BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); | ||
2022 | if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || | ||
2023 | (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || | ||
2024 | (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) | ||
2025 | return -EINVAL; | ||
2026 | |||
2027 | /* We only verify access here if backend is configured. | ||
2028 | * If it is not, we don't as size might not have been setup. | ||
2029 | * We will verify when backend is configured. */ | ||
2030 | if (vq->private_data) { | ||
2031 | if (!vq_access_ok(vq, vq->num, | ||
2032 | (void __user *)(unsigned long)a.desc_user_addr, | ||
2033 | (void __user *)(unsigned long)a.avail_user_addr, | ||
2034 | (void __user *)(unsigned long)a.used_user_addr)) | ||
2035 | return -EINVAL; | ||
2036 | |||
2037 | /* Also validate log access for used ring if enabled. */ | ||
2038 | if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && | ||
2039 | !log_access_ok(vq->log_base, a.log_guest_addr, | ||
2040 | sizeof *vq->used + | ||
2041 | vq->num * sizeof *vq->used->ring)) | ||
2042 | return -EINVAL; | ||
2043 | } | ||
2044 | |||
2045 | vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); | ||
2046 | vq->desc = (void __user *)(unsigned long)a.desc_user_addr; | ||
2047 | vq->avail = (void __user *)(unsigned long)a.avail_user_addr; | ||
2048 | vq->log_addr = a.log_guest_addr; | ||
2049 | vq->used = (void __user *)(unsigned long)a.used_user_addr; | ||
2050 | |||
2051 | return 0; | ||
2052 | } | ||
2053 | |||
2054 | static long vhost_vring_set_num_addr(struct vhost_dev *d, | ||
2055 | struct vhost_virtqueue *vq, | ||
2056 | unsigned int ioctl, | ||
2057 | void __user *argp) | ||
2058 | { | ||
2059 | long r; | ||
2060 | |||
2061 | mutex_lock(&vq->mutex); | ||
2062 | |||
2063 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
2064 | /* Unregister the MMU notifier so that the invalidation callback | ||
2065 | * can access vq->uaddrs[] without holding a lock. | ||
2066 | */ | ||
2067 | if (d->mm) | ||
2068 | mmu_notifier_unregister(&d->mmu_notifier, d->mm); | ||
2069 | |||
2070 | vhost_uninit_vq_maps(vq); | ||
2071 | #endif | ||
2072 | |||
2073 | switch (ioctl) { | ||
2074 | case VHOST_SET_VRING_NUM: | ||
2075 | r = vhost_vring_set_num(d, vq, argp); | ||
2076 | break; | ||
2077 | case VHOST_SET_VRING_ADDR: | ||
2078 | r = vhost_vring_set_addr(d, vq, argp); | ||
2079 | break; | ||
2080 | default: | ||
2081 | BUG(); | ||
2082 | } | ||
2083 | |||
2084 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
2085 | vhost_setup_vq_uaddr(vq); | ||
2086 | |||
2087 | if (d->mm) | ||
2088 | mmu_notifier_register(&d->mmu_notifier, d->mm); | ||
2089 | #endif | ||
2090 | |||
2091 | mutex_unlock(&vq->mutex); | ||
2092 | |||
2093 | return r; | ||
2094 | } | ||
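For context, the two ioctls that vhost_vring_set_num_addr() now dispatches are issued from userspace roughly as below. This is a hedged sketch: setup_vring() is a made-up helper, the fd is assumed to come from opening a vhost character device such as /dev/vhost-net, and the ring pointers are whatever the VMM allocated.

#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Hypothetical helper: program the ring size and addresses of one virtqueue. */
static int setup_vring(int vhost_fd, unsigned int index, unsigned int num,
		       void *desc, void *avail, void *used)
{
	struct vhost_vring_state state = { .index = index, .num = num };
	struct vhost_vring_addr addr = {
		.index = index,
		.desc_user_addr  = (uint64_t)(uintptr_t)desc,
		.avail_user_addr = (uint64_t)(uintptr_t)avail,
		.used_user_addr  = (uint64_t)(uintptr_t)used,
	};

	/* num must be a non-zero power of two no larger than 0xffff,
	 * per vhost_vring_set_num() above. */
	if (ioctl(vhost_fd, VHOST_SET_VRING_NUM, &state) < 0)
		return -1;

	/* avail/used must honour VRING_*_ALIGN_SIZE, per vhost_vring_set_addr(). */
	return ioctl(vhost_fd, VHOST_SET_VRING_ADDR, &addr);
}

The constraints checked above (power-of-two size, ring alignment, zero upper address bits on 32-bit) apply to exactly these arguments.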
1408 | long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) | 2095 | long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) |
1409 | { | 2096 | { |
1410 | struct file *eventfp, *filep = NULL; | 2097 | struct file *eventfp, *filep = NULL; |
@@ -1414,7 +2101,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg | |||
1414 | struct vhost_virtqueue *vq; | 2101 | struct vhost_virtqueue *vq; |
1415 | struct vhost_vring_state s; | 2102 | struct vhost_vring_state s; |
1416 | struct vhost_vring_file f; | 2103 | struct vhost_vring_file f; |
1417 | struct vhost_vring_addr a; | ||
1418 | u32 idx; | 2104 | u32 idx; |
1419 | long r; | 2105 | long r; |
1420 | 2106 | ||
@@ -1427,26 +2113,14 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg | |||
1427 | idx = array_index_nospec(idx, d->nvqs); | 2113 | idx = array_index_nospec(idx, d->nvqs); |
1428 | vq = d->vqs[idx]; | 2114 | vq = d->vqs[idx]; |
1429 | 2115 | ||
2116 | if (ioctl == VHOST_SET_VRING_NUM || | ||
2117 | ioctl == VHOST_SET_VRING_ADDR) { | ||
2118 | return vhost_vring_set_num_addr(d, vq, ioctl, argp); | ||
2119 | } | ||
2120 | |||
1430 | mutex_lock(&vq->mutex); | 2121 | mutex_lock(&vq->mutex); |
1431 | 2122 | ||
1432 | switch (ioctl) { | 2123 | switch (ioctl) { |
1433 | case VHOST_SET_VRING_NUM: | ||
1434 | /* Resizing ring with an active backend? | ||
1435 | * You don't want to do that. */ | ||
1436 | if (vq->private_data) { | ||
1437 | r = -EBUSY; | ||
1438 | break; | ||
1439 | } | ||
1440 | if (copy_from_user(&s, argp, sizeof s)) { | ||
1441 | r = -EFAULT; | ||
1442 | break; | ||
1443 | } | ||
1444 | if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { | ||
1445 | r = -EINVAL; | ||
1446 | break; | ||
1447 | } | ||
1448 | vq->num = s.num; | ||
1449 | break; | ||
1450 | case VHOST_SET_VRING_BASE: | 2124 | case VHOST_SET_VRING_BASE: |
1451 | /* Moving base with an active backend? | 2125 | /* Moving base with an active backend? |
1452 | * You don't want to do that. */ | 2126 | * You don't want to do that. */ |
@@ -1472,62 +2146,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg | |||
1472 | if (copy_to_user(argp, &s, sizeof s)) | 2146 | if (copy_to_user(argp, &s, sizeof s)) |
1473 | r = -EFAULT; | 2147 | r = -EFAULT; |
1474 | break; | 2148 | break; |
1475 | case VHOST_SET_VRING_ADDR: | ||
1476 | if (copy_from_user(&a, argp, sizeof a)) { | ||
1477 | r = -EFAULT; | ||
1478 | break; | ||
1479 | } | ||
1480 | if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { | ||
1481 | r = -EOPNOTSUPP; | ||
1482 | break; | ||
1483 | } | ||
1484 | /* For 32bit, verify that the top 32bits of the user | ||
1485 | data are set to zero. */ | ||
1486 | if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || | ||
1487 | (u64)(unsigned long)a.used_user_addr != a.used_user_addr || | ||
1488 | (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { | ||
1489 | r = -EFAULT; | ||
1490 | break; | ||
1491 | } | ||
1492 | |||
1493 | /* Make sure it's safe to cast pointers to vring types. */ | ||
1494 | BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); | ||
1495 | BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); | ||
1496 | if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || | ||
1497 | (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || | ||
1498 | (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) { | ||
1499 | r = -EINVAL; | ||
1500 | break; | ||
1501 | } | ||
1502 | |||
1503 | /* We only verify access here if backend is configured. | ||
1504 | * If it is not, we don't as size might not have been setup. | ||
1505 | * We will verify when backend is configured. */ | ||
1506 | if (vq->private_data) { | ||
1507 | if (!vq_access_ok(vq, vq->num, | ||
1508 | (void __user *)(unsigned long)a.desc_user_addr, | ||
1509 | (void __user *)(unsigned long)a.avail_user_addr, | ||
1510 | (void __user *)(unsigned long)a.used_user_addr)) { | ||
1511 | r = -EINVAL; | ||
1512 | break; | ||
1513 | } | ||
1514 | |||
1515 | /* Also validate log access for used ring if enabled. */ | ||
1516 | if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && | ||
1517 | !log_access_ok(vq->log_base, a.log_guest_addr, | ||
1518 | sizeof *vq->used + | ||
1519 | vq->num * sizeof *vq->used->ring)) { | ||
1520 | r = -EINVAL; | ||
1521 | break; | ||
1522 | } | ||
1523 | } | ||
1524 | |||
1525 | vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); | ||
1526 | vq->desc = (void __user *)(unsigned long)a.desc_user_addr; | ||
1527 | vq->avail = (void __user *)(unsigned long)a.avail_user_addr; | ||
1528 | vq->log_addr = a.log_guest_addr; | ||
1529 | vq->used = (void __user *)(unsigned long)a.used_user_addr; | ||
1530 | break; | ||
1531 | case VHOST_SET_VRING_KICK: | 2149 | case VHOST_SET_VRING_KICK: |
1532 | if (copy_from_user(&f, argp, sizeof f)) { | 2150 | if (copy_from_user(&f, argp, sizeof f)) { |
1533 | r = -EFAULT; | 2151 | r = -EFAULT; |
@@ -1861,8 +2479,7 @@ EXPORT_SYMBOL_GPL(vhost_log_write); | |||
1861 | static int vhost_update_used_flags(struct vhost_virtqueue *vq) | 2479 | static int vhost_update_used_flags(struct vhost_virtqueue *vq) |
1862 | { | 2480 | { |
1863 | void __user *used; | 2481 | void __user *used; |
1864 | if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), | 2482 | if (vhost_put_used_flags(vq)) |
1865 | &vq->used->flags) < 0) | ||
1866 | return -EFAULT; | 2483 | return -EFAULT; |
1867 | if (unlikely(vq->log_used)) { | 2484 | if (unlikely(vq->log_used)) { |
1868 | /* Make sure the flag is seen before log. */ | 2485 | /* Make sure the flag is seen before log. */ |
@@ -1879,8 +2496,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) | |||
1879 | 2496 | ||
1880 | static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) | 2497 | static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) |
1881 | { | 2498 | { |
1882 | if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), | 2499 | if (vhost_put_avail_event(vq)) |
1883 | vhost_avail_event(vq))) | ||
1884 | return -EFAULT; | 2500 | return -EFAULT; |
1885 | if (unlikely(vq->log_used)) { | 2501 | if (unlikely(vq->log_used)) { |
1886 | void __user *used; | 2502 | void __user *used; |
@@ -1916,7 +2532,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) | |||
1916 | r = -EFAULT; | 2532 | r = -EFAULT; |
1917 | goto err; | 2533 | goto err; |
1918 | } | 2534 | } |
1919 | r = vhost_get_used(vq, last_used_idx, &vq->used->idx); | 2535 | r = vhost_get_used_idx(vq, &last_used_idx); |
1920 | if (r) { | 2536 | if (r) { |
1921 | vq_err(vq, "Can't access used idx at %p\n", | 2537 | vq_err(vq, "Can't access used idx at %p\n", |
1922 | &vq->used->idx); | 2538 | &vq->used->idx); |
@@ -2115,7 +2731,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, | |||
2115 | last_avail_idx = vq->last_avail_idx; | 2731 | last_avail_idx = vq->last_avail_idx; |
2116 | 2732 | ||
2117 | if (vq->avail_idx == vq->last_avail_idx) { | 2733 | if (vq->avail_idx == vq->last_avail_idx) { |
2118 | if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { | 2734 | if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) { |
2119 | vq_err(vq, "Failed to access avail idx at %p\n", | 2735 | vq_err(vq, "Failed to access avail idx at %p\n", |
2120 | &vq->avail->idx); | 2736 | &vq->avail->idx); |
2121 | return -EFAULT; | 2737 | return -EFAULT; |
@@ -2142,8 +2758,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, | |||
2142 | 2758 | ||
2143 | /* Grab the next descriptor number they're advertising, and increment | 2759 | /* Grab the next descriptor number they're advertising, and increment |
2144 | * the index we've seen. */ | 2760 | * the index we've seen. */ |
2145 | if (unlikely(vhost_get_avail(vq, ring_head, | 2761 | if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { |
2146 | &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { | ||
2147 | vq_err(vq, "Failed to read head: idx %d address %p\n", | 2762 | vq_err(vq, "Failed to read head: idx %d address %p\n", |
2148 | last_avail_idx, | 2763 | last_avail_idx, |
2149 | &vq->avail->ring[last_avail_idx % vq->num]); | 2764 | &vq->avail->ring[last_avail_idx % vq->num]); |
@@ -2178,8 +2793,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, | |||
2178 | i, vq->num, head); | 2793 | i, vq->num, head); |
2179 | return -EINVAL; | 2794 | return -EINVAL; |
2180 | } | 2795 | } |
2181 | ret = vhost_copy_from_user(vq, &desc, vq->desc + i, | 2796 | ret = vhost_get_desc(vq, &desc, i); |
2182 | sizeof desc); | ||
2183 | if (unlikely(ret)) { | 2797 | if (unlikely(ret)) { |
2184 | vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", | 2798 | vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", |
2185 | i, vq->desc + i); | 2799 | i, vq->desc + i); |
@@ -2272,16 +2886,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, | |||
2272 | 2886 | ||
2273 | start = vq->last_used_idx & (vq->num - 1); | 2887 | start = vq->last_used_idx & (vq->num - 1); |
2274 | used = vq->used->ring + start; | 2888 | used = vq->used->ring + start; |
2275 | if (count == 1) { | 2889 | if (vhost_put_used(vq, heads, start, count)) { |
2276 | if (vhost_put_user(vq, heads[0].id, &used->id)) { | ||
2277 | vq_err(vq, "Failed to write used id"); | ||
2278 | return -EFAULT; | ||
2279 | } | ||
2280 | if (vhost_put_user(vq, heads[0].len, &used->len)) { | ||
2281 | vq_err(vq, "Failed to write used len"); | ||
2282 | return -EFAULT; | ||
2283 | } | ||
2284 | } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { | ||
2285 | vq_err(vq, "Failed to write used"); | 2890 | vq_err(vq, "Failed to write used"); |
2286 | return -EFAULT; | 2891 | return -EFAULT; |
2287 | } | 2892 | } |
@@ -2323,8 +2928,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, | |||
2323 | 2928 | ||
2324 | /* Make sure buffer is written before we update index. */ | 2929 | /* Make sure buffer is written before we update index. */ |
2325 | smp_wmb(); | 2930 | smp_wmb(); |
2326 | if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), | 2931 | if (vhost_put_used_idx(vq)) { |
2327 | &vq->used->idx)) { | ||
2328 | vq_err(vq, "Failed to increment used idx"); | 2932 | vq_err(vq, "Failed to increment used idx"); |
2329 | return -EFAULT; | 2933 | return -EFAULT; |
2330 | } | 2934 | } |
@@ -2357,7 +2961,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |||
2357 | 2961 | ||
2358 | if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { | 2962 | if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { |
2359 | __virtio16 flags; | 2963 | __virtio16 flags; |
2360 | if (vhost_get_avail(vq, flags, &vq->avail->flags)) { | 2964 | if (vhost_get_avail_flags(vq, &flags)) { |
2361 | vq_err(vq, "Failed to get flags"); | 2965 | vq_err(vq, "Failed to get flags"); |
2362 | return true; | 2966 | return true; |
2363 | } | 2967 | } |
@@ -2371,7 +2975,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |||
2371 | if (unlikely(!v)) | 2975 | if (unlikely(!v)) |
2372 | return true; | 2976 | return true; |
2373 | 2977 | ||
2374 | if (vhost_get_avail(vq, event, vhost_used_event(vq))) { | 2978 | if (vhost_get_used_event(vq, &event)) { |
2375 | vq_err(vq, "Failed to get used event idx"); | 2979 | vq_err(vq, "Failed to get used event idx"); |
2376 | return true; | 2980 | return true; |
2377 | } | 2981 | } |
@@ -2416,7 +3020,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |||
2416 | if (vq->avail_idx != vq->last_avail_idx) | 3020 | if (vq->avail_idx != vq->last_avail_idx) |
2417 | return false; | 3021 | return false; |
2418 | 3022 | ||
2419 | r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); | 3023 | r = vhost_get_avail_idx(vq, &avail_idx); |
2420 | if (unlikely(r)) | 3024 | if (unlikely(r)) |
2421 | return false; | 3025 | return false; |
2422 | vq->avail_idx = vhost16_to_cpu(vq, avail_idx); | 3026 | vq->avail_idx = vhost16_to_cpu(vq, avail_idx); |
@@ -2452,7 +3056,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | |||
2452 | /* They could have slipped one in as we were doing that: make | 3056 | /* They could have slipped one in as we were doing that: make |
2453 | * sure it's written, then check again. */ | 3057 | * sure it's written, then check again. */ |
2454 | smp_mb(); | 3058 | smp_mb(); |
2455 | r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); | 3059 | r = vhost_get_avail_idx(vq, &avail_idx); |
2456 | if (r) { | 3060 | if (r) { |
2457 | vq_err(vq, "Failed to check avail idx at %p: %d\n", | 3061 | vq_err(vq, "Failed to check avail idx at %p: %d\n", |
2458 | &vq->avail->idx, r); | 3062 | &vq->avail->idx, r); |
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 27a78a9b8cc7..819296332913 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h | |||
@@ -12,6 +12,9 @@ | |||
12 | #include <linux/virtio_config.h> | 12 | #include <linux/virtio_config.h> |
13 | #include <linux/virtio_ring.h> | 13 | #include <linux/virtio_ring.h> |
14 | #include <linux/atomic.h> | 14 | #include <linux/atomic.h> |
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/mmu_notifier.h> | ||
17 | #include <asm/cacheflush.h> | ||
15 | 18 | ||
16 | struct vhost_work; | 19 | struct vhost_work; |
17 | typedef void (*vhost_work_fn_t)(struct vhost_work *work); | 20 | typedef void (*vhost_work_fn_t)(struct vhost_work *work); |
@@ -80,6 +83,24 @@ enum vhost_uaddr_type { | |||
80 | VHOST_NUM_ADDRS = 3, | 83 | VHOST_NUM_ADDRS = 3, |
81 | }; | 84 | }; |
82 | 85 | ||
86 | struct vhost_map { | ||
87 | int npages; | ||
88 | void *addr; | ||
89 | struct page **pages; | ||
90 | }; | ||
91 | |||
92 | struct vhost_uaddr { | ||
93 | unsigned long uaddr; | ||
94 | size_t size; | ||
95 | bool write; | ||
96 | }; | ||
97 | |||
98 | #if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0 | ||
99 | #define VHOST_ARCH_CAN_ACCEL_UACCESS 1 | ||
100 | #else | ||
101 | #define VHOST_ARCH_CAN_ACCEL_UACCESS 0 | ||
102 | #endif | ||
103 | |||
83 | /* The virtqueue structure describes a queue attached to a device. */ | 104 | /* The virtqueue structure describes a queue attached to a device. */ |
84 | struct vhost_virtqueue { | 105 | struct vhost_virtqueue { |
85 | struct vhost_dev *dev; | 106 | struct vhost_dev *dev; |
@@ -90,7 +111,22 @@ struct vhost_virtqueue { | |||
90 | struct vring_desc __user *desc; | 111 | struct vring_desc __user *desc; |
91 | struct vring_avail __user *avail; | 112 | struct vring_avail __user *avail; |
92 | struct vring_used __user *used; | 113 | struct vring_used __user *used; |
114 | |||
115 | #if VHOST_ARCH_CAN_ACCEL_UACCESS | ||
116 | /* Read by memory accessors, modified by metadata | ||
117 | * prefetching, MMU notifier and vring ioctl(). | ||
118 | * Synchronized through mmu_lock (writers) and RCU (writers | ||
119 | * and readers). | ||
120 | */ | ||
121 | struct vhost_map __rcu *maps[VHOST_NUM_ADDRS]; | ||
122 | /* Read by MMU notifier, modified by vring ioctl(), | ||
123 | * synchronized through MMU notifier | ||
124 | * registering/unregistering. | ||
125 | */ | ||
126 | struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS]; | ||
127 | #endif | ||
93 | const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; | 128 | const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; |
129 | |||
94 | struct file *kick; | 130 | struct file *kick; |
95 | struct eventfd_ctx *call_ctx; | 131 | struct eventfd_ctx *call_ctx; |
96 | struct eventfd_ctx *error_ctx; | 132 | struct eventfd_ctx *error_ctx; |
@@ -145,6 +181,8 @@ struct vhost_virtqueue { | |||
145 | bool user_be; | 181 | bool user_be; |
146 | #endif | 182 | #endif |
147 | u32 busyloop_timeout; | 183 | u32 busyloop_timeout; |
184 | spinlock_t mmu_lock; | ||
185 | int invalidate_count; | ||
148 | }; | 186 | }; |
149 | 187 | ||
150 | struct vhost_msg_node { | 188 | struct vhost_msg_node { |
@@ -158,6 +196,9 @@ struct vhost_msg_node { | |||
158 | 196 | ||
159 | struct vhost_dev { | 197 | struct vhost_dev { |
160 | struct mm_struct *mm; | 198 | struct mm_struct *mm; |
199 | #ifdef CONFIG_MMU_NOTIFIER | ||
200 | struct mmu_notifier mmu_notifier; | ||
201 | #endif | ||
161 | struct mutex mutex; | 202 | struct mutex mutex; |
162 | struct vhost_virtqueue **vqs; | 203 | struct vhost_virtqueue **vqs; |
163 | int nvqs; | 204 | int nvqs; |
@@ -212,7 +253,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); | |||
212 | int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, | 253 | int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, |
213 | unsigned int log_num, u64 len, | 254 | unsigned int log_num, u64 len, |
214 | struct iovec *iov, int count); | 255 | struct iovec *iov, int count); |
215 | int vq_iotlb_prefetch(struct vhost_virtqueue *vq); | 256 | int vq_meta_prefetch(struct vhost_virtqueue *vq); |
216 | 257 | ||
217 | struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); | 258 | struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); |
218 | void vhost_enqueue_msg(struct vhost_dev *dev, | 259 | void vhost_enqueue_msg(struct vhost_dev *dev, |
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index f363fbeb5ab0..e09edb5c5e06 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c | |||
@@ -463,9 +463,14 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, | |||
463 | struct irq_affinity *desc) | 463 | struct irq_affinity *desc) |
464 | { | 464 | { |
465 | struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); | 465 | struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); |
466 | unsigned int irq = platform_get_irq(vm_dev->pdev, 0); | 466 | int irq = platform_get_irq(vm_dev->pdev, 0); |
467 | int i, err, queue_idx = 0; | 467 | int i, err, queue_idx = 0; |
468 | 468 | ||
469 | if (irq < 0) { | ||
470 | dev_err(&vdev->dev, "Cannot get IRQ resource\n"); | ||
471 | return irq; | ||
472 | } | ||
473 | |||
469 | err = request_irq(irq, vm_interrupt, IRQF_SHARED, | 474 | err = request_irq(irq, vm_interrupt, IRQF_SHARED, |
470 | dev_name(&vdev->dev), vm_dev); | 475 | dev_name(&vdev->dev), vm_dev); |
471 | if (err) | 476 | if (err) |
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 6d5c3b2d4f4d..cfe47c5d9a56 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h | |||
@@ -43,5 +43,6 @@ | |||
43 | #define VIRTIO_ID_INPUT 18 /* virtio input */ | 43 | #define VIRTIO_ID_INPUT 18 /* virtio input */ |
44 | #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ | 44 | #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ |
45 | #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ | 45 | #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ |
46 | #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ | ||
46 | 47 | ||
47 | #endif /* _LINUX_VIRTIO_IDS_H */ | 48 | #endif /* _LINUX_VIRTIO_IDS_H */ |
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h new file mode 100644 index 000000000000..ba1b460c9944 --- /dev/null +++ b/include/uapi/linux/virtio_iommu.h | |||
@@ -0,0 +1,161 @@ | |||
1 | /* SPDX-License-Identifier: BSD-3-Clause */ | ||
2 | /* | ||
3 | * Virtio-iommu definition v0.9 | ||
4 | * | ||
5 | * Copyright (C) 2018 Arm Ltd. | ||
6 | */ | ||
7 | #ifndef _UAPI_LINUX_VIRTIO_IOMMU_H | ||
8 | #define _UAPI_LINUX_VIRTIO_IOMMU_H | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | |||
12 | /* Feature bits */ | ||
13 | #define VIRTIO_IOMMU_F_INPUT_RANGE 0 | ||
14 | #define VIRTIO_IOMMU_F_DOMAIN_BITS 1 | ||
15 | #define VIRTIO_IOMMU_F_MAP_UNMAP 2 | ||
16 | #define VIRTIO_IOMMU_F_BYPASS 3 | ||
17 | #define VIRTIO_IOMMU_F_PROBE 4 | ||
18 | |||
19 | struct virtio_iommu_range { | ||
20 | __u64 start; | ||
21 | __u64 end; | ||
22 | }; | ||
23 | |||
24 | struct virtio_iommu_config { | ||
25 | /* Supported page sizes */ | ||
26 | __u64 page_size_mask; | ||
27 | /* Supported IOVA range */ | ||
28 | struct virtio_iommu_range input_range; | ||
29 | /* Max domain ID size */ | ||
30 | __u8 domain_bits; | ||
31 | __u8 padding[3]; | ||
32 | /* Probe buffer size */ | ||
33 | __u32 probe_size; | ||
34 | }; | ||
35 | |||
36 | /* Request types */ | ||
37 | #define VIRTIO_IOMMU_T_ATTACH 0x01 | ||
38 | #define VIRTIO_IOMMU_T_DETACH 0x02 | ||
39 | #define VIRTIO_IOMMU_T_MAP 0x03 | ||
40 | #define VIRTIO_IOMMU_T_UNMAP 0x04 | ||
41 | #define VIRTIO_IOMMU_T_PROBE 0x05 | ||
42 | |||
43 | /* Status types */ | ||
44 | #define VIRTIO_IOMMU_S_OK 0x00 | ||
45 | #define VIRTIO_IOMMU_S_IOERR 0x01 | ||
46 | #define VIRTIO_IOMMU_S_UNSUPP 0x02 | ||
47 | #define VIRTIO_IOMMU_S_DEVERR 0x03 | ||
48 | #define VIRTIO_IOMMU_S_INVAL 0x04 | ||
49 | #define VIRTIO_IOMMU_S_RANGE 0x05 | ||
50 | #define VIRTIO_IOMMU_S_NOENT 0x06 | ||
51 | #define VIRTIO_IOMMU_S_FAULT 0x07 | ||
52 | |||
53 | struct virtio_iommu_req_head { | ||
54 | __u8 type; | ||
55 | __u8 reserved[3]; | ||
56 | }; | ||
57 | |||
58 | struct virtio_iommu_req_tail { | ||
59 | __u8 status; | ||
60 | __u8 reserved[3]; | ||
61 | }; | ||
62 | |||
63 | struct virtio_iommu_req_attach { | ||
64 | struct virtio_iommu_req_head head; | ||
65 | __le32 domain; | ||
66 | __le32 endpoint; | ||
67 | __u8 reserved[8]; | ||
68 | struct virtio_iommu_req_tail tail; | ||
69 | }; | ||
70 | |||
71 | struct virtio_iommu_req_detach { | ||
72 | struct virtio_iommu_req_head head; | ||
73 | __le32 domain; | ||
74 | __le32 endpoint; | ||
75 | __u8 reserved[8]; | ||
76 | struct virtio_iommu_req_tail tail; | ||
77 | }; | ||
78 | |||
79 | #define VIRTIO_IOMMU_MAP_F_READ (1 << 0) | ||
80 | #define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1) | ||
81 | #define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2) | ||
82 | #define VIRTIO_IOMMU_MAP_F_MMIO (1 << 3) | ||
83 | |||
84 | #define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \ | ||
85 | VIRTIO_IOMMU_MAP_F_WRITE | \ | ||
86 | VIRTIO_IOMMU_MAP_F_EXEC | \ | ||
87 | VIRTIO_IOMMU_MAP_F_MMIO) | ||
88 | |||
89 | struct virtio_iommu_req_map { | ||
90 | struct virtio_iommu_req_head head; | ||
91 | __le32 domain; | ||
92 | __le64 virt_start; | ||
93 | __le64 virt_end; | ||
94 | __le64 phys_start; | ||
95 | __le32 flags; | ||
96 | struct virtio_iommu_req_tail tail; | ||
97 | }; | ||
98 | |||
99 | struct virtio_iommu_req_unmap { | ||
100 | struct virtio_iommu_req_head head; | ||
101 | __le32 domain; | ||
102 | __le64 virt_start; | ||
103 | __le64 virt_end; | ||
104 | __u8 reserved[4]; | ||
105 | struct virtio_iommu_req_tail tail; | ||
106 | }; | ||
107 | |||
108 | #define VIRTIO_IOMMU_PROBE_T_NONE 0 | ||
109 | #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1 | ||
110 | |||
111 | #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff | ||
112 | |||
113 | struct virtio_iommu_probe_property { | ||
114 | __le16 type; | ||
115 | __le16 length; | ||
116 | }; | ||
117 | |||
118 | #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0 | ||
119 | #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1 | ||
120 | |||
121 | struct virtio_iommu_probe_resv_mem { | ||
122 | struct virtio_iommu_probe_property head; | ||
123 | __u8 subtype; | ||
124 | __u8 reserved[3]; | ||
125 | __le64 start; | ||
126 | __le64 end; | ||
127 | }; | ||
128 | |||
129 | struct virtio_iommu_req_probe { | ||
130 | struct virtio_iommu_req_head head; | ||
131 | __le32 endpoint; | ||
132 | __u8 reserved[64]; | ||
133 | |||
134 | __u8 properties[]; | ||
135 | |||
136 | /* | ||
137 | * Tail follows the variable-length properties array. No padding, | ||
138 | * property lengths are all aligned on 8 bytes. | ||
139 | */ | ||
140 | }; | ||
141 | |||
142 | /* Fault types */ | ||
143 | #define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0 | ||
144 | #define VIRTIO_IOMMU_FAULT_R_DOMAIN 1 | ||
145 | #define VIRTIO_IOMMU_FAULT_R_MAPPING 2 | ||
146 | |||
147 | #define VIRTIO_IOMMU_FAULT_F_READ (1 << 0) | ||
148 | #define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1) | ||
149 | #define VIRTIO_IOMMU_FAULT_F_EXEC (1 << 2) | ||
150 | #define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8) | ||
151 | |||
152 | struct virtio_iommu_fault { | ||
153 | __u8 reason; | ||
154 | __u8 reserved[3]; | ||
155 | __le32 flags; | ||
156 | __le32 endpoint; | ||
157 | __u8 reserved2[4]; | ||
158 | __le64 address; | ||
159 | }; | ||
160 | |||
161 | #endif | ||
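As a usage illustration of the request layout defined above, here is a hedged sketch that fills in a MAP request the way a driver would before queuing it. It assumes the new <linux/virtio_iommu.h> header is installed, uses htole32()/htole64() for the little-endian fields, and the domain ID and addresses are made-up example values.

#include <endian.h>
#include <string.h>
#include <linux/virtio_iommu.h>

/* Build a MAP request: identity-map one 4 KiB page into domain 1, read+write. */
static void build_map_req(struct virtio_iommu_req_map *req)
{
	memset(req, 0, sizeof(*req));
	req->head.type  = VIRTIO_IOMMU_T_MAP;
	req->domain     = htole32(1);
	req->virt_start = htole64(0x8000000000ULL);
	req->virt_end   = htole64(0x8000000fffULL);	/* last byte of the range */
	req->phys_start = htole64(0x8000000000ULL);
	req->flags      = htole32(VIRTIO_IOMMU_MAP_F_READ |
				  VIRTIO_IOMMU_MAP_F_WRITE);
}

The device reports the outcome through tail.status, using the VIRTIO_IOMMU_S_* codes defined above.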