diff options
Diffstat (limited to 'drivers/vfio/pci/vfio_pci_config.c')
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 1540 |
1 files changed, 1540 insertions, 0 deletions
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c new file mode 100644 index 000000000000..8b8f7d11e102 --- /dev/null +++ b/drivers/vfio/pci/vfio_pci_config.c | |||
@@ -0,0 +1,1540 @@ | |||
1 | /* | ||
2 | * VFIO PCI config space virtualization | ||
3 | * | ||
4 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | ||
5 | * Author: Alex Williamson <alex.williamson@redhat.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Derived from original vfio: | ||
12 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. | ||
13 | * Author: Tom Lyon, pugs@cisco.com | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * This code handles reading and writing of PCI configuration registers. | ||
18 | * This is hairy because we want to allow a lot of flexibility to the | ||
19 | * user driver, but cannot trust it with all of the config fields. | ||
20 | * Tables determine which fields can be read and written, as well as | ||
21 | * which fields are 'virtualized' - special actions and translations to | ||
22 | * make it appear to the user that he has control, when in fact things | ||
23 | * must be negotiated with the underlying OS. | ||
24 | */ | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/pci.h> | ||
28 | #include <linux/uaccess.h> | ||
29 | #include <linux/vfio.h> | ||
30 | |||
31 | #include "vfio_pci_private.h" | ||
32 | |||
/*
 * Boundary used in this file to tell standard config space from
 * extended config space: positions at or above this value are handled
 * as extended capabilities.
 */
#define PCI_CFG_SPACE_SIZE	256

/*
 * Useful "pseudo" capabilities
 *
 * PCI_CAP_ID_BASIC is the index used for the standard config header in
 * the capability tables below.
 */
#define PCI_CAP_ID_BASIC	0
#define PCI_CAP_ID_INVALID	0xFF
38 | |||
/*
 * True when @offset falls inside one of the six base address registers
 * or inside the expansion ROM base address register.
 *
 * The argument is parenthesized so that compound expressions (ternary,
 * bitwise, ...) keep their meaning.  It is still evaluated several
 * times, so do not pass an expression with side effects.
 */
#define is_bar(offset)	\
	(((offset) >= PCI_BASE_ADDRESS_0 &&				\
	  (offset) < PCI_BASE_ADDRESS_5 + 4) ||				\
	 ((offset) >= PCI_ROM_ADDRESS &&				\
	  (offset) < PCI_ROM_ADDRESS + 4))
42 | |||
/*
 * Lengths of PCI Config Capabilities, indexed by capability ID
 *   0: Removed from the user visible capability list
 *   FF: Variable length
 *
 * Index PCI_CAP_ID_BASIC holds the size of the standard config header.
 */
static u8 pci_cap_length[] = {
	[PCI_CAP_ID_BASIC]	= PCI_STD_HEADER_SIZEOF, /* pci config header */
	[PCI_CAP_ID_PM]		= PCI_PM_SIZEOF,
	[PCI_CAP_ID_AGP]	= PCI_AGP_SIZEOF,
	[PCI_CAP_ID_VPD]	= PCI_CAP_VPD_SIZEOF,
	[PCI_CAP_ID_SLOTID]	= 0,		/* bridge - don't care */
	[PCI_CAP_ID_MSI]	= 0xFF,		/* 10, 14, 20, or 24 */
	[PCI_CAP_ID_CHSWP]	= 0,		/* cpci - not yet */
	[PCI_CAP_ID_PCIX]	= 0xFF,		/* 8 or 24 */
	[PCI_CAP_ID_HT]		= 0xFF,		/* hypertransport */
	[PCI_CAP_ID_VNDR]	= 0xFF,		/* variable */
	[PCI_CAP_ID_DBG]	= 0,		/* debug - don't care */
	[PCI_CAP_ID_CCRC]	= 0,		/* cpci - not yet */
	[PCI_CAP_ID_SHPC]	= 0,		/* hotswap - not yet */
	[PCI_CAP_ID_SSVID]	= 0,		/* bridge - don't care */
	[PCI_CAP_ID_AGP3]	= 0,		/* AGP8x - not yet */
	[PCI_CAP_ID_SECDEV]	= 0,		/* secure device not yet */
	[PCI_CAP_ID_EXP]	= 0xFF,		/* 20 or 44 */
	[PCI_CAP_ID_MSIX]	= PCI_CAP_MSIX_SIZEOF,
	[PCI_CAP_ID_SATA]	= 0xFF,
	[PCI_CAP_ID_AF]		= PCI_CAP_AF_SIZEOF,
};
70 | |||
/*
 * Lengths of PCIe/PCI-X Extended Config Capabilities, indexed by
 * extended capability ID
 *   0: Removed or masked from the user visible capability list
 *   FF: Variable length
 */
static u16 pci_ext_cap_length[] = {
	/* Length is the offset of the root command register */
	[PCI_EXT_CAP_ID_ERR]	=	PCI_ERR_ROOT_COMMAND,
	[PCI_EXT_CAP_ID_VC]	=	0xFF,
	[PCI_EXT_CAP_ID_DSN]	=	PCI_EXT_CAP_DSN_SIZEOF,
	[PCI_EXT_CAP_ID_PWR]	=	PCI_EXT_CAP_PWR_SIZEOF,
	[PCI_EXT_CAP_ID_RCLD]	=	0,	/* root only - don't care */
	[PCI_EXT_CAP_ID_RCILC]	=	0,	/* root only - don't care */
	[PCI_EXT_CAP_ID_RCEC]	=	0,	/* root only - don't care */
	[PCI_EXT_CAP_ID_MFVC]	=	0xFF,
	[PCI_EXT_CAP_ID_VC9]	=	0xFF,	/* same as CAP_ID_VC */
	[PCI_EXT_CAP_ID_RCRB]	=	0,	/* root only - don't care */
	[PCI_EXT_CAP_ID_VNDR]	=	0xFF,
	[PCI_EXT_CAP_ID_CAC]	=	0,	/* obsolete */
	[PCI_EXT_CAP_ID_ACS]	=	0xFF,
	[PCI_EXT_CAP_ID_ARI]	=	PCI_EXT_CAP_ARI_SIZEOF,
	[PCI_EXT_CAP_ID_ATS]	=	PCI_EXT_CAP_ATS_SIZEOF,
	[PCI_EXT_CAP_ID_SRIOV]	=	PCI_EXT_CAP_SRIOV_SIZEOF,
	[PCI_EXT_CAP_ID_MRIOV]	=	0,	/* not yet */
	[PCI_EXT_CAP_ID_MCAST]	=	PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF,
	[PCI_EXT_CAP_ID_PRI]	=	PCI_EXT_CAP_PRI_SIZEOF,
	[PCI_EXT_CAP_ID_AMD_XXX] =	0,	/* not yet */
	[PCI_EXT_CAP_ID_REBAR]	=	0xFF,
	[PCI_EXT_CAP_ID_DPA]	=	0xFF,
	[PCI_EXT_CAP_ID_TPH]	=	0xFF,
	[PCI_EXT_CAP_ID_LTR]	=	PCI_EXT_CAP_LTR_SIZEOF,
	[PCI_EXT_CAP_ID_SECPCI]	=	0,	/* not yet */
	[PCI_EXT_CAP_ID_PMUX]	=	0,	/* not yet */
	[PCI_EXT_CAP_ID_PASID]	=	0,	/* not yet */
};
105 | |||
/*
 * Read/Write Permission Bits - one bit for each bit in capability
 * Any field can be read if it exists, but what is read depends on
 * whether the field is 'virtualized', or just pass thru to the
 * hardware.  Any virtualized field is also virtualized for writes.
 * Writes are only permitted if they have a 1 bit here.
 *
 * Handler arguments, as used by the handlers in this file:
 *   pos    - absolute position in config space (index into vconfig)
 *   count  - number of bytes accessed
 *   offset - position relative to the start of the capability (index
 *            into the virt/write masks)
 *   val    - little-endian data read or to be written
 * The handlers in this file return @count on success or a negative
 * errno on failure.
 */
struct perm_bits {
	u8	*virt;		/* read/write virtual data, not hw */
	u8	*write;		/* writeable bits */
	/* Read handler for this capability */
	int	(*readfn)(struct vfio_pci_device *vdev, int pos, int count,
			  struct perm_bits *perm, int offset, __le32 *val);
	/* Write handler for this capability; may be left unset */
	int	(*writefn)(struct vfio_pci_device *vdev, int pos, int count,
			   struct perm_bits *perm, int offset, __le32 val);
};
121 | |||
/*
 * Convenience masks for the p_set*() helpers.  The ALL_* values are
 * 32 bits wide and are cast down by callers for narrower fields.
 */
#define	NO_VIRT		0
#define	ALL_VIRT	0xFFFFFFFFU
#define	NO_WRITE	0
#define	ALL_WRITE	0xFFFFFFFFU
126 | |||
127 | static int vfio_user_config_read(struct pci_dev *pdev, int offset, | ||
128 | __le32 *val, int count) | ||
129 | { | ||
130 | int ret = -EINVAL; | ||
131 | u32 tmp_val = 0; | ||
132 | |||
133 | switch (count) { | ||
134 | case 1: | ||
135 | { | ||
136 | u8 tmp; | ||
137 | ret = pci_user_read_config_byte(pdev, offset, &tmp); | ||
138 | tmp_val = tmp; | ||
139 | break; | ||
140 | } | ||
141 | case 2: | ||
142 | { | ||
143 | u16 tmp; | ||
144 | ret = pci_user_read_config_word(pdev, offset, &tmp); | ||
145 | tmp_val = tmp; | ||
146 | break; | ||
147 | } | ||
148 | case 4: | ||
149 | ret = pci_user_read_config_dword(pdev, offset, &tmp_val); | ||
150 | break; | ||
151 | } | ||
152 | |||
153 | *val = cpu_to_le32(tmp_val); | ||
154 | |||
155 | return pcibios_err_to_errno(ret); | ||
156 | } | ||
157 | |||
158 | static int vfio_user_config_write(struct pci_dev *pdev, int offset, | ||
159 | __le32 val, int count) | ||
160 | { | ||
161 | int ret = -EINVAL; | ||
162 | u32 tmp_val = le32_to_cpu(val); | ||
163 | |||
164 | switch (count) { | ||
165 | case 1: | ||
166 | ret = pci_user_write_config_byte(pdev, offset, tmp_val); | ||
167 | break; | ||
168 | case 2: | ||
169 | ret = pci_user_write_config_word(pdev, offset, tmp_val); | ||
170 | break; | ||
171 | case 4: | ||
172 | ret = pci_user_write_config_dword(pdev, offset, tmp_val); | ||
173 | break; | ||
174 | } | ||
175 | |||
176 | return pcibios_err_to_errno(ret); | ||
177 | } | ||
178 | |||
179 | static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos, | ||
180 | int count, struct perm_bits *perm, | ||
181 | int offset, __le32 *val) | ||
182 | { | ||
183 | __le32 virt = 0; | ||
184 | |||
185 | memcpy(val, vdev->vconfig + pos, count); | ||
186 | |||
187 | memcpy(&virt, perm->virt + offset, count); | ||
188 | |||
189 | /* Any non-virtualized bits? */ | ||
190 | if (cpu_to_le32(~0U >> (32 - (count * 8))) != virt) { | ||
191 | struct pci_dev *pdev = vdev->pdev; | ||
192 | __le32 phys_val = 0; | ||
193 | int ret; | ||
194 | |||
195 | ret = vfio_user_config_read(pdev, pos, &phys_val, count); | ||
196 | if (ret) | ||
197 | return ret; | ||
198 | |||
199 | *val = (phys_val & ~virt) | (*val & virt); | ||
200 | } | ||
201 | |||
202 | return count; | ||
203 | } | ||
204 | |||
205 | static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos, | ||
206 | int count, struct perm_bits *perm, | ||
207 | int offset, __le32 val) | ||
208 | { | ||
209 | __le32 virt = 0, write = 0; | ||
210 | |||
211 | memcpy(&write, perm->write + offset, count); | ||
212 | |||
213 | if (!write) | ||
214 | return count; /* drop, no writable bits */ | ||
215 | |||
216 | memcpy(&virt, perm->virt + offset, count); | ||
217 | |||
218 | /* Virtualized and writable bits go to vconfig */ | ||
219 | if (write & virt) { | ||
220 | __le32 virt_val = 0; | ||
221 | |||
222 | memcpy(&virt_val, vdev->vconfig + pos, count); | ||
223 | |||
224 | virt_val &= ~(write & virt); | ||
225 | virt_val |= (val & (write & virt)); | ||
226 | |||
227 | memcpy(vdev->vconfig + pos, &virt_val, count); | ||
228 | } | ||
229 | |||
230 | /* Non-virtualzed and writable bits go to hardware */ | ||
231 | if (write & ~virt) { | ||
232 | struct pci_dev *pdev = vdev->pdev; | ||
233 | __le32 phys_val = 0; | ||
234 | int ret; | ||
235 | |||
236 | ret = vfio_user_config_read(pdev, pos, &phys_val, count); | ||
237 | if (ret) | ||
238 | return ret; | ||
239 | |||
240 | phys_val &= ~(write & ~virt); | ||
241 | phys_val |= (val & (write & ~virt)); | ||
242 | |||
243 | ret = vfio_user_config_write(pdev, pos, phys_val, count); | ||
244 | if (ret) | ||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | return count; | ||
249 | } | ||
250 | |||
251 | /* Allow direct read from hardware, except for capability next pointer */ | ||
252 | static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos, | ||
253 | int count, struct perm_bits *perm, | ||
254 | int offset, __le32 *val) | ||
255 | { | ||
256 | int ret; | ||
257 | |||
258 | ret = vfio_user_config_read(vdev->pdev, pos, val, count); | ||
259 | if (ret) | ||
260 | return pcibios_err_to_errno(ret); | ||
261 | |||
262 | if (pos >= PCI_CFG_SPACE_SIZE) { /* Extended cap header mangling */ | ||
263 | if (offset < 4) | ||
264 | memcpy(val, vdev->vconfig + pos, count); | ||
265 | } else if (pos >= PCI_STD_HEADER_SIZEOF) { /* Std cap mangling */ | ||
266 | if (offset == PCI_CAP_LIST_ID && count > 1) | ||
267 | memcpy(val, vdev->vconfig + pos, | ||
268 | min(PCI_CAP_FLAGS, count)); | ||
269 | else if (offset == PCI_CAP_LIST_NEXT) | ||
270 | memcpy(val, vdev->vconfig + pos, 1); | ||
271 | } | ||
272 | |||
273 | return count; | ||
274 | } | ||
275 | |||
276 | static int vfio_direct_config_write(struct vfio_pci_device *vdev, int pos, | ||
277 | int count, struct perm_bits *perm, | ||
278 | int offset, __le32 val) | ||
279 | { | ||
280 | int ret; | ||
281 | |||
282 | ret = vfio_user_config_write(vdev->pdev, pos, val, count); | ||
283 | if (ret) | ||
284 | return ret; | ||
285 | |||
286 | return count; | ||
287 | } | ||
288 | |||
/*
 * Default all regions to read-only, no-virtualization
 *
 * Every entry starts with only the direct read handler set; there are
 * no masks and no write handler until an init function or
 * vfio_pci_init_perm_bits() fills the entry in.  cap_perms is indexed
 * by standard capability ID, ecap_perms by extended capability ID.
 */
static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = {
	[0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
};
static struct perm_bits ecap_perms[PCI_EXT_CAP_ID_MAX + 1] = {
	[0 ... PCI_EXT_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
};
296 | |||
297 | static void free_perm_bits(struct perm_bits *perm) | ||
298 | { | ||
299 | kfree(perm->virt); | ||
300 | kfree(perm->write); | ||
301 | perm->virt = NULL; | ||
302 | perm->write = NULL; | ||
303 | } | ||
304 | |||
305 | static int alloc_perm_bits(struct perm_bits *perm, int size) | ||
306 | { | ||
307 | /* | ||
308 | * Round up all permission bits to the next dword, this lets us | ||
309 | * ignore whether a read/write exceeds the defined capability | ||
310 | * structure. We can do this because: | ||
311 | * - Standard config space is already dword aligned | ||
312 | * - Capabilities are all dword alinged (bits 0:1 of next reserved) | ||
313 | * - Express capabilities defined as dword aligned | ||
314 | */ | ||
315 | size = round_up(size, 4); | ||
316 | |||
317 | /* | ||
318 | * Zero state is | ||
319 | * - All Readable, None Writeable, None Virtualized | ||
320 | */ | ||
321 | perm->virt = kzalloc(size, GFP_KERNEL); | ||
322 | perm->write = kzalloc(size, GFP_KERNEL); | ||
323 | if (!perm->virt || !perm->write) { | ||
324 | free_perm_bits(perm); | ||
325 | return -ENOMEM; | ||
326 | } | ||
327 | |||
328 | perm->readfn = vfio_default_config_read; | ||
329 | perm->writefn = vfio_default_config_write; | ||
330 | |||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | /* | ||
335 | * Helper functions for filling in permission tables | ||
336 | */ | ||
337 | static inline void p_setb(struct perm_bits *p, int off, u8 virt, u8 write) | ||
338 | { | ||
339 | p->virt[off] = virt; | ||
340 | p->write[off] = write; | ||
341 | } | ||
342 | |||
343 | /* Handle endian-ness - pci and tables are little-endian */ | ||
344 | static inline void p_setw(struct perm_bits *p, int off, u16 virt, u16 write) | ||
345 | { | ||
346 | *(__le16 *)(&p->virt[off]) = cpu_to_le16(virt); | ||
347 | *(__le16 *)(&p->write[off]) = cpu_to_le16(write); | ||
348 | } | ||
349 | |||
350 | /* Handle endian-ness - pci and tables are little-endian */ | ||
351 | static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write) | ||
352 | { | ||
353 | *(__le32 *)(&p->virt[off]) = cpu_to_le32(virt); | ||
354 | *(__le32 *)(&p->write[off]) = cpu_to_le32(write); | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * Restore the *real* BARs after we detect a FLR or backdoor reset. | ||
359 | * (backdoor = some device specific technique that we didn't catch) | ||
360 | */ | ||
361 | static void vfio_bar_restore(struct vfio_pci_device *vdev) | ||
362 | { | ||
363 | struct pci_dev *pdev = vdev->pdev; | ||
364 | u32 *rbar = vdev->rbar; | ||
365 | int i; | ||
366 | |||
367 | if (pdev->is_virtfn) | ||
368 | return; | ||
369 | |||
370 | pr_info("%s: %s reset recovery - restoring bars\n", | ||
371 | __func__, dev_name(&pdev->dev)); | ||
372 | |||
373 | for (i = PCI_BASE_ADDRESS_0; i <= PCI_BASE_ADDRESS_5; i += 4, rbar++) | ||
374 | pci_user_write_config_dword(pdev, i, *rbar); | ||
375 | |||
376 | pci_user_write_config_dword(pdev, PCI_ROM_ADDRESS, *rbar); | ||
377 | } | ||
378 | |||
379 | static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar) | ||
380 | { | ||
381 | unsigned long flags = pci_resource_flags(pdev, bar); | ||
382 | u32 val; | ||
383 | |||
384 | if (flags & IORESOURCE_IO) | ||
385 | return cpu_to_le32(PCI_BASE_ADDRESS_SPACE_IO); | ||
386 | |||
387 | val = PCI_BASE_ADDRESS_SPACE_MEMORY; | ||
388 | |||
389 | if (flags & IORESOURCE_PREFETCH) | ||
390 | val |= PCI_BASE_ADDRESS_MEM_PREFETCH; | ||
391 | |||
392 | if (flags & IORESOURCE_MEM_64) | ||
393 | val |= PCI_BASE_ADDRESS_MEM_TYPE_64; | ||
394 | |||
395 | return cpu_to_le32(val); | ||
396 | } | ||
397 | |||
/*
 * Pretend we're hardware and tweak the values of the *virtual* PCI BARs
 * to reflect the hardware capabilities.  This implements BAR sizing.
 *
 * Each virtual BAR in vconfig is masked down to the address bits that
 * the size of the matching resource allows, and the flag bits are
 * regenerated from the resource flags.  Clears vdev->bardirty when done.
 */
static void vfio_bar_fixup(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int i;
	__le32 *bar;
	u64 mask;

	bar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0];

	/* i walks the resources while bar walks the virtual registers */
	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++, bar++) {
		if (!pci_resource_start(pdev, i)) {
			*bar = 0; /* Unmapped by host = unimplemented to user */
			continue;
		}

		/* Address bits that can be set for a region of this size */
		mask = ~(pci_resource_len(pdev, i) - 1);

		*bar &= cpu_to_le32((u32)mask);
		*bar |= vfio_generate_bar_flags(pdev, i);

		/*
		 * A 64-bit BAR takes two registers: mask the upper half
		 * too and skip the resource index that goes with it.
		 */
		if (*bar & cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64)) {
			bar++;
			*bar &= cpu_to_le32((u32)(mask >> 32));
			i++;
		}
	}

	bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS];

	/*
	 * NB. we expose the actual BAR size here, regardless of whether
	 * we can read it.  When we report the REGION_INFO for the ROM
	 * we report what PCI tells us is the actual ROM size.
	 */
	if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
		mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1);
		/* Keep the ROM enable bit as the user wrote it */
		mask |= PCI_ROM_ADDRESS_ENABLE;
		*bar &= cpu_to_le32((u32)mask);
	} else
		*bar = 0;

	vdev->bardirty = false;
}
445 | |||
446 | static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos, | ||
447 | int count, struct perm_bits *perm, | ||
448 | int offset, __le32 *val) | ||
449 | { | ||
450 | if (is_bar(offset)) /* pos == offset for basic config */ | ||
451 | vfio_bar_fixup(vdev); | ||
452 | |||
453 | count = vfio_default_config_read(vdev, pos, count, perm, offset, val); | ||
454 | |||
455 | /* Mask in virtual memory enable for SR-IOV devices */ | ||
456 | if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) { | ||
457 | u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); | ||
458 | u32 tmp_val = le32_to_cpu(*val); | ||
459 | |||
460 | tmp_val |= cmd & PCI_COMMAND_MEMORY; | ||
461 | *val = cpu_to_le32(tmp_val); | ||
462 | } | ||
463 | |||
464 | return count; | ||
465 | } | ||
466 | |||
467 | static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, | ||
468 | int count, struct perm_bits *perm, | ||
469 | int offset, __le32 val) | ||
470 | { | ||
471 | struct pci_dev *pdev = vdev->pdev; | ||
472 | __le16 *virt_cmd; | ||
473 | u16 new_cmd = 0; | ||
474 | int ret; | ||
475 | |||
476 | virt_cmd = (__le16 *)&vdev->vconfig[PCI_COMMAND]; | ||
477 | |||
478 | if (offset == PCI_COMMAND) { | ||
479 | bool phys_mem, virt_mem, new_mem, phys_io, virt_io, new_io; | ||
480 | u16 phys_cmd; | ||
481 | |||
482 | ret = pci_user_read_config_word(pdev, PCI_COMMAND, &phys_cmd); | ||
483 | if (ret) | ||
484 | return ret; | ||
485 | |||
486 | new_cmd = le32_to_cpu(val); | ||
487 | |||
488 | phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY); | ||
489 | virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY); | ||
490 | new_mem = !!(new_cmd & PCI_COMMAND_MEMORY); | ||
491 | |||
492 | phys_io = !!(phys_cmd & PCI_COMMAND_IO); | ||
493 | virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO); | ||
494 | new_io = !!(new_cmd & PCI_COMMAND_IO); | ||
495 | |||
496 | /* | ||
497 | * If the user is writing mem/io enable (new_mem/io) and we | ||
498 | * think it's already enabled (virt_mem/io), but the hardware | ||
499 | * shows it disabled (phys_mem/io, then the device has | ||
500 | * undergone some kind of backdoor reset and needs to be | ||
501 | * restored before we allow it to enable the bars. | ||
502 | * SR-IOV devices will trigger this, but we catch them later | ||
503 | */ | ||
504 | if ((new_mem && virt_mem && !phys_mem) || | ||
505 | (new_io && virt_io && !phys_io)) | ||
506 | vfio_bar_restore(vdev); | ||
507 | } | ||
508 | |||
509 | count = vfio_default_config_write(vdev, pos, count, perm, offset, val); | ||
510 | if (count < 0) | ||
511 | return count; | ||
512 | |||
513 | /* | ||
514 | * Save current memory/io enable bits in vconfig to allow for | ||
515 | * the test above next time. | ||
516 | */ | ||
517 | if (offset == PCI_COMMAND) { | ||
518 | u16 mask = PCI_COMMAND_MEMORY | PCI_COMMAND_IO; | ||
519 | |||
520 | *virt_cmd &= cpu_to_le16(~mask); | ||
521 | *virt_cmd |= cpu_to_le16(new_cmd & mask); | ||
522 | } | ||
523 | |||
524 | /* Emulate INTx disable */ | ||
525 | if (offset >= PCI_COMMAND && offset <= PCI_COMMAND + 1) { | ||
526 | bool virt_intx_disable; | ||
527 | |||
528 | virt_intx_disable = !!(le16_to_cpu(*virt_cmd) & | ||
529 | PCI_COMMAND_INTX_DISABLE); | ||
530 | |||
531 | if (virt_intx_disable && !vdev->virq_disabled) { | ||
532 | vdev->virq_disabled = true; | ||
533 | vfio_pci_intx_mask(vdev); | ||
534 | } else if (!virt_intx_disable && vdev->virq_disabled) { | ||
535 | vdev->virq_disabled = false; | ||
536 | vfio_pci_intx_unmask(vdev); | ||
537 | } | ||
538 | } | ||
539 | |||
540 | if (is_bar(offset)) | ||
541 | vdev->bardirty = true; | ||
542 | |||
543 | return count; | ||
544 | } | ||
545 | |||
546 | /* Permissions for the Basic PCI Header */ | ||
547 | static int __init init_pci_cap_basic_perm(struct perm_bits *perm) | ||
548 | { | ||
549 | if (alloc_perm_bits(perm, PCI_STD_HEADER_SIZEOF)) | ||
550 | return -ENOMEM; | ||
551 | |||
552 | perm->readfn = vfio_basic_config_read; | ||
553 | perm->writefn = vfio_basic_config_write; | ||
554 | |||
555 | /* Virtualized for SR-IOV functions, which just have FFFF */ | ||
556 | p_setw(perm, PCI_VENDOR_ID, (u16)ALL_VIRT, NO_WRITE); | ||
557 | p_setw(perm, PCI_DEVICE_ID, (u16)ALL_VIRT, NO_WRITE); | ||
558 | |||
559 | /* | ||
560 | * Virtualize INTx disable, we use it internally for interrupt | ||
561 | * control and can emulate it for non-PCI 2.3 devices. | ||
562 | */ | ||
563 | p_setw(perm, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE, (u16)ALL_WRITE); | ||
564 | |||
565 | /* Virtualize capability list, we might want to skip/disable */ | ||
566 | p_setw(perm, PCI_STATUS, PCI_STATUS_CAP_LIST, NO_WRITE); | ||
567 | |||
568 | /* No harm to write */ | ||
569 | p_setb(perm, PCI_CACHE_LINE_SIZE, NO_VIRT, (u8)ALL_WRITE); | ||
570 | p_setb(perm, PCI_LATENCY_TIMER, NO_VIRT, (u8)ALL_WRITE); | ||
571 | p_setb(perm, PCI_BIST, NO_VIRT, (u8)ALL_WRITE); | ||
572 | |||
573 | /* Virtualize all bars, can't touch the real ones */ | ||
574 | p_setd(perm, PCI_BASE_ADDRESS_0, ALL_VIRT, ALL_WRITE); | ||
575 | p_setd(perm, PCI_BASE_ADDRESS_1, ALL_VIRT, ALL_WRITE); | ||
576 | p_setd(perm, PCI_BASE_ADDRESS_2, ALL_VIRT, ALL_WRITE); | ||
577 | p_setd(perm, PCI_BASE_ADDRESS_3, ALL_VIRT, ALL_WRITE); | ||
578 | p_setd(perm, PCI_BASE_ADDRESS_4, ALL_VIRT, ALL_WRITE); | ||
579 | p_setd(perm, PCI_BASE_ADDRESS_5, ALL_VIRT, ALL_WRITE); | ||
580 | p_setd(perm, PCI_ROM_ADDRESS, ALL_VIRT, ALL_WRITE); | ||
581 | |||
582 | /* Allow us to adjust capability chain */ | ||
583 | p_setb(perm, PCI_CAPABILITY_LIST, (u8)ALL_VIRT, NO_WRITE); | ||
584 | |||
585 | /* Sometimes used by sw, just virtualize */ | ||
586 | p_setb(perm, PCI_INTERRUPT_LINE, (u8)ALL_VIRT, (u8)ALL_WRITE); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | /* Permissions for the Power Management capability */ | ||
591 | static int __init init_pci_cap_pm_perm(struct perm_bits *perm) | ||
592 | { | ||
593 | if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM])) | ||
594 | return -ENOMEM; | ||
595 | |||
596 | /* | ||
597 | * We always virtualize the next field so we can remove | ||
598 | * capabilities from the chain if we want to. | ||
599 | */ | ||
600 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | ||
601 | |||
602 | /* | ||
603 | * Power management is defined *per function*, | ||
604 | * so we let the user write this | ||
605 | */ | ||
606 | p_setd(perm, PCI_PM_CTRL, NO_VIRT, ALL_WRITE); | ||
607 | return 0; | ||
608 | } | ||
609 | |||
610 | /* Permissions for PCI-X capability */ | ||
611 | static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) | ||
612 | { | ||
613 | /* Alloc 24, but only 8 are used in v0 */ | ||
614 | if (alloc_perm_bits(perm, PCI_CAP_PCIX_SIZEOF_V2)) | ||
615 | return -ENOMEM; | ||
616 | |||
617 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | ||
618 | |||
619 | p_setw(perm, PCI_X_CMD, NO_VIRT, (u16)ALL_WRITE); | ||
620 | p_setd(perm, PCI_X_ECC_CSR, NO_VIRT, ALL_WRITE); | ||
621 | return 0; | ||
622 | } | ||
623 | |||
624 | /* Permissions for PCI Express capability */ | ||
625 | static int __init init_pci_cap_exp_perm(struct perm_bits *perm) | ||
626 | { | ||
627 | /* Alloc larger of two possible sizes */ | ||
628 | if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2)) | ||
629 | return -ENOMEM; | ||
630 | |||
631 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | ||
632 | |||
633 | /* | ||
634 | * Allow writes to device control fields (includes FLR!) | ||
635 | * but not to devctl_phantom which could confuse IOMMU | ||
636 | * or to the ARI bit in devctl2 which is set at probe time | ||
637 | */ | ||
638 | p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM); | ||
639 | p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | /* Permissions for Advanced Function capability */ | ||
644 | static int __init init_pci_cap_af_perm(struct perm_bits *perm) | ||
645 | { | ||
646 | if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF])) | ||
647 | return -ENOMEM; | ||
648 | |||
649 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | ||
650 | p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR); | ||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | /* Permissions for Advanced Error Reporting extended capability */ | ||
655 | static int __init init_pci_ext_cap_err_perm(struct perm_bits *perm) | ||
656 | { | ||
657 | u32 mask; | ||
658 | |||
659 | if (alloc_perm_bits(perm, pci_ext_cap_length[PCI_EXT_CAP_ID_ERR])) | ||
660 | return -ENOMEM; | ||
661 | |||
662 | /* | ||
663 | * Virtualize the first dword of all express capabilities | ||
664 | * because it includes the next pointer. This lets us later | ||
665 | * remove capabilities from the chain if we need to. | ||
666 | */ | ||
667 | p_setd(perm, 0, ALL_VIRT, NO_WRITE); | ||
668 | |||
669 | /* Writable bits mask */ | ||
670 | mask = PCI_ERR_UNC_TRAIN | /* Training */ | ||
671 | PCI_ERR_UNC_DLP | /* Data Link Protocol */ | ||
672 | PCI_ERR_UNC_SURPDN | /* Surprise Down */ | ||
673 | PCI_ERR_UNC_POISON_TLP | /* Poisoned TLP */ | ||
674 | PCI_ERR_UNC_FCP | /* Flow Control Protocol */ | ||
675 | PCI_ERR_UNC_COMP_TIME | /* Completion Timeout */ | ||
676 | PCI_ERR_UNC_COMP_ABORT | /* Completer Abort */ | ||
677 | PCI_ERR_UNC_UNX_COMP | /* Unexpected Completion */ | ||
678 | PCI_ERR_UNC_RX_OVER | /* Receiver Overflow */ | ||
679 | PCI_ERR_UNC_MALF_TLP | /* Malformed TLP */ | ||
680 | PCI_ERR_UNC_ECRC | /* ECRC Error Status */ | ||
681 | PCI_ERR_UNC_UNSUP | /* Unsupported Request */ | ||
682 | PCI_ERR_UNC_ACSV | /* ACS Violation */ | ||
683 | PCI_ERR_UNC_INTN | /* internal error */ | ||
684 | PCI_ERR_UNC_MCBTLP | /* MC blocked TLP */ | ||
685 | PCI_ERR_UNC_ATOMEG | /* Atomic egress blocked */ | ||
686 | PCI_ERR_UNC_TLPPRE; /* TLP prefix blocked */ | ||
687 | p_setd(perm, PCI_ERR_UNCOR_STATUS, NO_VIRT, mask); | ||
688 | p_setd(perm, PCI_ERR_UNCOR_MASK, NO_VIRT, mask); | ||
689 | p_setd(perm, PCI_ERR_UNCOR_SEVER, NO_VIRT, mask); | ||
690 | |||
691 | mask = PCI_ERR_COR_RCVR | /* Receiver Error Status */ | ||
692 | PCI_ERR_COR_BAD_TLP | /* Bad TLP Status */ | ||
693 | PCI_ERR_COR_BAD_DLLP | /* Bad DLLP Status */ | ||
694 | PCI_ERR_COR_REP_ROLL | /* REPLAY_NUM Rollover */ | ||
695 | PCI_ERR_COR_REP_TIMER | /* Replay Timer Timeout */ | ||
696 | PCI_ERR_COR_ADV_NFAT | /* Advisory Non-Fatal */ | ||
697 | PCI_ERR_COR_INTERNAL | /* Corrected Internal */ | ||
698 | PCI_ERR_COR_LOG_OVER; /* Header Log Overflow */ | ||
699 | p_setd(perm, PCI_ERR_COR_STATUS, NO_VIRT, mask); | ||
700 | p_setd(perm, PCI_ERR_COR_MASK, NO_VIRT, mask); | ||
701 | |||
702 | mask = PCI_ERR_CAP_ECRC_GENE | /* ECRC Generation Enable */ | ||
703 | PCI_ERR_CAP_ECRC_CHKE; /* ECRC Check Enable */ | ||
704 | p_setd(perm, PCI_ERR_CAP, NO_VIRT, mask); | ||
705 | return 0; | ||
706 | } | ||
707 | |||
708 | /* Permissions for Power Budgeting extended capability */ | ||
709 | static int __init init_pci_ext_cap_pwr_perm(struct perm_bits *perm) | ||
710 | { | ||
711 | if (alloc_perm_bits(perm, pci_ext_cap_length[PCI_EXT_CAP_ID_PWR])) | ||
712 | return -ENOMEM; | ||
713 | |||
714 | p_setd(perm, 0, ALL_VIRT, NO_WRITE); | ||
715 | |||
716 | /* Writing the data selector is OK, the info is still read-only */ | ||
717 | p_setb(perm, PCI_PWR_DATA, NO_VIRT, (u8)ALL_WRITE); | ||
718 | return 0; | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * Initialize the shared permission tables | ||
723 | */ | ||
724 | void vfio_pci_uninit_perm_bits(void) | ||
725 | { | ||
726 | free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]); | ||
727 | |||
728 | free_perm_bits(&cap_perms[PCI_CAP_ID_PM]); | ||
729 | free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]); | ||
730 | free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]); | ||
731 | free_perm_bits(&cap_perms[PCI_CAP_ID_AF]); | ||
732 | |||
733 | free_perm_bits(&ecap_perms[PCI_EXT_CAP_ID_ERR]); | ||
734 | free_perm_bits(&ecap_perms[PCI_EXT_CAP_ID_PWR]); | ||
735 | } | ||
736 | |||
737 | int __init vfio_pci_init_perm_bits(void) | ||
738 | { | ||
739 | int ret; | ||
740 | |||
741 | /* Basic config space */ | ||
742 | ret = init_pci_cap_basic_perm(&cap_perms[PCI_CAP_ID_BASIC]); | ||
743 | |||
744 | /* Capabilities */ | ||
745 | ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]); | ||
746 | cap_perms[PCI_CAP_ID_VPD].writefn = vfio_direct_config_write; | ||
747 | ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]); | ||
748 | cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_direct_config_write; | ||
749 | ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]); | ||
750 | ret |= init_pci_cap_af_perm(&cap_perms[PCI_CAP_ID_AF]); | ||
751 | |||
752 | /* Extended capabilities */ | ||
753 | ret |= init_pci_ext_cap_err_perm(&ecap_perms[PCI_EXT_CAP_ID_ERR]); | ||
754 | ret |= init_pci_ext_cap_pwr_perm(&ecap_perms[PCI_EXT_CAP_ID_PWR]); | ||
755 | ecap_perms[PCI_EXT_CAP_ID_VNDR].writefn = vfio_direct_config_write; | ||
756 | |||
757 | if (ret) | ||
758 | vfio_pci_uninit_perm_bits(); | ||
759 | |||
760 | return ret; | ||
761 | } | ||
762 | |||
763 | static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos) | ||
764 | { | ||
765 | u8 cap; | ||
766 | int base = (pos >= PCI_CFG_SPACE_SIZE) ? PCI_CFG_SPACE_SIZE : | ||
767 | PCI_STD_HEADER_SIZEOF; | ||
768 | base /= 4; | ||
769 | pos /= 4; | ||
770 | |||
771 | cap = vdev->pci_config_map[pos]; | ||
772 | |||
773 | if (cap == PCI_CAP_ID_BASIC) | ||
774 | return 0; | ||
775 | |||
776 | /* XXX Can we have to abutting capabilities of the same type? */ | ||
777 | while (pos - 1 >= base && vdev->pci_config_map[pos - 1] == cap) | ||
778 | pos--; | ||
779 | |||
780 | return pos * 4; | ||
781 | } | ||
782 | |||
783 | static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos, | ||
784 | int count, struct perm_bits *perm, | ||
785 | int offset, __le32 *val) | ||
786 | { | ||
787 | /* Update max available queue size from msi_qmax */ | ||
788 | if (offset <= PCI_MSI_FLAGS && offset + count >= PCI_MSI_FLAGS) { | ||
789 | __le16 *flags; | ||
790 | int start; | ||
791 | |||
792 | start = vfio_find_cap_start(vdev, pos); | ||
793 | |||
794 | flags = (__le16 *)&vdev->vconfig[start]; | ||
795 | |||
796 | *flags &= cpu_to_le16(~PCI_MSI_FLAGS_QMASK); | ||
797 | *flags |= cpu_to_le16(vdev->msi_qmax << 1); | ||
798 | } | ||
799 | |||
800 | return vfio_default_config_read(vdev, pos, count, perm, offset, val); | ||
801 | } | ||
802 | |||
/*
 * Write handler for the MSI capability.
 *
 * The write first goes through the default handler.  If the access
 * may have touched the message control register, the virtual copy is
 * then sanitized (enable bit cleared unless MSI is active, queue size
 * clamped to vdev->msi_qmax) and written to both vconfig and the
 * hardware.
 *
 * Returns @count on success or a negative errno.
 */
static int vfio_msi_config_write(struct vfio_pci_device *vdev, int pos,
				 int count, struct perm_bits *perm,
				 int offset, __le32 val)
{
	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
	if (count < 0)
		return count;

	/* Fixup and write configured queue size and enable to hardware */
	if (offset <= PCI_MSI_FLAGS && offset + count >= PCI_MSI_FLAGS) {
		__le16 *pflags;
		u16 flags;
		int start, ret;

		start = vfio_find_cap_start(vdev, pos);

		/* Virtual message control register, as just updated above */
		pflags = (__le16 *)&vdev->vconfig[start + PCI_MSI_FLAGS];

		flags = le16_to_cpu(*pflags);

		/* MSI is enabled via ioctl */
		if (!is_msi(vdev))
			flags &= ~PCI_MSI_FLAGS_ENABLE;

		/* Check queue size */
		if ((flags & PCI_MSI_FLAGS_QSIZE) >> 4 > vdev->msi_qmax) {
			flags &= ~PCI_MSI_FLAGS_QSIZE;
			flags |= vdev->msi_qmax << 4;
		}

		/* Write back to virt and to hardware */
		*pflags = cpu_to_le16(flags);
		ret = pci_user_write_config_word(vdev->pdev,
						 start + PCI_MSI_FLAGS,
						 flags);
		if (ret)
			return pcibios_err_to_errno(ret);
	}

	return count;
}
844 | |||
845 | /* | ||
846 | * MSI determination is per-device, so this routine gets used beyond | ||
847 | * initialization time. Don't add __init | ||
848 | */ | ||
849 | static int init_pci_cap_msi_perm(struct perm_bits *perm, int len, u16 flags) | ||
850 | { | ||
851 | if (alloc_perm_bits(perm, len)) | ||
852 | return -ENOMEM; | ||
853 | |||
854 | perm->readfn = vfio_msi_config_read; | ||
855 | perm->writefn = vfio_msi_config_write; | ||
856 | |||
857 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | ||
858 | |||
859 | /* | ||
860 | * The upper byte of the control register is reserved, | ||
861 | * just setup the lower byte. | ||
862 | */ | ||
863 | p_setb(perm, PCI_MSI_FLAGS, (u8)ALL_VIRT, (u8)ALL_WRITE); | ||
864 | p_setd(perm, PCI_MSI_ADDRESS_LO, ALL_VIRT, ALL_WRITE); | ||
865 | if (flags & PCI_MSI_FLAGS_64BIT) { | ||
866 | p_setd(perm, PCI_MSI_ADDRESS_HI, ALL_VIRT, ALL_WRITE); | ||
867 | p_setw(perm, PCI_MSI_DATA_64, (u16)ALL_VIRT, (u16)ALL_WRITE); | ||
868 | if (flags & PCI_MSI_FLAGS_MASKBIT) { | ||
869 | p_setd(perm, PCI_MSI_MASK_64, NO_VIRT, ALL_WRITE); | ||
870 | p_setd(perm, PCI_MSI_PENDING_64, NO_VIRT, ALL_WRITE); | ||
871 | } | ||
872 | } else { | ||
873 | p_setw(perm, PCI_MSI_DATA_32, (u16)ALL_VIRT, (u16)ALL_WRITE); | ||
874 | if (flags & PCI_MSI_FLAGS_MASKBIT) { | ||
875 | p_setd(perm, PCI_MSI_MASK_32, NO_VIRT, ALL_WRITE); | ||
876 | p_setd(perm, PCI_MSI_PENDING_32, NO_VIRT, ALL_WRITE); | ||
877 | } | ||
878 | } | ||
879 | return 0; | ||
880 | } | ||
881 | |||
882 | /* Determine MSI CAP field length; initialize msi_perms on 1st call per vdev */ | ||
883 | static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) | ||
884 | { | ||
885 | struct pci_dev *pdev = vdev->pdev; | ||
886 | int len, ret; | ||
887 | u16 flags; | ||
888 | |||
889 | ret = pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &flags); | ||
890 | if (ret) | ||
891 | return pcibios_err_to_errno(ret); | ||
892 | |||
893 | len = 10; /* Minimum size */ | ||
894 | if (flags & PCI_MSI_FLAGS_64BIT) | ||
895 | len += 4; | ||
896 | if (flags & PCI_MSI_FLAGS_MASKBIT) | ||
897 | len += 10; | ||
898 | |||
899 | if (vdev->msi_perm) | ||
900 | return len; | ||
901 | |||
902 | vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL); | ||
903 | if (!vdev->msi_perm) | ||
904 | return -ENOMEM; | ||
905 | |||
906 | ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags); | ||
907 | if (ret) | ||
908 | return ret; | ||
909 | |||
910 | return len; | ||
911 | } | ||
912 | |||
913 | /* Determine extended capability length for VC (2 & 9) and MFVC */ | ||
914 | static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos) | ||
915 | { | ||
916 | struct pci_dev *pdev = vdev->pdev; | ||
917 | u32 tmp; | ||
918 | int ret, evcc, phases, vc_arb; | ||
919 | int len = PCI_CAP_VC_BASE_SIZEOF; | ||
920 | |||
921 | ret = pci_read_config_dword(pdev, pos + PCI_VC_PORT_REG1, &tmp); | ||
922 | if (ret) | ||
923 | return pcibios_err_to_errno(ret); | ||
924 | |||
925 | evcc = tmp & PCI_VC_REG1_EVCC; /* extended vc count */ | ||
926 | ret = pci_read_config_dword(pdev, pos + PCI_VC_PORT_REG2, &tmp); | ||
927 | if (ret) | ||
928 | return pcibios_err_to_errno(ret); | ||
929 | |||
930 | if (tmp & PCI_VC_REG2_128_PHASE) | ||
931 | phases = 128; | ||
932 | else if (tmp & PCI_VC_REG2_64_PHASE) | ||
933 | phases = 64; | ||
934 | else if (tmp & PCI_VC_REG2_32_PHASE) | ||
935 | phases = 32; | ||
936 | else | ||
937 | phases = 0; | ||
938 | |||
939 | vc_arb = phases * 4; | ||
940 | |||
941 | /* | ||
942 | * Port arbitration tables are root & switch only; | ||
943 | * function arbitration tables are function 0 only. | ||
944 | * In either case, we'll never let user write them so | ||
945 | * we don't care how big they are | ||
946 | */ | ||
947 | len += (1 + evcc) * PCI_CAP_VC_PER_VC_SIZEOF; | ||
948 | if (vc_arb) { | ||
949 | len = round_up(len, 16); | ||
950 | len += vc_arb / 8; | ||
951 | } | ||
952 | return len; | ||
953 | } | ||
954 | |||
955 | static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) | ||
956 | { | ||
957 | struct pci_dev *pdev = vdev->pdev; | ||
958 | u16 word; | ||
959 | u8 byte; | ||
960 | int ret; | ||
961 | |||
962 | switch (cap) { | ||
963 | case PCI_CAP_ID_MSI: | ||
964 | return vfio_msi_cap_len(vdev, pos); | ||
965 | case PCI_CAP_ID_PCIX: | ||
966 | ret = pci_read_config_word(pdev, pos + PCI_X_CMD, &word); | ||
967 | if (ret) | ||
968 | return pcibios_err_to_errno(ret); | ||
969 | |||
970 | if (PCI_X_CMD_VERSION(word)) { | ||
971 | vdev->extended_caps = true; | ||
972 | return PCI_CAP_PCIX_SIZEOF_V2; | ||
973 | } else | ||
974 | return PCI_CAP_PCIX_SIZEOF_V0; | ||
975 | case PCI_CAP_ID_VNDR: | ||
976 | /* length follows next field */ | ||
977 | ret = pci_read_config_byte(pdev, pos + PCI_CAP_FLAGS, &byte); | ||
978 | if (ret) | ||
979 | return pcibios_err_to_errno(ret); | ||
980 | |||
981 | return byte; | ||
982 | case PCI_CAP_ID_EXP: | ||
983 | /* length based on version */ | ||
984 | ret = pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &word); | ||
985 | if (ret) | ||
986 | return pcibios_err_to_errno(ret); | ||
987 | |||
988 | if ((word & PCI_EXP_FLAGS_VERS) == 1) | ||
989 | return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; | ||
990 | else { | ||
991 | vdev->extended_caps = true; | ||
992 | return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2; | ||
993 | } | ||
994 | case PCI_CAP_ID_HT: | ||
995 | ret = pci_read_config_byte(pdev, pos + 3, &byte); | ||
996 | if (ret) | ||
997 | return pcibios_err_to_errno(ret); | ||
998 | |||
999 | return (byte & HT_3BIT_CAP_MASK) ? | ||
1000 | HT_CAP_SIZEOF_SHORT : HT_CAP_SIZEOF_LONG; | ||
1001 | case PCI_CAP_ID_SATA: | ||
1002 | ret = pci_read_config_byte(pdev, pos + PCI_SATA_REGS, &byte); | ||
1003 | if (ret) | ||
1004 | return pcibios_err_to_errno(ret); | ||
1005 | |||
1006 | byte &= PCI_SATA_REGS_MASK; | ||
1007 | if (byte == PCI_SATA_REGS_INLINE) | ||
1008 | return PCI_SATA_SIZEOF_LONG; | ||
1009 | else | ||
1010 | return PCI_SATA_SIZEOF_SHORT; | ||
1011 | default: | ||
1012 | pr_warn("%s: %s unknown length for pci cap 0x%x@0x%x\n", | ||
1013 | dev_name(&pdev->dev), __func__, cap, pos); | ||
1014 | } | ||
1015 | |||
1016 | return 0; | ||
1017 | } | ||
1018 | |||
1019 | static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos) | ||
1020 | { | ||
1021 | struct pci_dev *pdev = vdev->pdev; | ||
1022 | u8 byte; | ||
1023 | u32 dword; | ||
1024 | int ret; | ||
1025 | |||
1026 | switch (ecap) { | ||
1027 | case PCI_EXT_CAP_ID_VNDR: | ||
1028 | ret = pci_read_config_dword(pdev, epos + PCI_VSEC_HDR, &dword); | ||
1029 | if (ret) | ||
1030 | return pcibios_err_to_errno(ret); | ||
1031 | |||
1032 | return dword >> PCI_VSEC_HDR_LEN_SHIFT; | ||
1033 | case PCI_EXT_CAP_ID_VC: | ||
1034 | case PCI_EXT_CAP_ID_VC9: | ||
1035 | case PCI_EXT_CAP_ID_MFVC: | ||
1036 | return vfio_vc_cap_len(vdev, epos); | ||
1037 | case PCI_EXT_CAP_ID_ACS: | ||
1038 | ret = pci_read_config_byte(pdev, epos + PCI_ACS_CAP, &byte); | ||
1039 | if (ret) | ||
1040 | return pcibios_err_to_errno(ret); | ||
1041 | |||
1042 | if (byte & PCI_ACS_EC) { | ||
1043 | int bits; | ||
1044 | |||
1045 | ret = pci_read_config_byte(pdev, | ||
1046 | epos + PCI_ACS_EGRESS_BITS, | ||
1047 | &byte); | ||
1048 | if (ret) | ||
1049 | return pcibios_err_to_errno(ret); | ||
1050 | |||
1051 | bits = byte ? round_up(byte, 32) : 256; | ||
1052 | return 8 + (bits / 8); | ||
1053 | } | ||
1054 | return 8; | ||
1055 | |||
1056 | case PCI_EXT_CAP_ID_REBAR: | ||
1057 | ret = pci_read_config_byte(pdev, epos + PCI_REBAR_CTRL, &byte); | ||
1058 | if (ret) | ||
1059 | return pcibios_err_to_errno(ret); | ||
1060 | |||
1061 | byte &= PCI_REBAR_CTRL_NBAR_MASK; | ||
1062 | byte >>= PCI_REBAR_CTRL_NBAR_SHIFT; | ||
1063 | |||
1064 | return 4 + (byte * 8); | ||
1065 | case PCI_EXT_CAP_ID_DPA: | ||
1066 | ret = pci_read_config_byte(pdev, epos + PCI_DPA_CAP, &byte); | ||
1067 | if (ret) | ||
1068 | return pcibios_err_to_errno(ret); | ||
1069 | |||
1070 | byte &= PCI_DPA_CAP_SUBSTATE_MASK; | ||
1071 | byte = round_up(byte + 1, 4); | ||
1072 | return PCI_DPA_BASE_SIZEOF + byte; | ||
1073 | case PCI_EXT_CAP_ID_TPH: | ||
1074 | ret = pci_read_config_dword(pdev, epos + PCI_TPH_CAP, &dword); | ||
1075 | if (ret) | ||
1076 | return pcibios_err_to_errno(ret); | ||
1077 | |||
1078 | if ((dword & PCI_TPH_CAP_LOC_MASK) == PCI_TPH_LOC_CAP) { | ||
1079 | int sts; | ||
1080 | |||
1081 | sts = byte & PCI_TPH_CAP_ST_MASK; | ||
1082 | sts >>= PCI_TPH_CAP_ST_SHIFT; | ||
1083 | return PCI_TPH_BASE_SIZEOF + round_up(sts * 2, 4); | ||
1084 | } | ||
1085 | return PCI_TPH_BASE_SIZEOF; | ||
1086 | default: | ||
1087 | pr_warn("%s: %s unknown length for pci ecap 0x%x@0x%x\n", | ||
1088 | dev_name(&pdev->dev), __func__, ecap, epos); | ||
1089 | } | ||
1090 | |||
1091 | return 0; | ||
1092 | } | ||
1093 | |||
1094 | static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev, | ||
1095 | int offset, int size) | ||
1096 | { | ||
1097 | struct pci_dev *pdev = vdev->pdev; | ||
1098 | int ret = 0; | ||
1099 | |||
1100 | /* | ||
1101 | * We try to read physical config space in the largest chunks | ||
1102 | * we can, assuming that all of the fields support dword access. | ||
1103 | * pci_save_state() makes this same assumption and seems to do ok. | ||
1104 | */ | ||
1105 | while (size) { | ||
1106 | int filled; | ||
1107 | |||
1108 | if (size >= 4 && !(offset % 4)) { | ||
1109 | __le32 *dwordp = (__le32 *)&vdev->vconfig[offset]; | ||
1110 | u32 dword; | ||
1111 | |||
1112 | ret = pci_read_config_dword(pdev, offset, &dword); | ||
1113 | if (ret) | ||
1114 | return ret; | ||
1115 | *dwordp = cpu_to_le32(dword); | ||
1116 | filled = 4; | ||
1117 | } else if (size >= 2 && !(offset % 2)) { | ||
1118 | __le16 *wordp = (__le16 *)&vdev->vconfig[offset]; | ||
1119 | u16 word; | ||
1120 | |||
1121 | ret = pci_read_config_word(pdev, offset, &word); | ||
1122 | if (ret) | ||
1123 | return ret; | ||
1124 | *wordp = cpu_to_le16(word); | ||
1125 | filled = 2; | ||
1126 | } else { | ||
1127 | u8 *byte = &vdev->vconfig[offset]; | ||
1128 | ret = pci_read_config_byte(pdev, offset, byte); | ||
1129 | if (ret) | ||
1130 | return ret; | ||
1131 | filled = 1; | ||
1132 | } | ||
1133 | |||
1134 | offset += filled; | ||
1135 | size -= filled; | ||
1136 | } | ||
1137 | |||
1138 | return ret; | ||
1139 | } | ||
1140 | |||
1141 | static int vfio_cap_init(struct vfio_pci_device *vdev) | ||
1142 | { | ||
1143 | struct pci_dev *pdev = vdev->pdev; | ||
1144 | u8 *map = vdev->pci_config_map; | ||
1145 | u16 status; | ||
1146 | u8 pos, *prev, cap; | ||
1147 | int loops, ret, caps = 0; | ||
1148 | |||
1149 | /* Any capabilities? */ | ||
1150 | ret = pci_read_config_word(pdev, PCI_STATUS, &status); | ||
1151 | if (ret) | ||
1152 | return ret; | ||
1153 | |||
1154 | if (!(status & PCI_STATUS_CAP_LIST)) | ||
1155 | return 0; /* Done */ | ||
1156 | |||
1157 | ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); | ||
1158 | if (ret) | ||
1159 | return ret; | ||
1160 | |||
1161 | /* Mark the previous position in case we want to skip a capability */ | ||
1162 | prev = &vdev->vconfig[PCI_CAPABILITY_LIST]; | ||
1163 | |||
1164 | /* We can bound our loop, capabilities are dword aligned */ | ||
1165 | loops = (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF; | ||
1166 | while (pos && loops--) { | ||
1167 | u8 next; | ||
1168 | int i, len = 0; | ||
1169 | |||
1170 | ret = pci_read_config_byte(pdev, pos, &cap); | ||
1171 | if (ret) | ||
1172 | return ret; | ||
1173 | |||
1174 | ret = pci_read_config_byte(pdev, | ||
1175 | pos + PCI_CAP_LIST_NEXT, &next); | ||
1176 | if (ret) | ||
1177 | return ret; | ||
1178 | |||
1179 | if (cap <= PCI_CAP_ID_MAX) { | ||
1180 | len = pci_cap_length[cap]; | ||
1181 | if (len == 0xFF) { /* Variable length */ | ||
1182 | len = vfio_cap_len(vdev, cap, pos); | ||
1183 | if (len < 0) | ||
1184 | return len; | ||
1185 | } | ||
1186 | } | ||
1187 | |||
1188 | if (!len) { | ||
1189 | pr_info("%s: %s hiding cap 0x%x\n", | ||
1190 | __func__, dev_name(&pdev->dev), cap); | ||
1191 | *prev = next; | ||
1192 | pos = next; | ||
1193 | continue; | ||
1194 | } | ||
1195 | |||
1196 | /* Sanity check, do we overlap other capabilities? */ | ||
1197 | for (i = 0; i < len; i += 4) { | ||
1198 | if (likely(map[(pos + i) / 4] == PCI_CAP_ID_INVALID)) | ||
1199 | continue; | ||
1200 | |||
1201 | pr_warn("%s: %s pci config conflict @0x%x, was cap 0x%x now cap 0x%x\n", | ||
1202 | __func__, dev_name(&pdev->dev), | ||
1203 | pos + i, map[pos + i], cap); | ||
1204 | } | ||
1205 | |||
1206 | memset(map + (pos / 4), cap, len / 4); | ||
1207 | ret = vfio_fill_vconfig_bytes(vdev, pos, len); | ||
1208 | if (ret) | ||
1209 | return ret; | ||
1210 | |||
1211 | prev = &vdev->vconfig[pos + PCI_CAP_LIST_NEXT]; | ||
1212 | pos = next; | ||
1213 | caps++; | ||
1214 | } | ||
1215 | |||
1216 | /* If we didn't fill any capabilities, clear the status flag */ | ||
1217 | if (!caps) { | ||
1218 | __le16 *vstatus = (__le16 *)&vdev->vconfig[PCI_STATUS]; | ||
1219 | *vstatus &= ~cpu_to_le16(PCI_STATUS_CAP_LIST); | ||
1220 | } | ||
1221 | |||
1222 | return 0; | ||
1223 | } | ||
1224 | |||
1225 | static int vfio_ecap_init(struct vfio_pci_device *vdev) | ||
1226 | { | ||
1227 | struct pci_dev *pdev = vdev->pdev; | ||
1228 | u8 *map = vdev->pci_config_map; | ||
1229 | u16 epos; | ||
1230 | __le32 *prev = NULL; | ||
1231 | int loops, ret, ecaps = 0; | ||
1232 | |||
1233 | if (!vdev->extended_caps) | ||
1234 | return 0; | ||
1235 | |||
1236 | epos = PCI_CFG_SPACE_SIZE; | ||
1237 | |||
1238 | loops = (pdev->cfg_size - PCI_CFG_SPACE_SIZE) / PCI_CAP_SIZEOF; | ||
1239 | |||
1240 | while (loops-- && epos >= PCI_CFG_SPACE_SIZE) { | ||
1241 | u32 header; | ||
1242 | u16 ecap; | ||
1243 | int i, len = 0; | ||
1244 | bool hidden = false; | ||
1245 | |||
1246 | ret = pci_read_config_dword(pdev, epos, &header); | ||
1247 | if (ret) | ||
1248 | return ret; | ||
1249 | |||
1250 | ecap = PCI_EXT_CAP_ID(header); | ||
1251 | |||
1252 | if (ecap <= PCI_EXT_CAP_ID_MAX) { | ||
1253 | len = pci_ext_cap_length[ecap]; | ||
1254 | if (len == 0xFF) { | ||
1255 | len = vfio_ext_cap_len(vdev, ecap, epos); | ||
1256 | if (len < 0) | ||
1257 | return ret; | ||
1258 | } | ||
1259 | } | ||
1260 | |||
1261 | if (!len) { | ||
1262 | pr_info("%s: %s hiding ecap 0x%x@0x%x\n", | ||
1263 | __func__, dev_name(&pdev->dev), ecap, epos); | ||
1264 | |||
1265 | /* If not the first in the chain, we can skip over it */ | ||
1266 | if (prev) { | ||
1267 | u32 val = epos = PCI_EXT_CAP_NEXT(header); | ||
1268 | *prev &= cpu_to_le32(~(0xffcU << 20)); | ||
1269 | *prev |= cpu_to_le32(val << 20); | ||
1270 | continue; | ||
1271 | } | ||
1272 | |||
1273 | /* | ||
1274 | * Otherwise, fill in a placeholder, the direct | ||
1275 | * readfn will virtualize this automatically | ||
1276 | */ | ||
1277 | len = PCI_CAP_SIZEOF; | ||
1278 | hidden = true; | ||
1279 | } | ||
1280 | |||
1281 | for (i = 0; i < len; i += 4) { | ||
1282 | if (likely(map[(epos + i) / 4] == PCI_CAP_ID_INVALID)) | ||
1283 | continue; | ||
1284 | |||
1285 | pr_warn("%s: %s pci config conflict @0x%x, was ecap 0x%x now ecap 0x%x\n", | ||
1286 | __func__, dev_name(&pdev->dev), | ||
1287 | epos + i, map[epos + i], ecap); | ||
1288 | } | ||
1289 | |||
1290 | /* | ||
1291 | * Even though ecap is 2 bytes, we're currently a long way | ||
1292 | * from exceeding 1 byte capabilities. If we ever make it | ||
1293 | * up to 0xFF we'll need to up this to a two-byte, byte map. | ||
1294 | */ | ||
1295 | BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); | ||
1296 | |||
1297 | memset(map + (epos / 4), ecap, len / 4); | ||
1298 | ret = vfio_fill_vconfig_bytes(vdev, epos, len); | ||
1299 | if (ret) | ||
1300 | return ret; | ||
1301 | |||
1302 | /* | ||
1303 | * If we're just using this capability to anchor the list, | ||
1304 | * hide the real ID. Only count real ecaps. XXX PCI spec | ||
1305 | * indicates to use cap id = 0, version = 0, next = 0 if | ||
1306 | * ecaps are absent, hope users check all the way to next. | ||
1307 | */ | ||
1308 | if (hidden) | ||
1309 | *(__le32 *)&vdev->vconfig[epos] &= | ||
1310 | cpu_to_le32((0xffcU << 20)); | ||
1311 | else | ||
1312 | ecaps++; | ||
1313 | |||
1314 | prev = (__le32 *)&vdev->vconfig[epos]; | ||
1315 | epos = PCI_EXT_CAP_NEXT(header); | ||
1316 | } | ||
1317 | |||
1318 | if (!ecaps) | ||
1319 | *(u32 *)&vdev->vconfig[PCI_CFG_SPACE_SIZE] = 0; | ||
1320 | |||
1321 | return 0; | ||
1322 | } | ||
1323 | |||
1324 | /* | ||
1325 | * For each device we allocate a pci_config_map that indicates the | ||
1326 | * capability occupying each dword and thus the struct perm_bits we | ||
1327 | * use for read and write. We also allocate a virtualized config | ||
1328 | * space which tracks reads and writes to bits that we emulate for | ||
1329 | * the user. Initial values filled from device. | ||
1330 | * | ||
1331 | * Using shared stuct perm_bits between all vfio-pci devices saves | ||
1332 | * us from allocating cfg_size buffers for virt and write for every | ||
1333 | * device. We could remove vconfig and allocate individual buffers | ||
1334 | * for each area requring emulated bits, but the array of pointers | ||
1335 | * would be comparable in size (at least for standard config space). | ||
1336 | */ | ||
1337 | int vfio_config_init(struct vfio_pci_device *vdev) | ||
1338 | { | ||
1339 | struct pci_dev *pdev = vdev->pdev; | ||
1340 | u8 *map, *vconfig; | ||
1341 | int ret; | ||
1342 | |||
1343 | /* | ||
1344 | * Config space, caps and ecaps are all dword aligned, so we can | ||
1345 | * use one byte per dword to record the type. | ||
1346 | */ | ||
1347 | map = kmalloc(pdev->cfg_size / 4, GFP_KERNEL); | ||
1348 | if (!map) | ||
1349 | return -ENOMEM; | ||
1350 | |||
1351 | vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL); | ||
1352 | if (!vconfig) { | ||
1353 | kfree(map); | ||
1354 | return -ENOMEM; | ||
1355 | } | ||
1356 | |||
1357 | vdev->pci_config_map = map; | ||
1358 | vdev->vconfig = vconfig; | ||
1359 | |||
1360 | memset(map, PCI_CAP_ID_BASIC, PCI_STD_HEADER_SIZEOF / 4); | ||
1361 | memset(map + (PCI_STD_HEADER_SIZEOF / 4), PCI_CAP_ID_INVALID, | ||
1362 | (pdev->cfg_size - PCI_STD_HEADER_SIZEOF) / 4); | ||
1363 | |||
1364 | ret = vfio_fill_vconfig_bytes(vdev, 0, PCI_STD_HEADER_SIZEOF); | ||
1365 | if (ret) | ||
1366 | goto out; | ||
1367 | |||
1368 | vdev->bardirty = true; | ||
1369 | |||
1370 | /* | ||
1371 | * XXX can we just pci_load_saved_state/pci_restore_state? | ||
1372 | * may need to rebuild vconfig after that | ||
1373 | */ | ||
1374 | |||
1375 | /* For restore after reset */ | ||
1376 | vdev->rbar[0] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_0]); | ||
1377 | vdev->rbar[1] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_1]); | ||
1378 | vdev->rbar[2] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_2]); | ||
1379 | vdev->rbar[3] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_3]); | ||
1380 | vdev->rbar[4] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_4]); | ||
1381 | vdev->rbar[5] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_5]); | ||
1382 | vdev->rbar[6] = le32_to_cpu(*(__le32 *)&vconfig[PCI_ROM_ADDRESS]); | ||
1383 | |||
1384 | if (pdev->is_virtfn) { | ||
1385 | *(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor); | ||
1386 | *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); | ||
1387 | } | ||
1388 | |||
1389 | ret = vfio_cap_init(vdev); | ||
1390 | if (ret) | ||
1391 | goto out; | ||
1392 | |||
1393 | ret = vfio_ecap_init(vdev); | ||
1394 | if (ret) | ||
1395 | goto out; | ||
1396 | |||
1397 | return 0; | ||
1398 | |||
1399 | out: | ||
1400 | kfree(map); | ||
1401 | vdev->pci_config_map = NULL; | ||
1402 | kfree(vconfig); | ||
1403 | vdev->vconfig = NULL; | ||
1404 | return pcibios_err_to_errno(ret); | ||
1405 | } | ||
1406 | |||
1407 | void vfio_config_free(struct vfio_pci_device *vdev) | ||
1408 | { | ||
1409 | kfree(vdev->vconfig); | ||
1410 | vdev->vconfig = NULL; | ||
1411 | kfree(vdev->pci_config_map); | ||
1412 | vdev->pci_config_map = NULL; | ||
1413 | kfree(vdev->msi_perm); | ||
1414 | vdev->msi_perm = NULL; | ||
1415 | } | ||
1416 | |||
1417 | static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, | ||
1418 | size_t count, loff_t *ppos, bool iswrite) | ||
1419 | { | ||
1420 | struct pci_dev *pdev = vdev->pdev; | ||
1421 | struct perm_bits *perm; | ||
1422 | __le32 val = 0; | ||
1423 | int cap_start = 0, offset; | ||
1424 | u8 cap_id; | ||
1425 | ssize_t ret = count; | ||
1426 | |||
1427 | if (*ppos < 0 || *ppos + count > pdev->cfg_size) | ||
1428 | return -EFAULT; | ||
1429 | |||
1430 | /* | ||
1431 | * gcc can't seem to figure out we're a static function, only called | ||
1432 | * with count of 1/2/4 and hits copy_from_user_overflow without this. | ||
1433 | */ | ||
1434 | if (count > sizeof(val)) | ||
1435 | return -EINVAL; | ||
1436 | |||
1437 | cap_id = vdev->pci_config_map[*ppos / 4]; | ||
1438 | |||
1439 | if (cap_id == PCI_CAP_ID_INVALID) { | ||
1440 | if (iswrite) | ||
1441 | return ret; /* drop */ | ||
1442 | |||
1443 | /* | ||
1444 | * Per PCI spec 3.0, section 6.1, reads from reserved and | ||
1445 | * unimplemented registers return 0 | ||
1446 | */ | ||
1447 | if (copy_to_user(buf, &val, count)) | ||
1448 | return -EFAULT; | ||
1449 | |||
1450 | return ret; | ||
1451 | } | ||
1452 | |||
1453 | /* | ||
1454 | * All capabilities are minimum 4 bytes and aligned on dword | ||
1455 | * boundaries. Since we don't support unaligned accesses, we're | ||
1456 | * only ever accessing a single capability. | ||
1457 | */ | ||
1458 | if (*ppos >= PCI_CFG_SPACE_SIZE) { | ||
1459 | WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); | ||
1460 | |||
1461 | perm = &ecap_perms[cap_id]; | ||
1462 | cap_start = vfio_find_cap_start(vdev, *ppos); | ||
1463 | |||
1464 | } else { | ||
1465 | WARN_ON(cap_id > PCI_CAP_ID_MAX); | ||
1466 | |||
1467 | perm = &cap_perms[cap_id]; | ||
1468 | |||
1469 | if (cap_id == PCI_CAP_ID_MSI) | ||
1470 | perm = vdev->msi_perm; | ||
1471 | |||
1472 | if (cap_id > PCI_CAP_ID_BASIC) | ||
1473 | cap_start = vfio_find_cap_start(vdev, *ppos); | ||
1474 | } | ||
1475 | |||
1476 | WARN_ON(!cap_start && cap_id != PCI_CAP_ID_BASIC); | ||
1477 | WARN_ON(cap_start > *ppos); | ||
1478 | |||
1479 | offset = *ppos - cap_start; | ||
1480 | |||
1481 | if (iswrite) { | ||
1482 | if (!perm->writefn) | ||
1483 | return ret; | ||
1484 | |||
1485 | if (copy_from_user(&val, buf, count)) | ||
1486 | return -EFAULT; | ||
1487 | |||
1488 | ret = perm->writefn(vdev, *ppos, count, perm, offset, val); | ||
1489 | } else { | ||
1490 | if (perm->readfn) { | ||
1491 | ret = perm->readfn(vdev, *ppos, count, | ||
1492 | perm, offset, &val); | ||
1493 | if (ret < 0) | ||
1494 | return ret; | ||
1495 | } | ||
1496 | |||
1497 | if (copy_to_user(buf, &val, count)) | ||
1498 | return -EFAULT; | ||
1499 | } | ||
1500 | |||
1501 | return ret; | ||
1502 | } | ||
1503 | |||
1504 | ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, | ||
1505 | char __user *buf, size_t count, | ||
1506 | loff_t *ppos, bool iswrite) | ||
1507 | { | ||
1508 | size_t done = 0; | ||
1509 | int ret = 0; | ||
1510 | loff_t pos = *ppos; | ||
1511 | |||
1512 | pos &= VFIO_PCI_OFFSET_MASK; | ||
1513 | |||
1514 | /* | ||
1515 | * We want to both keep the access size the caller users as well as | ||
1516 | * support reading large chunks of config space in a single call. | ||
1517 | * PCI doesn't support unaligned accesses, so we can safely break | ||
1518 | * those apart. | ||
1519 | */ | ||
1520 | while (count) { | ||
1521 | if (count >= 4 && !(pos % 4)) | ||
1522 | ret = vfio_config_do_rw(vdev, buf, 4, &pos, iswrite); | ||
1523 | else if (count >= 2 && !(pos % 2)) | ||
1524 | ret = vfio_config_do_rw(vdev, buf, 2, &pos, iswrite); | ||
1525 | else | ||
1526 | ret = vfio_config_do_rw(vdev, buf, 1, &pos, iswrite); | ||
1527 | |||
1528 | if (ret < 0) | ||
1529 | return ret; | ||
1530 | |||
1531 | count -= ret; | ||
1532 | done += ret; | ||
1533 | buf += ret; | ||
1534 | pos += ret; | ||
1535 | } | ||
1536 | |||
1537 | *ppos += done; | ||
1538 | |||
1539 | return done; | ||
1540 | } | ||