Diffstat (limited to 'drivers/pci/controller/vmd.c')
-rw-r--r-- | drivers/pci/controller/vmd.c | 870
1 file changed, 870 insertions, 0 deletions
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
new file mode 100644
index 000000000000..942b64fc7f1f
--- /dev/null
+++ b/drivers/pci/controller/vmd.c
@@ -0,0 +1,870 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Volume Management Device driver
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

#include <asm/irqdomain.h>
#include <asm/device.h>
#include <asm/msi.h>
#include <asm/msidef.h>

#define VMD_CFGBAR 0
#define VMD_MEMBAR1 2
#define VMD_MEMBAR2 4

#define PCI_REG_VMCAP 0x40
#define BUS_RESTRICT_CAP(vmcap) (vmcap & 0x1)
#define PCI_REG_VMCONFIG 0x44
#define BUS_RESTRICT_CFG(vmcfg) ((vmcfg >> 8) & 0x3)
#define PCI_REG_VMLOCK 0x70
#define MB2_SHADOW_EN(vmlock) (vmlock & 0x2)
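/*
 * Register bits as used by this driver: VMCAP bit 0 advertises support for
 * bus number restriction, VMCONFIG bits 9:8 select the restricted bus range
 * (0x1 selects buses 128-255), and VMLOCK bit 1 enables the MEMBAR2 shadow
 * registers.
 */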

enum vmd_features {
        /*
         * Device may contain registers which hint the physical location of the
         * membars, in order to allow proper address translation during
         * resource assignment to enable guest virtualization
         */
        VMD_FEAT_HAS_MEMBAR_SHADOW = (1 << 0),

        /*
         * Device may provide root port configuration information which limits
         * bus numbering
         */
        VMD_FEAT_HAS_BUS_RESTRICTIONS = (1 << 1),
};

/*
 * Lock for manipulating VMD IRQ lists.
 */
static DEFINE_RAW_SPINLOCK(list_lock);

/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node: list item for parent traversal.
 * @irq: back pointer to parent.
 * @enabled: true if driver enabled IRQ
 * @virq: the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
        struct list_head node;
        struct vmd_irq_list *irq;
        bool enabled;
        unsigned int virq;
};

/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list: the list of irq's the VMD one demuxes to.
 * @srcu: SRCU struct for local synchronization.
 * @count: number of child IRQs assigned to this vector; used to track
 * sharing.
 */
struct vmd_irq_list {
        struct list_head irq_list;
        struct srcu_struct srcu;
        unsigned int count;
};

struct vmd_dev {
        struct pci_dev *dev;

        spinlock_t cfg_lock;
        char __iomem *cfgbar;

        int msix_count;
        struct vmd_irq_list *irqs;

        struct pci_sysdata sysdata;
        struct resource resources[3];
        struct irq_domain *irq_domain;
        struct pci_bus *bus;

#ifdef CONFIG_X86_DEV_DMA_OPS
        struct dma_map_ops dma_ops;
        struct dma_domain dma_domain;
#endif
};

static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
{
        return container_of(bus->sysdata, struct vmd_dev, sysdata);
}

static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
                                           struct vmd_irq_list *irqs)
{
        return irqs - vmd->irqs;
}

/*
 * Drivers managing a device in a VMD domain allocate their own IRQs as before,
 * but the MSI entry for the hardware it's driving will be programmed with a
 * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its
 * domain into one of its own, and the VMD driver de-muxes these for the
 * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations
 * and irq_chip to set this up.
 */
static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
        struct vmd_irq *vmdirq = data->chip_data;
        struct vmd_irq_list *irq = vmdirq->irq;
        struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);

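        /*
         * Note: the "destination ID" programmed here selects which VMD MSI-X
         * table entry (and hence which vmd_irq_list) receives this child
         * device's interrupts; it is not an APIC ID.
         */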
        msg->address_hi = MSI_ADDR_BASE_HI;
        msg->address_lo = MSI_ADDR_BASE_LO |
                          MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
        msg->data = 0;
}

/*
 * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
 */
static void vmd_irq_enable(struct irq_data *data)
{
        struct vmd_irq *vmdirq = data->chip_data;
        unsigned long flags;

        raw_spin_lock_irqsave(&list_lock, flags);
        WARN_ON(vmdirq->enabled);
        list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
        vmdirq->enabled = true;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        data->chip->irq_unmask(data);
}

static void vmd_irq_disable(struct irq_data *data)
{
        struct vmd_irq *vmdirq = data->chip_data;
        unsigned long flags;

        data->chip->irq_mask(data);

        raw_spin_lock_irqsave(&list_lock, flags);
        if (vmdirq->enabled) {
                list_del_rcu(&vmdirq->node);
                vmdirq->enabled = false;
        }
        raw_spin_unlock_irqrestore(&list_lock, flags);
}

/*
 * XXX: Stubbed until we develop acceptable way to not create conflicts with
 * other devices sharing the same vector.
 */
static int vmd_irq_set_affinity(struct irq_data *data,
                                const struct cpumask *dest, bool force)
{
        return -EINVAL;
}

static struct irq_chip vmd_msi_controller = {
        .name = "VMD-MSI",
        .irq_enable = vmd_irq_enable,
        .irq_disable = vmd_irq_disable,
        .irq_compose_msi_msg = vmd_compose_msi_msg,
        .irq_set_affinity = vmd_irq_set_affinity,
};

static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
                                     msi_alloc_info_t *arg)
{
        return 0;
}

/*
 * XXX: We can be even smarter selecting the best IRQ once we solve the
 * affinity problem.
 */
static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
{
        int i, best = 1;
        unsigned long flags;

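        /*
         * IRQs for bridges all share vector 0; endpoint IRQs are spread over
         * the remaining vectors, so the balancing loop below starts at 1.
         */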
        if (pci_is_bridge(msi_desc_to_pci_dev(desc)) || vmd->msix_count == 1)
                return &vmd->irqs[0];

        raw_spin_lock_irqsave(&list_lock, flags);
        for (i = 1; i < vmd->msix_count; i++)
                if (vmd->irqs[i].count < vmd->irqs[best].count)
                        best = i;
        vmd->irqs[best].count++;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        return &vmd->irqs[best];
}

static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
                        unsigned int virq, irq_hw_number_t hwirq,
                        msi_alloc_info_t *arg)
{
        struct msi_desc *desc = arg->desc;
        struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
        struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
        unsigned int index, vector;

        if (!vmdirq)
                return -ENOMEM;

        INIT_LIST_HEAD(&vmdirq->node);
        vmdirq->irq = vmd_next_irq(vmd, desc);
        vmdirq->virq = virq;
        index = index_from_irqs(vmd, vmdirq->irq);
        vector = pci_irq_vector(vmd->dev, index);

        irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
                            handle_untracked_irq, vmd, NULL);
        return 0;
}

static void vmd_msi_free(struct irq_domain *domain,
                         struct msi_domain_info *info, unsigned int virq)
{
        struct vmd_irq *vmdirq = irq_get_chip_data(virq);
        unsigned long flags;

        synchronize_srcu(&vmdirq->irq->srcu);

        /* XXX: Potential optimization to rebalance */
        raw_spin_lock_irqsave(&list_lock, flags);
        vmdirq->irq->count--;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        kfree(vmdirq);
}

static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
                           int nvec, msi_alloc_info_t *arg)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = vmd_from_bus(pdev->bus);

        if (nvec > vmd->msix_count)
                return vmd->msix_count;

        memset(arg, 0, sizeof(*arg));
        return 0;
}

static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
        arg->desc = desc;
}

static struct msi_domain_ops vmd_msi_domain_ops = {
        .get_hwirq = vmd_get_hwirq,
        .msi_init = vmd_msi_init,
        .msi_free = vmd_msi_free,
        .msi_prepare = vmd_msi_prepare,
        .set_desc = vmd_set_desc,
};

static struct msi_domain_info vmd_msi_domain_info = {
        .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
                 MSI_FLAG_PCI_MSIX,
        .ops = &vmd_msi_domain_ops,
        .chip = &vmd_msi_controller,
};

#ifdef CONFIG_X86_DEV_DMA_OPS
/*
 * VMD replaces the requester ID with its own. DMA mappings for devices in a
 * VMD domain need to be mapped for the VMD, not the device requiring
 * the mapping.
 */
static struct device *to_vmd_dev(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = vmd_from_bus(pdev->bus);

        return &vmd->dev->dev;
}

static const struct dma_map_ops *vmd_dma_ops(struct device *dev)
{
        return get_dma_ops(to_vmd_dev(dev));
}

static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
                       gfp_t flag, unsigned long attrs)
{
        return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
                                       attrs);
}

static void vmd_free(struct device *dev, size_t size, void *vaddr,
                     dma_addr_t addr, unsigned long attrs)
{
        return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
                                      attrs);
}

static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
                    void *cpu_addr, dma_addr_t addr, size_t size,
                    unsigned long attrs)
{
        return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
                                      size, attrs);
}

static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
                           void *cpu_addr, dma_addr_t addr, size_t size,
                           unsigned long attrs)
{
        return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
                                             addr, size, attrs);
}

static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
                               unsigned long offset, size_t size,
                               enum dma_data_direction dir,
                               unsigned long attrs)
{
        return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
                                          dir, attrs);
}

static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
                           enum dma_data_direction dir, unsigned long attrs)
{
        vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
}

static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                      enum dma_data_direction dir, unsigned long attrs)
{
        return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
}

static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                         enum dma_data_direction dir, unsigned long attrs)
{
        vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
}

static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
                                    size_t size, enum dma_data_direction dir)
{
        vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
}

static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr,
                                       size_t size, enum dma_data_direction dir)
{
        vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size,
                                                 dir);
}

static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                                int nents, enum dma_data_direction dir)
{
        vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
}

static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                                   int nents, enum dma_data_direction dir)
{
        vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
}

static int vmd_mapping_error(struct device *dev, dma_addr_t addr)
{
        return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr);
}

static int vmd_dma_supported(struct device *dev, u64 mask)
{
        return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
}

#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
static u64 vmd_get_required_mask(struct device *dev)
{
        return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
}
#endif

static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
{
        struct dma_domain *domain = &vmd->dma_domain;

        if (get_dma_ops(&vmd->dev->dev))
                del_dma_domain(domain);
}

#define ASSIGN_VMD_DMA_OPS(source, dest, fn)    \
        do {                                    \
                if (source->fn)                 \
                        dest->fn = vmd_##fn;    \
        } while (0)

static void vmd_setup_dma_ops(struct vmd_dev *vmd)
{
        const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev);
        struct dma_map_ops *dest = &vmd->dma_ops;
        struct dma_domain *domain = &vmd->dma_domain;

        domain->domain_nr = vmd->sysdata.domain;
        domain->dma_ops = dest;

        if (!source)
                return;
        ASSIGN_VMD_DMA_OPS(source, dest, alloc);
        ASSIGN_VMD_DMA_OPS(source, dest, free);
        ASSIGN_VMD_DMA_OPS(source, dest, mmap);
        ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable);
        ASSIGN_VMD_DMA_OPS(source, dest, map_page);
        ASSIGN_VMD_DMA_OPS(source, dest, unmap_page);
        ASSIGN_VMD_DMA_OPS(source, dest, map_sg);
        ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
        ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
        ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
        ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
#endif
        add_dma_domain(domain);
}
#undef ASSIGN_VMD_DMA_OPS
#else
static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {}
static void vmd_setup_dma_ops(struct vmd_dev *vmd) {}
#endif

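/*
 * The CFGBAR is laid out like standard ECAM: 1MB of config space per bus
 * (bus << 20) and 4K per function (devfn << 12), with @reg indexing into the
 * function's config space.
 */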
static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
                                  unsigned int devfn, int reg, int len)
{
        char __iomem *addr = vmd->cfgbar +
                             (bus->number << 20) + (devfn << 12) + reg;

        if ((addr - vmd->cfgbar) + len >=
            resource_size(&vmd->dev->resource[VMD_CFGBAR]))
                return NULL;

        return addr;
}

/*
 * CPU may deadlock if config space is not serialized on some versions of this
 * hardware, so all config space access is done under a spinlock.
 */
static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
                        int len, u32 *value)
{
        struct vmd_dev *vmd = vmd_from_bus(bus);
        char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
        unsigned long flags;
        int ret = 0;

        if (!addr)
                return -EFAULT;

        spin_lock_irqsave(&vmd->cfg_lock, flags);
        switch (len) {
        case 1:
                *value = readb(addr);
                break;
        case 2:
                *value = readw(addr);
                break;
        case 4:
                *value = readl(addr);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&vmd->cfg_lock, flags);
        return ret;
}

/*
 * VMD h/w converts non-posted config writes to posted memory writes. The
 * read-back in this function forces the completion so it returns only after
 * the config space was written, as expected.
 */
static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
                         int len, u32 value)
{
        struct vmd_dev *vmd = vmd_from_bus(bus);
        char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
        unsigned long flags;
        int ret = 0;

        if (!addr)
                return -EFAULT;

        spin_lock_irqsave(&vmd->cfg_lock, flags);
        switch (len) {
        case 1:
                writeb(value, addr);
                readb(addr);
                break;
        case 2:
                writew(value, addr);
                readw(addr);
                break;
        case 4:
                writel(value, addr);
                readl(addr);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&vmd->cfg_lock, flags);
        return ret;
}

static struct pci_ops vmd_ops = {
        .read = vmd_pci_read,
        .write = vmd_pci_write,
};

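/*
 * The child domain's windows are carved out of the VMD MEMBARs, so expose
 * them as child resources of the corresponding BARs (e.g. in /proc/iomem).
 */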
static void vmd_attach_resources(struct vmd_dev *vmd)
{
        vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
        vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
}

static void vmd_detach_resources(struct vmd_dev *vmd)
{
        vmd->dev->resource[VMD_MEMBAR1].child = NULL;
        vmd->dev->resource[VMD_MEMBAR2].child = NULL;
}

/*
 * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
 * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower
 * 16 bits are the PCI Segment Group (domain) number. Other bits are
 * currently reserved.
 */
static int vmd_find_free_domain(void)
{
        int domain = 0xffff;
        struct pci_bus *bus = NULL;

        while ((bus = pci_find_next_bus(bus)) != NULL)
                domain = max_t(int, domain, pci_domain_nr(bus));
        return domain + 1;
}

static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
{
        struct pci_sysdata *sd = &vmd->sysdata;
        struct fwnode_handle *fn;
        struct resource *res;
        u32 upper_bits;
        unsigned long flags;
        LIST_HEAD(resources);
        resource_size_t offset[2] = {0};
        resource_size_t membar2_offset = 0x2000, busn_start = 0;

        /*
         * Shadow registers may exist in certain VMD device ids which allow
         * guests to correctly assign host physical addresses to the root ports
         * and child devices. These registers will either return the host value
         * or 0, depending on an enable bit in the VMD device.
         */
        if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
                u32 vmlock;
                int ret;

                membar2_offset = 0x2018;
                ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
                if (ret || vmlock == ~0)
                        return -ENODEV;

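                /*
                 * The shadow registers at MEMBAR2 + 0x2008 and + 0x2010 hold
                 * the host physical bases of MEMBAR1/MEMBAR2 as programmed by
                 * platform firmware; the difference from the locally visible
                 * BAR values becomes the resource offset for the child domain.
                 */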
                if (MB2_SHADOW_EN(vmlock)) {
                        void __iomem *membar2;

                        membar2 = pci_iomap(vmd->dev, VMD_MEMBAR2, 0);
                        if (!membar2)
                                return -ENOMEM;
                        offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
                                        readq(membar2 + 0x2008);
                        offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
                                        readq(membar2 + 0x2010);
                        pci_iounmap(vmd->dev, membar2);
                }
        }

        /*
         * Certain VMD devices may have a root port configuration option which
         * limits the bus range to between 0-127 or 128-255
         */
        if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
                u32 vmcap, vmconfig;

                pci_read_config_dword(vmd->dev, PCI_REG_VMCAP, &vmcap);
                pci_read_config_dword(vmd->dev, PCI_REG_VMCONFIG, &vmconfig);
                if (BUS_RESTRICT_CAP(vmcap) &&
                    (BUS_RESTRICT_CFG(vmconfig) == 0x1))
                        busn_start = 128;
        }

        res = &vmd->dev->resource[VMD_CFGBAR];
        vmd->resources[0] = (struct resource) {
                .name  = "VMD CFGBAR",
                .start = busn_start,
                .end   = busn_start + (resource_size(res) >> 20) - 1,
                .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
        };

        /*
         * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
         * put 32-bit resources in the window.
         *
         * There's no hardware reason why a 64-bit window *couldn't*
         * contain a 32-bit resource, but pbus_size_mem() computes the
         * bridge window size assuming a 64-bit window will contain no
         * 32-bit resources. __pci_assign_resource() enforces that
         * artificial restriction to make sure everything will fit.
         *
         * The only way we could use a 64-bit non-prefetchable MEMBAR is
         * if its address is <4GB so that we can convert it to a 32-bit
         * resource. To be visible to the host OS, all VMD endpoints must
         * be initially configured by platform BIOS, which includes setting
         * up these resources. We can assume the device is configured
         * according to the platform needs.
         */
        res = &vmd->dev->resource[VMD_MEMBAR1];
        upper_bits = upper_32_bits(res->end);
        flags = res->flags & ~IORESOURCE_SIZEALIGN;
        if (!upper_bits)
                flags &= ~IORESOURCE_MEM_64;
        vmd->resources[1] = (struct resource) {
                .name  = "VMD MEMBAR1",
                .start = res->start,
                .end   = res->end,
                .flags = flags,
                .parent = res,
        };

        res = &vmd->dev->resource[VMD_MEMBAR2];
        upper_bits = upper_32_bits(res->end);
        flags = res->flags & ~IORESOURCE_SIZEALIGN;
        if (!upper_bits)
                flags &= ~IORESOURCE_MEM_64;
        vmd->resources[2] = (struct resource) {
                .name  = "VMD MEMBAR2",
                .start = res->start + membar2_offset,
                .end   = res->end,
                .flags = flags,
                .parent = res,
        };

        sd->vmd_domain = true;
        sd->domain = vmd_find_free_domain();
        if (sd->domain < 0)
                return sd->domain;

        sd->node = pcibus_to_node(vmd->dev->bus);

        fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
        if (!fn)
                return -ENODEV;

        vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info,
                                                    x86_vector_domain);
        irq_domain_free_fwnode(fn);
        if (!vmd->irq_domain)
                return -ENODEV;

        pci_add_resource(&resources, &vmd->resources[0]);
        pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
        pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);

        vmd->bus = pci_create_root_bus(&vmd->dev->dev, busn_start, &vmd_ops,
                                       sd, &resources);
        if (!vmd->bus) {
                pci_free_resource_list(&resources);
                irq_domain_remove(vmd->irq_domain);
                return -ENODEV;
        }

        vmd_attach_resources(vmd);
        vmd_setup_dma_ops(vmd);
        dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
        pci_rescan_bus(vmd->bus);

        WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
                               "domain"), "Can't create symlink to domain\n");
        return 0;
}

static irqreturn_t vmd_irq(int irq, void *data)
{
        struct vmd_irq_list *irqs = data;
        struct vmd_irq *vmdirq;
        int idx;

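        /*
         * Walk the demux list under SRCU so vmd_msi_free() can wait out any
         * in-flight handler before freeing a vmd_irq.
         */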
        idx = srcu_read_lock(&irqs->srcu);
        list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
                generic_handle_irq(vmdirq->virq);
        srcu_read_unlock(&irqs->srcu, idx);

        return IRQ_HANDLED;
}

static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
        struct vmd_dev *vmd;
        int i, err;

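        /* The CFGBAR must cover at least one bus (1MB of config space) */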
        if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
                return -ENOMEM;

        vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
        if (!vmd)
                return -ENOMEM;

        vmd->dev = dev;
        err = pcim_enable_device(dev);
        if (err < 0)
                return err;

        vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
        if (!vmd->cfgbar)
                return -ENOMEM;

        pci_set_master(dev);
        if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
            dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
                return -ENODEV;

        vmd->msix_count = pci_msix_vec_count(dev);
        if (vmd->msix_count < 0)
                return -ENODEV;

        vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
                                                PCI_IRQ_MSIX);
        if (vmd->msix_count < 0)
                return vmd->msix_count;

        vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
                                 GFP_KERNEL);
        if (!vmd->irqs)
                return -ENOMEM;

        for (i = 0; i < vmd->msix_count; i++) {
                err = init_srcu_struct(&vmd->irqs[i].srcu);
                if (err)
                        return err;

                INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
                err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
                                       vmd_irq, IRQF_NO_THREAD,
                                       "vmd", &vmd->irqs[i]);
                if (err)
                        return err;
        }

        spin_lock_init(&vmd->cfg_lock);
        pci_set_drvdata(dev, vmd);
        err = vmd_enable_domain(vmd, (unsigned long) id->driver_data);
        if (err)
                return err;

        dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
                 vmd->sysdata.domain);
        return 0;
}

static void vmd_cleanup_srcu(struct vmd_dev *vmd)
{
        int i;

        for (i = 0; i < vmd->msix_count; i++)
                cleanup_srcu_struct(&vmd->irqs[i].srcu);
}

static void vmd_remove(struct pci_dev *dev)
{
        struct vmd_dev *vmd = pci_get_drvdata(dev);

        vmd_detach_resources(vmd);
        sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
        pci_stop_root_bus(vmd->bus);
        pci_remove_root_bus(vmd->bus);
        vmd_cleanup_srcu(vmd);
        vmd_teardown_dma_ops(vmd);
        irq_domain_remove(vmd->irq_domain);
}

#ifdef CONFIG_PM_SLEEP
static int vmd_suspend(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = pci_get_drvdata(pdev);
        int i;

        for (i = 0; i < vmd->msix_count; i++)
                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);

        pci_save_state(pdev);
        return 0;
}

static int vmd_resume(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = pci_get_drvdata(pdev);
        int err, i;

        for (i = 0; i < vmd->msix_count; i++) {
                err = devm_request_irq(dev, pci_irq_vector(pdev, i),
                                       vmd_irq, IRQF_NO_THREAD,
                                       "vmd", &vmd->irqs[i]);
                if (err)
                        return err;
        }

        pci_restore_state(pdev);
        return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);

static const struct pci_device_id vmd_ids[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
                               VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {0,}
};
MODULE_DEVICE_TABLE(pci, vmd_ids);

static struct pci_driver vmd_drv = {
        .name           = "vmd",
        .id_table       = vmd_ids,
        .probe          = vmd_probe,
        .remove         = vmd_remove,
        .driver         = {
                .pm     = &vmd_dev_pm_ops,
        },
};
module_pci_driver(vmd_drv);

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.6");