diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-22 16:45:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-22 16:45:15 -0400 |
commit | 111ad119d1765b1bbef2629a5f2bd825caeb7e74 (patch) | |
tree | 167ee4a4e6e9276bb7178ddcce85d6860543cfb4 /drivers | |
parent | 997271cf5e12c1b38aec0764187094663501c984 (diff) | |
parent | 3a6d28b11a895d08b6b4fc6f16dd9ff995844b45 (diff) |
Merge branch 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen/pciback: Have 'passthrough' option instead of XEN_PCIDEV_BACKEND_PASS and XEN_PCIDEV_BACKEND_VPCI
xen/pciback: Remove the DEBUG option.
xen/pciback: Drop two backends, squash and cleanup some code.
xen/pciback: Print out the MSI/MSI-X (PIRQ) values
xen/pciback: Don't set up a fake IRQ handler for SR-IOV devices.
xen: rename pciback module to xen-pciback.
xen/pciback: Fine-grain the spinlocks and fix BUG: scheduling while atomic cases.
xen/pciback: Allocate IRQ handler for device that is shared with guest.
xen/pciback: Disable MSI/MSI-X when resetting a device
xen/pciback: guest SR-IOV support for PV guest
xen/pciback: Register the owner (domain) of the PCI device.
xen/pciback: Cleanup the driver based on checkpatch warnings and errors.
xen/pciback: xen pci backend driver.
xen: tmem: self-ballooning and frontswap-selfshrinking
xen: Add module alias to autoload backend drivers
xen: Populate xenbus device attributes
xen: Add __attribute__((format(printf... where appropriate
xen: prepare tmem shim to handle frontswap
xen: allow enable use of VGA console on dom0
Diffstat (limited to 'drivers')
23 files changed, 5200 insertions, 52 deletions
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6cc0db1bf522..3f129b45451a 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -684,7 +684,7 @@ again: | |||
684 | 684 | ||
685 | err = xenbus_switch_state(dev, XenbusStateConnected); | 685 | err = xenbus_switch_state(dev, XenbusStateConnected); |
686 | if (err) | 686 | if (err) |
687 | xenbus_dev_fatal(dev, err, "switching to Connected state", | 687 | xenbus_dev_fatal(dev, err, "%s: switching to Connected state", |
688 | dev->nodename); | 688 | dev->nodename); |
689 | 689 | ||
690 | return; | 690 | return; |
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..03bc471c3eed 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -9,6 +9,23 @@ config XEN_BALLOON | |||
9 | the system to expand the domain's memory allocation, or alternatively | 9 | the system to expand the domain's memory allocation, or alternatively |
10 | return unneeded memory to the system. | 10 | return unneeded memory to the system. |
11 | 11 | ||
12 | config XEN_SELFBALLOONING | ||
13 | bool "Dynamically self-balloon kernel memory to target" | ||
14 | depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP | ||
15 | default n | ||
16 | help | ||
17 | Self-ballooning dynamically balloons available kernel memory driven | ||
18 | by the current usage of anonymous memory ("committed AS") and | ||
19 | controlled by various sysfs-settable parameters. Configuring | ||
20 | FRONTSWAP is highly recommended; if it is not configured, self- | ||
21 | ballooning is disabled by default but can be enabled with the | ||
22 | 'selfballooning' kernel boot parameter. If FRONTSWAP is configured, | ||
23 | frontswap-selfshrinking is enabled by default but can be disabled | ||
24 | with the 'noselfshrink' kernel boot parameter; and self-ballooning | ||
25 | is enabled by default but can be disabled with the 'noselfballooning' | ||
26 | kernel boot parameter. Note that systems without a sufficiently | ||
27 | large swap device should not enable self-ballooning. | ||
28 | |||
12 | config XEN_SCRUB_PAGES | 29 | config XEN_SCRUB_PAGES |
13 | bool "Scrub pages before returning them to system" | 30 | bool "Scrub pages before returning them to system" |
14 | depends on XEN_BALLOON | 31 | depends on XEN_BALLOON |
@@ -105,4 +122,33 @@ config SWIOTLB_XEN | |||
105 | depends on PCI | 122 | depends on PCI |
106 | select SWIOTLB | 123 | select SWIOTLB |
107 | 124 | ||
125 | config XEN_TMEM | ||
126 | bool | ||
127 | default y if (CLEANCACHE || FRONTSWAP) | ||
128 | help | ||
129 | Shim to interface in-kernel Transcendent Memory hooks | ||
130 | (e.g. cleancache and frontswap) to Xen tmem hypercalls. | ||
131 | |||
132 | config XEN_PCIDEV_BACKEND | ||
133 | tristate "Xen PCI-device backend driver" | ||
134 | depends on PCI && X86 && XEN | ||
135 | depends on XEN_BACKEND | ||
136 | default m | ||
137 | help | ||
138 | The PCI device backend driver allows the kernel to export arbitrary | ||
139 | PCI devices to other guests. If you select this to be a module, you | ||
140 | will need to make sure no other driver has bound to the device(s) | ||
141 | you want to make visible to other guests. | ||
142 | |||
143 | The parameter "passthrough" allows you to specify how you want the PCI | ||
144 | devices to appear in the guest. You can choose the default (0) where | ||
145 | PCI topology starts at 00.00.0, or (1) for passthrough if you want | ||
146 | the PCI device topology to appear the same as in the host. | ||
147 | |||
148 | The "hide" parameter (only applicable if backend driver is compiled | ||
149 | into the kernel) allows you to bind the PCI devices to this module | ||
150 | from the default device drivers. The argument is the list of PCI BDFs: | ||
151 | xen-pciback.hide=(03:00.0)(04:00.0) | ||
152 | |||
153 | If in doubt, say m. | ||
108 | endmenu | 154 | endmenu |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index bbc18258ecc5..72bbb27d7a68 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,6 +1,5 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o balloon.o | 1 | obj-y += grant-table.o features.o events.o manage.o balloon.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-y += tmem.o | ||
4 | 3 | ||
5 | nostackp := $(call cc-option, -fno-stack-protector) | 4 | nostackp := $(call cc-option, -fno-stack-protector) |
6 | CFLAGS_features.o := $(nostackp) | 5 | CFLAGS_features.o := $(nostackp) |
@@ -9,14 +8,17 @@ obj-$(CONFIG_BLOCK) += biomerge.o | |||
9 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | 8 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o |
10 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 9 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
11 | obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o | 10 | obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o |
11 | obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o | ||
12 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o | 12 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o |
13 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o | 13 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o |
14 | obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o | 14 | obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o |
15 | obj-$(CONFIG_XENFS) += xenfs/ | 15 | obj-$(CONFIG_XENFS) += xenfs/ |
16 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | 16 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o |
17 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o | 17 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o |
18 | obj-$(CONFIG_XEN_TMEM) += tmem.o | ||
18 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o | 19 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o |
19 | obj-$(CONFIG_XEN_DOM0) += pci.o | 20 | obj-$(CONFIG_XEN_DOM0) += pci.o |
21 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ | ||
20 | 22 | ||
21 | xen-evtchn-y := evtchn.o | 23 | xen-evtchn-y := evtchn.o |
22 | xen-gntdev-y := gntdev.o | 24 | xen-gntdev-y := gntdev.o |
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 816a44959ef0..d369965e8f8a 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Xen implementation for transcendent memory (tmem) | 2 | * Xen implementation for transcendent memory (tmem) |
3 | * | 3 | * |
4 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | 4 | * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. |
5 | * Author: Dan Magenheimer | 5 | * Author: Dan Magenheimer |
6 | */ | 6 | */ |
7 | 7 | ||
@@ -9,8 +9,14 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
12 | #include <linux/module.h> | ||
12 | #include <linux/cleancache.h> | 13 | #include <linux/cleancache.h> |
13 | 14 | ||
15 | /* temporary ifdef until include/linux/frontswap.h is upstream */ | ||
16 | #ifdef CONFIG_FRONTSWAP | ||
17 | #include <linux/frontswap.h> | ||
18 | #endif | ||
19 | |||
14 | #include <xen/xen.h> | 20 | #include <xen/xen.h> |
15 | #include <xen/interface/xen.h> | 21 | #include <xen/interface/xen.h> |
16 | #include <asm/xen/hypercall.h> | 22 | #include <asm/xen/hypercall.h> |
@@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) | |||
122 | return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); | 128 | return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); |
123 | } | 129 | } |
124 | 130 | ||
125 | static int xen_tmem_destroy_pool(u32 pool_id) | 131 | int tmem_enabled __read_mostly; |
126 | { | 132 | EXPORT_SYMBOL(tmem_enabled); |
127 | struct tmem_oid oid = { { 0 } }; | ||
128 | |||
129 | return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); | ||
130 | } | ||
131 | |||
132 | int tmem_enabled; | ||
133 | 133 | ||
134 | static int __init enable_tmem(char *s) | 134 | static int __init enable_tmem(char *s) |
135 | { | 135 | { |
@@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) | |||
139 | 139 | ||
140 | __setup("tmem", enable_tmem); | 140 | __setup("tmem", enable_tmem); |
141 | 141 | ||
142 | #ifdef CONFIG_CLEANCACHE | ||
143 | static int xen_tmem_destroy_pool(u32 pool_id) | ||
144 | { | ||
145 | struct tmem_oid oid = { { 0 } }; | ||
146 | |||
147 | return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); | ||
148 | } | ||
149 | |||
142 | /* cleancache ops */ | 150 | /* cleancache ops */ |
143 | 151 | ||
144 | static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, | 152 | static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, |
@@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = { | |||
240 | .init_shared_fs = tmem_cleancache_init_shared_fs, | 248 | .init_shared_fs = tmem_cleancache_init_shared_fs, |
241 | .init_fs = tmem_cleancache_init_fs | 249 | .init_fs = tmem_cleancache_init_fs |
242 | }; | 250 | }; |
251 | #endif | ||
243 | 252 | ||
244 | static int __init xen_tmem_init(void) | 253 | #ifdef CONFIG_FRONTSWAP |
254 | /* frontswap tmem operations */ | ||
255 | |||
256 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | ||
257 | static int tmem_frontswap_poolid; | ||
258 | |||
259 | /* | ||
260 | * Swizzling increases objects per swaptype, increasing tmem concurrency | ||
261 | * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS | ||
262 | */ | ||
263 | #define SWIZ_BITS 4 | ||
264 | #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) | ||
265 | #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) | ||
266 | #define iswiz(_ind) (_ind >> SWIZ_BITS) | ||
267 | |||
268 | static inline struct tmem_oid oswiz(unsigned type, u32 ind) | ||
245 | { | 269 | { |
246 | struct cleancache_ops old_ops; | 270 | struct tmem_oid oid = { .oid = { 0 } }; |
271 | oid.oid[0] = _oswiz(type, ind); | ||
272 | return oid; | ||
273 | } | ||
247 | 274 | ||
275 | /* returns 0 if the page was successfully put into frontswap, -1 if not */ | ||
276 | static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, | ||
277 | struct page *page) | ||
278 | { | ||
279 | u64 ind64 = (u64)offset; | ||
280 | u32 ind = (u32)offset; | ||
281 | unsigned long pfn = page_to_pfn(page); | ||
282 | int pool = tmem_frontswap_poolid; | ||
283 | int ret; | ||
284 | |||
285 | if (pool < 0) | ||
286 | return -1; | ||
287 | if (ind64 != ind) | ||
288 | return -1; | ||
289 | mb(); /* ensure page is quiescent; tmem may address it with an alias */ | ||
290 | ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); | ||
291 | /* translate Xen tmem return values to linux semantics */ | ||
292 | if (ret == 1) | ||
293 | return 0; | ||
294 | else | ||
295 | return -1; | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * returns 0 if the page was successfully gotten from frontswap, -1 if | ||
300 | * was not present (should never happen!) | ||
301 | */ | ||
302 | static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, | ||
303 | struct page *page) | ||
304 | { | ||
305 | u64 ind64 = (u64)offset; | ||
306 | u32 ind = (u32)offset; | ||
307 | unsigned long pfn = page_to_pfn(page); | ||
308 | int pool = tmem_frontswap_poolid; | ||
309 | int ret; | ||
310 | |||
311 | if (pool < 0) | ||
312 | return -1; | ||
313 | if (ind64 != ind) | ||
314 | return -1; | ||
315 | ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); | ||
316 | /* translate Xen tmem return values to linux semantics */ | ||
317 | if (ret == 1) | ||
318 | return 0; | ||
319 | else | ||
320 | return -1; | ||
321 | } | ||
322 | |||
323 | /* flush a single page from frontswap */ | ||
324 | static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) | ||
325 | { | ||
326 | u64 ind64 = (u64)offset; | ||
327 | u32 ind = (u32)offset; | ||
328 | int pool = tmem_frontswap_poolid; | ||
329 | |||
330 | if (pool < 0) | ||
331 | return; | ||
332 | if (ind64 != ind) | ||
333 | return; | ||
334 | (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); | ||
335 | } | ||
336 | |||
337 | /* flush all pages from the passed swaptype */ | ||
338 | static void tmem_frontswap_flush_area(unsigned type) | ||
339 | { | ||
340 | int pool = tmem_frontswap_poolid; | ||
341 | int ind; | ||
342 | |||
343 | if (pool < 0) | ||
344 | return; | ||
345 | for (ind = SWIZ_MASK; ind >= 0; ind--) | ||
346 | (void)xen_tmem_flush_object(pool, oswiz(type, ind)); | ||
347 | } | ||
348 | |||
349 | static void tmem_frontswap_init(unsigned ignored) | ||
350 | { | ||
351 | struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; | ||
352 | |||
353 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | ||
354 | if (tmem_frontswap_poolid < 0) | ||
355 | tmem_frontswap_poolid = | ||
356 | xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); | ||
357 | } | ||
358 | |||
359 | static int __initdata use_frontswap = 1; | ||
360 | |||
361 | static int __init no_frontswap(char *s) | ||
362 | { | ||
363 | use_frontswap = 0; | ||
364 | return 1; | ||
365 | } | ||
366 | |||
367 | __setup("nofrontswap", no_frontswap); | ||
368 | |||
369 | static struct frontswap_ops tmem_frontswap_ops = { | ||
370 | .put_page = tmem_frontswap_put_page, | ||
371 | .get_page = tmem_frontswap_get_page, | ||
372 | .flush_page = tmem_frontswap_flush_page, | ||
373 | .flush_area = tmem_frontswap_flush_area, | ||
374 | .init = tmem_frontswap_init | ||
375 | }; | ||
376 | #endif | ||
377 | |||
378 | static int __init xen_tmem_init(void) | ||
379 | { | ||
248 | if (!xen_domain()) | 380 | if (!xen_domain()) |
249 | return 0; | 381 | return 0; |
382 | #ifdef CONFIG_FRONTSWAP | ||
383 | if (tmem_enabled && use_frontswap) { | ||
384 | char *s = ""; | ||
385 | struct frontswap_ops old_ops = | ||
386 | frontswap_register_ops(&tmem_frontswap_ops); | ||
387 | |||
388 | tmem_frontswap_poolid = -1; | ||
389 | if (old_ops.init != NULL) | ||
390 | s = " (WARNING: frontswap_ops overridden)"; | ||
391 | printk(KERN_INFO "frontswap enabled, RAM provided by " | ||
392 | "Xen Transcendent Memory\n"); | ||
393 | } | ||
394 | #endif | ||
250 | #ifdef CONFIG_CLEANCACHE | 395 | #ifdef CONFIG_CLEANCACHE |
251 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | 396 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); |
252 | if (tmem_enabled && use_cleancache) { | 397 | if (tmem_enabled && use_cleancache) { |
253 | char *s = ""; | 398 | char *s = ""; |
254 | old_ops = cleancache_register_ops(&tmem_cleancache_ops); | 399 | struct cleancache_ops old_ops = |
400 | cleancache_register_ops(&tmem_cleancache_ops); | ||
255 | if (old_ops.init_fs != NULL) | 401 | if (old_ops.init_fs != NULL) |
256 | s = " (WARNING: cleancache_ops overridden)"; | 402 | s = " (WARNING: cleancache_ops overridden)"; |
257 | printk(KERN_INFO "cleancache enabled, RAM provided by " | 403 | printk(KERN_INFO "cleancache enabled, RAM provided by " |
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index a4ff225ee868..5c9dc43c1e94 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c | |||
@@ -98,6 +98,8 @@ static int __init balloon_init(void) | |||
98 | 98 | ||
99 | register_balloon(&balloon_sysdev); | 99 | register_balloon(&balloon_sysdev); |
100 | 100 | ||
101 | register_xen_selfballooning(&balloon_sysdev); | ||
102 | |||
101 | target_watch.callback = watch_target; | 103 | target_watch.callback = watch_target; |
102 | xenstore_notifier.notifier_call = balloon_init_watcher; | 104 | xenstore_notifier.notifier_call = balloon_init_watcher; |
103 | 105 | ||
diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile new file mode 100644 index 000000000000..ffe0ad3438bd --- /dev/null +++ b/drivers/xen/xen-pciback/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o | ||
2 | |||
3 | xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o | ||
4 | xen-pciback-y += conf_space.o conf_space_header.o \ | ||
5 | conf_space_capability.o \ | ||
6 | conf_space_quirks.o vpci.o \ | ||
7 | passthrough.o | ||
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c new file mode 100644 index 000000000000..a8031445d94e --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space.c | |||
@@ -0,0 +1,438 @@ | |||
1 | /* | ||
2 | * PCI Backend - Functions for creating a virtual configuration space for | ||
3 | * exported PCI Devices. | ||
4 | * It's dangerous to allow PCI Driver Domains to change their | ||
5 | * device's resources (memory, i/o ports, interrupts). We need to | ||
6 | * restrict changes to certain PCI Configuration registers: | ||
7 | * BARs, INTERRUPT_PIN, most registers in the header... | ||
8 | * | ||
9 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/pci.h> | ||
14 | #include "pciback.h" | ||
15 | #include "conf_space.h" | ||
16 | #include "conf_space_quirks.h" | ||
17 | |||
18 | #define DRV_NAME "xen-pciback" | ||
19 | static int permissive; | ||
20 | module_param(permissive, bool, 0644); | ||
21 | |||
22 | /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, | ||
23 | * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */ | ||
24 | #define DEFINE_PCI_CONFIG(op, size, type) \ | ||
25 | int xen_pcibk_##op##_config_##size \ | ||
26 | (struct pci_dev *dev, int offset, type value, void *data) \ | ||
27 | { \ | ||
28 | return pci_##op##_config_##size(dev, offset, value); \ | ||
29 | } | ||
30 | |||
31 | DEFINE_PCI_CONFIG(read, byte, u8 *) | ||
32 | DEFINE_PCI_CONFIG(read, word, u16 *) | ||
33 | DEFINE_PCI_CONFIG(read, dword, u32 *) | ||
34 | |||
35 | DEFINE_PCI_CONFIG(write, byte, u8) | ||
36 | DEFINE_PCI_CONFIG(write, word, u16) | ||
37 | DEFINE_PCI_CONFIG(write, dword, u32) | ||
38 | |||
39 | static int conf_space_read(struct pci_dev *dev, | ||
40 | const struct config_field_entry *entry, | ||
41 | int offset, u32 *value) | ||
42 | { | ||
43 | int ret = 0; | ||
44 | const struct config_field *field = entry->field; | ||
45 | |||
46 | *value = 0; | ||
47 | |||
48 | switch (field->size) { | ||
49 | case 1: | ||
50 | if (field->u.b.read) | ||
51 | ret = field->u.b.read(dev, offset, (u8 *) value, | ||
52 | entry->data); | ||
53 | break; | ||
54 | case 2: | ||
55 | if (field->u.w.read) | ||
56 | ret = field->u.w.read(dev, offset, (u16 *) value, | ||
57 | entry->data); | ||
58 | break; | ||
59 | case 4: | ||
60 | if (field->u.dw.read) | ||
61 | ret = field->u.dw.read(dev, offset, value, entry->data); | ||
62 | break; | ||
63 | } | ||
64 | return ret; | ||
65 | } | ||
66 | |||
67 | static int conf_space_write(struct pci_dev *dev, | ||
68 | const struct config_field_entry *entry, | ||
69 | int offset, u32 value) | ||
70 | { | ||
71 | int ret = 0; | ||
72 | const struct config_field *field = entry->field; | ||
73 | |||
74 | switch (field->size) { | ||
75 | case 1: | ||
76 | if (field->u.b.write) | ||
77 | ret = field->u.b.write(dev, offset, (u8) value, | ||
78 | entry->data); | ||
79 | break; | ||
80 | case 2: | ||
81 | if (field->u.w.write) | ||
82 | ret = field->u.w.write(dev, offset, (u16) value, | ||
83 | entry->data); | ||
84 | break; | ||
85 | case 4: | ||
86 | if (field->u.dw.write) | ||
87 | ret = field->u.dw.write(dev, offset, value, | ||
88 | entry->data); | ||
89 | break; | ||
90 | } | ||
91 | return ret; | ||
92 | } | ||
93 | |||
94 | static inline u32 get_mask(int size) | ||
95 | { | ||
96 | if (size == 1) | ||
97 | return 0xff; | ||
98 | else if (size == 2) | ||
99 | return 0xffff; | ||
100 | else | ||
101 | return 0xffffffff; | ||
102 | } | ||
103 | |||
104 | static inline int valid_request(int offset, int size) | ||
105 | { | ||
106 | /* Validate request (no un-aligned requests) */ | ||
107 | if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) | ||
108 | return 1; | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, | ||
113 | int offset) | ||
114 | { | ||
115 | if (offset >= 0) { | ||
116 | new_val_mask <<= (offset * 8); | ||
117 | new_val <<= (offset * 8); | ||
118 | } else { | ||
119 | new_val_mask >>= (offset * -8); | ||
120 | new_val >>= (offset * -8); | ||
121 | } | ||
122 | val = (val & ~new_val_mask) | (new_val & new_val_mask); | ||
123 | |||
124 | return val; | ||
125 | } | ||
126 | |||
127 | static int pcibios_err_to_errno(int err) | ||
128 | { | ||
129 | switch (err) { | ||
130 | case PCIBIOS_SUCCESSFUL: | ||
131 | return XEN_PCI_ERR_success; | ||
132 | case PCIBIOS_DEVICE_NOT_FOUND: | ||
133 | return XEN_PCI_ERR_dev_not_found; | ||
134 | case PCIBIOS_BAD_REGISTER_NUMBER: | ||
135 | return XEN_PCI_ERR_invalid_offset; | ||
136 | case PCIBIOS_FUNC_NOT_SUPPORTED: | ||
137 | return XEN_PCI_ERR_not_implemented; | ||
138 | case PCIBIOS_SET_FAILED: | ||
139 | return XEN_PCI_ERR_access_denied; | ||
140 | } | ||
141 | return err; | ||
142 | } | ||
143 | |||
144 | int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, | ||
145 | u32 *ret_val) | ||
146 | { | ||
147 | int err = 0; | ||
148 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
149 | const struct config_field_entry *cfg_entry; | ||
150 | const struct config_field *field; | ||
151 | int req_start, req_end, field_start, field_end; | ||
152 | /* if read fails for any reason, return 0 | ||
153 | * (as if device didn't respond) */ | ||
154 | u32 value = 0, tmp_val; | ||
155 | |||
156 | if (unlikely(verbose_request)) | ||
157 | printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n", | ||
158 | pci_name(dev), size, offset); | ||
159 | |||
160 | if (!valid_request(offset, size)) { | ||
161 | err = XEN_PCI_ERR_invalid_offset; | ||
162 | goto out; | ||
163 | } | ||
164 | |||
165 | /* Get the real value first, then modify as appropriate */ | ||
166 | switch (size) { | ||
167 | case 1: | ||
168 | err = pci_read_config_byte(dev, offset, (u8 *) &value); | ||
169 | break; | ||
170 | case 2: | ||
171 | err = pci_read_config_word(dev, offset, (u16 *) &value); | ||
172 | break; | ||
173 | case 4: | ||
174 | err = pci_read_config_dword(dev, offset, &value); | ||
175 | break; | ||
176 | } | ||
177 | |||
178 | list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | ||
179 | field = cfg_entry->field; | ||
180 | |||
181 | req_start = offset; | ||
182 | req_end = offset + size; | ||
183 | field_start = OFFSET(cfg_entry); | ||
184 | field_end = OFFSET(cfg_entry) + field->size; | ||
185 | |||
186 | if ((req_start >= field_start && req_start < field_end) | ||
187 | || (req_end > field_start && req_end <= field_end)) { | ||
188 | err = conf_space_read(dev, cfg_entry, field_start, | ||
189 | &tmp_val); | ||
190 | if (err) | ||
191 | goto out; | ||
192 | |||
193 | value = merge_value(value, tmp_val, | ||
194 | get_mask(field->size), | ||
195 | field_start - req_start); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | out: | ||
200 | if (unlikely(verbose_request)) | ||
201 | printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n", | ||
202 | pci_name(dev), size, offset, value); | ||
203 | |||
204 | *ret_val = value; | ||
205 | return pcibios_err_to_errno(err); | ||
206 | } | ||
207 | |||
208 | int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) | ||
209 | { | ||
210 | int err = 0, handled = 0; | ||
211 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
212 | const struct config_field_entry *cfg_entry; | ||
213 | const struct config_field *field; | ||
214 | u32 tmp_val; | ||
215 | int req_start, req_end, field_start, field_end; | ||
216 | |||
217 | if (unlikely(verbose_request)) | ||
218 | printk(KERN_DEBUG | ||
219 | DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n", | ||
220 | pci_name(dev), size, offset, value); | ||
221 | |||
222 | if (!valid_request(offset, size)) | ||
223 | return XEN_PCI_ERR_invalid_offset; | ||
224 | |||
225 | list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | ||
226 | field = cfg_entry->field; | ||
227 | |||
228 | req_start = offset; | ||
229 | req_end = offset + size; | ||
230 | field_start = OFFSET(cfg_entry); | ||
231 | field_end = OFFSET(cfg_entry) + field->size; | ||
232 | |||
233 | if ((req_start >= field_start && req_start < field_end) | ||
234 | || (req_end > field_start && req_end <= field_end)) { | ||
235 | tmp_val = 0; | ||
236 | |||
237 | err = xen_pcibk_config_read(dev, field_start, | ||
238 | field->size, &tmp_val); | ||
239 | if (err) | ||
240 | break; | ||
241 | |||
242 | tmp_val = merge_value(tmp_val, value, get_mask(size), | ||
243 | req_start - field_start); | ||
244 | |||
245 | err = conf_space_write(dev, cfg_entry, field_start, | ||
246 | tmp_val); | ||
247 | |||
248 | /* handled is set true here, but not every byte | ||
249 | * may have been written! Properly detecting if | ||
250 | * every byte is handled is unnecessary as the | ||
251 | * flag is used to detect devices that need | ||
252 | * special helpers to work correctly. | ||
253 | */ | ||
254 | handled = 1; | ||
255 | } | ||
256 | } | ||
257 | |||
258 | if (!handled && !err) { | ||
259 | /* By default, anything not specifically handled above is | ||
260 | * read-only. The permissive flag changes this behavior so | ||
261 | * that anything not specifically handled above is writable. | ||
262 | * This means that some fields may still be read-only because | ||
263 | * they have entries in the config_field list that intercept | ||
264 | * the write and do nothing. */ | ||
265 | if (dev_data->permissive || permissive) { | ||
266 | switch (size) { | ||
267 | case 1: | ||
268 | err = pci_write_config_byte(dev, offset, | ||
269 | (u8) value); | ||
270 | break; | ||
271 | case 2: | ||
272 | err = pci_write_config_word(dev, offset, | ||
273 | (u16) value); | ||
274 | break; | ||
275 | case 4: | ||
276 | err = pci_write_config_dword(dev, offset, | ||
277 | (u32) value); | ||
278 | break; | ||
279 | } | ||
280 | } else if (!dev_data->warned_on_write) { | ||
281 | dev_data->warned_on_write = 1; | ||
282 | dev_warn(&dev->dev, "Driver tried to write to a " | ||
283 | "read-only configuration space field at offset" | ||
284 | " 0x%x, size %d. This may be harmless, but if " | ||
285 | "you have problems with your device:\n" | ||
286 | "1) see permissive attribute in sysfs\n" | ||
287 | "2) report problems to the xen-devel " | ||
288 | "mailing list along with details of your " | ||
289 | "device obtained from lspci.\n", offset, size); | ||
290 | } | ||
291 | } | ||
292 | |||
293 | return pcibios_err_to_errno(err); | ||
294 | } | ||
295 | |||
296 | void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) | ||
297 | { | ||
298 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
299 | struct config_field_entry *cfg_entry, *t; | ||
300 | const struct config_field *field; | ||
301 | |||
302 | dev_dbg(&dev->dev, "free-ing dynamically allocated virtual " | ||
303 | "configuration space fields\n"); | ||
304 | if (!dev_data) | ||
305 | return; | ||
306 | |||
307 | list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { | ||
308 | field = cfg_entry->field; | ||
309 | |||
310 | if (field->clean) { | ||
311 | field->clean((struct config_field *)field); | ||
312 | |||
313 | kfree(cfg_entry->data); | ||
314 | |||
315 | list_del(&cfg_entry->list); | ||
316 | kfree(cfg_entry); | ||
317 | } | ||
318 | |||
319 | } | ||
320 | } | ||
321 | |||
322 | void xen_pcibk_config_reset_dev(struct pci_dev *dev) | ||
323 | { | ||
324 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
325 | const struct config_field_entry *cfg_entry; | ||
326 | const struct config_field *field; | ||
327 | |||
328 | dev_dbg(&dev->dev, "resetting virtual configuration space\n"); | ||
329 | if (!dev_data) | ||
330 | return; | ||
331 | |||
332 | list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | ||
333 | field = cfg_entry->field; | ||
334 | |||
335 | if (field->reset) | ||
336 | field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); | ||
337 | } | ||
338 | } | ||
339 | |||
340 | void xen_pcibk_config_free_dev(struct pci_dev *dev) | ||
341 | { | ||
342 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
343 | struct config_field_entry *cfg_entry, *t; | ||
344 | const struct config_field *field; | ||
345 | |||
346 | dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); | ||
347 | if (!dev_data) | ||
348 | return; | ||
349 | |||
350 | list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { | ||
351 | list_del(&cfg_entry->list); | ||
352 | |||
353 | field = cfg_entry->field; | ||
354 | |||
355 | if (field->release) | ||
356 | field->release(dev, OFFSET(cfg_entry), cfg_entry->data); | ||
357 | |||
358 | kfree(cfg_entry); | ||
359 | } | ||
360 | } | ||
361 | |||
362 | int xen_pcibk_config_add_field_offset(struct pci_dev *dev, | ||
363 | const struct config_field *field, | ||
364 | unsigned int base_offset) | ||
365 | { | ||
366 | int err = 0; | ||
367 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
368 | struct config_field_entry *cfg_entry; | ||
369 | void *tmp; | ||
370 | |||
371 | cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); | ||
372 | if (!cfg_entry) { | ||
373 | err = -ENOMEM; | ||
374 | goto out; | ||
375 | } | ||
376 | |||
377 | cfg_entry->data = NULL; | ||
378 | cfg_entry->field = field; | ||
379 | cfg_entry->base_offset = base_offset; | ||
380 | |||
381 | /* silently ignore duplicate fields */ | ||
382 | err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry)); | ||
383 | if (err) | ||
384 | goto out; | ||
385 | |||
386 | if (field->init) { | ||
387 | tmp = field->init(dev, OFFSET(cfg_entry)); | ||
388 | |||
389 | if (IS_ERR(tmp)) { | ||
390 | err = PTR_ERR(tmp); | ||
391 | goto out; | ||
392 | } | ||
393 | |||
394 | cfg_entry->data = tmp; | ||
395 | } | ||
396 | |||
397 | dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", | ||
398 | OFFSET(cfg_entry)); | ||
399 | list_add_tail(&cfg_entry->list, &dev_data->config_fields); | ||
400 | |||
401 | out: | ||
402 | if (err) | ||
403 | kfree(cfg_entry); | ||
404 | |||
405 | return err; | ||
406 | } | ||
407 | |||
408 | /* This sets up the device's virtual configuration space to keep track of | ||
409 | * certain registers (like the base address registers (BARs) so that we can | ||
410 | * keep the client from manipulating them directly. | ||
411 | */ | ||
412 | int xen_pcibk_config_init_dev(struct pci_dev *dev) | ||
413 | { | ||
414 | int err = 0; | ||
415 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
416 | |||
417 | dev_dbg(&dev->dev, "initializing virtual configuration space\n"); | ||
418 | |||
419 | INIT_LIST_HEAD(&dev_data->config_fields); | ||
420 | |||
421 | err = xen_pcibk_config_header_add_fields(dev); | ||
422 | if (err) | ||
423 | goto out; | ||
424 | |||
425 | err = xen_pcibk_config_capability_add_fields(dev); | ||
426 | if (err) | ||
427 | goto out; | ||
428 | |||
429 | err = xen_pcibk_config_quirks_init(dev); | ||
430 | |||
431 | out: | ||
432 | return err; | ||
433 | } | ||
434 | |||
435 | int xen_pcibk_config_init(void) | ||
436 | { | ||
437 | return xen_pcibk_config_capability_init(); | ||
438 | } | ||
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h new file mode 100644 index 000000000000..e56c934ad137 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space.h | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * PCI Backend - Common data structures for overriding the configuration space | ||
3 | * | ||
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | */ | ||
6 | |||
7 | #ifndef __XEN_PCIBACK_CONF_SPACE_H__ | ||
8 | #define __XEN_PCIBACK_CONF_SPACE_H__ | ||
9 | |||
10 | #include <linux/list.h> | ||
11 | #include <linux/err.h> | ||
12 | |||
13 | /* conf_field_init can return an errno in a ptr with ERR_PTR() */ | ||
14 | typedef void *(*conf_field_init) (struct pci_dev *dev, int offset); | ||
15 | typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data); | ||
16 | typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data); | ||
17 | |||
18 | typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value, | ||
19 | void *data); | ||
20 | typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value, | ||
21 | void *data); | ||
22 | typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value, | ||
23 | void *data); | ||
24 | typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value, | ||
25 | void *data); | ||
26 | typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value, | ||
27 | void *data); | ||
28 | typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value, | ||
29 | void *data); | ||
30 | |||
/* These are the fields within the configuration space which we
 * are interested in intercepting reads/writes to and changing their
 * values.
 */
struct config_field {
	unsigned int offset;	/* offset relative to the entry's base_offset */
	unsigned int size;	/* access width in bytes (1, 2 or 4);
				 * size == 0 terminates a field array */
	unsigned int mask;
	conf_field_init init;	/* optional; may return an errno via ERR_PTR() */
	conf_field_reset reset;
	conf_field_free release;
	void (*clean) (struct config_field *field);
	/* Read/write handlers; the member used is selected by ->size.
	 * A NULL handler makes the field read-only (or write-only). */
	union {
		struct {
			conf_dword_write write;
			conf_dword_read read;
		} dw;
		struct {
			conf_word_write write;
			conf_word_read read;
		} w;
		struct {
			conf_byte_write write;
			conf_byte_read read;
		} b;
	} u;
	struct list_head list;
};

/* Per-device instance of a field: binds the (shared, const) description
 * to the base offset it was installed at on this device, plus any
 * per-device state returned by field->init(). */
struct config_field_entry {
	struct list_head list;
	const struct config_field *field;
	unsigned int base_offset;
	void *data;		/* value returned by field->init(), if any */
};

/* Effective config-space offset of an installed field entry. */
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
68 | |||
69 | /* Add fields to a device - the add_fields macro expects to get a pointer to | ||
70 | * the first entry in an array (of which the ending is marked by size==0) | ||
71 | */ | ||
72 | int xen_pcibk_config_add_field_offset(struct pci_dev *dev, | ||
73 | const struct config_field *field, | ||
74 | unsigned int offset); | ||
75 | |||
/* Convenience wrapper: add a single field at base offset 0, i.e. within
 * the standard configuration header. */
static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
					     const struct config_field *field)
{
	return xen_pcibk_config_add_field_offset(dev, field, 0);
}
81 | |||
82 | static inline int xen_pcibk_config_add_fields(struct pci_dev *dev, | ||
83 | const struct config_field *field) | ||
84 | { | ||
85 | int i, err = 0; | ||
86 | for (i = 0; field[i].size != 0; i++) { | ||
87 | err = xen_pcibk_config_add_field(dev, &field[i]); | ||
88 | if (err) | ||
89 | break; | ||
90 | } | ||
91 | return err; | ||
92 | } | ||
93 | |||
94 | static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev, | ||
95 | const struct config_field *field, | ||
96 | unsigned int offset) | ||
97 | { | ||
98 | int i, err = 0; | ||
99 | for (i = 0; field[i].size != 0; i++) { | ||
100 | err = xen_pcibk_config_add_field_offset(dev, &field[i], offset); | ||
101 | if (err) | ||
102 | break; | ||
103 | } | ||
104 | return err; | ||
105 | } | ||
106 | |||
107 | /* Read/Write the real configuration space */ | ||
108 | int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value, | ||
109 | void *data); | ||
110 | int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value, | ||
111 | void *data); | ||
112 | int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value, | ||
113 | void *data); | ||
114 | int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value, | ||
115 | void *data); | ||
116 | int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value, | ||
117 | void *data); | ||
118 | int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value, | ||
119 | void *data); | ||
120 | |||
121 | int xen_pcibk_config_capability_init(void); | ||
122 | |||
123 | int xen_pcibk_config_header_add_fields(struct pci_dev *dev); | ||
124 | int xen_pcibk_config_capability_add_fields(struct pci_dev *dev); | ||
125 | |||
126 | #endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ | ||
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c new file mode 100644 index 000000000000..7f83e9083e9d --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability.c | |||
@@ -0,0 +1,207 @@ | |||
1 | /* | ||
2 | * PCI Backend - Handles the virtual fields found on the capability lists | ||
3 | * in the configuration space. | ||
4 | * | ||
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/pci.h> | ||
10 | #include "pciback.h" | ||
11 | #include "conf_space.h" | ||
12 | |||
13 | static LIST_HEAD(capabilities); | ||
14 | struct xen_pcibk_config_capability { | ||
15 | struct list_head cap_list; | ||
16 | |||
17 | int capability; | ||
18 | |||
19 | /* If the device has the capability found above, add these fields */ | ||
20 | const struct config_field *fields; | ||
21 | }; | ||
22 | |||
/* Overlay for the generic two-byte capability list header (installed at
 * every capability found): ID and next pointer are readable, but the
 * NULL write handler makes them read-only for the guest. */
static const struct config_field caplist_header[] = {
	{
	 .offset    = PCI_CAP_LIST_ID,
	 /* Size field encompasses both ID and next-pointer bytes. */
	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
	 .u.w.read  = xen_pcibk_read_config_word,
	 .u.w.write = NULL,
	},
	{}
};
32 | |||
/* Append a capability overlay description to the global list consulted
 * by xen_pcibk_config_capability_add_fields(). */
static inline void register_capability(struct xen_pcibk_config_capability *cap)
{
	list_add_tail(&cap->cap_list, &capabilities);
}
37 | |||
/* For every registered capability overlay that @dev actually advertises,
 * install the generic capability-list header fields plus the overlay's
 * own fields, all based at the capability's offset in config space.
 * Returns 0 on success or the first error encountered. */
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
{
	int err = 0;
	struct xen_pcibk_config_capability *cap;
	int cap_offset;

	list_for_each_entry(cap, &capabilities, cap_list) {
		cap_offset = pci_find_capability(dev, cap->capability);
		if (cap_offset) {
			dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
				cap->capability, cap_offset);

			/* Generic header first, then the capability's
			 * specific registers, both rebased at cap_offset. */
			err = xen_pcibk_config_add_fields_offset(dev,
								 caplist_header,
								 cap_offset);
			if (err)
				goto out;
			err = xen_pcibk_config_add_fields_offset(dev,
								 cap->fields,
								 cap_offset);
			if (err)
				goto out;
		}
	}

out:
	return err;
}
66 | |||
67 | static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, | ||
68 | void *data) | ||
69 | { | ||
70 | /* Disallow writes to the vital product data */ | ||
71 | if (value & PCI_VPD_ADDR_F) | ||
72 | return PCIBIOS_SET_FAILED; | ||
73 | else | ||
74 | return pci_write_config_word(dev, offset, value); | ||
75 | } | ||
76 | |||
/* VPD capability overlay: address writes are filtered (see
 * vpd_address_write); the data register is read-only (NULL write). */
static const struct config_field caplist_vpd[] = {
	{
	 .offset    = PCI_VPD_ADDR,
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_config_word,
	 .u.w.write = vpd_address_write,
	},
	{
	 .offset     = PCI_VPD_DATA,
	 .size       = 4,
	 .u.dw.read  = xen_pcibk_read_config_dword,
	 .u.dw.write = NULL,
	},
	{}
};
92 | |||
/* Read handler for the PM capabilities register (PCI_PM_PMC): returns
 * the hardware value with the PME-support bits masked out, so the guest
 * does not believe it can use PMEs (they are shared; see pm_ctrl_write).
 * On a failed config read, *value is left untouched and the error is
 * returned. */
static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
			void *data)
{
	int err;
	u16 real_value;

	err = pci_read_config_word(dev, offset, &real_value);
	if (err)
		goto out;

	*value = real_value & ~PCI_PM_CAP_PME_MASK;

out:
	return err;
}
108 | |||
/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
 * Can't allow driver domain to enable PMEs - they're shared */
#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)

/* Write handler for the PM control/status register (PCI_PM_CTRL).
 * Only the PM_OK_BITS portion is written directly to hardware (and only
 * if it actually changed); the power-state field is handed to the PCI
 * core via pci_set_power_state() instead of being written raw.
 * Returns 0 or a PCIBIOS_*/errno-style error. */
static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
			 void *data)
{
	int err;
	u16 old_value;
	pci_power_t new_state, old_state;

	err = pci_read_config_word(dev, offset, &old_value);
	if (err)
		goto out;

	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);

	/* Merge only the permitted bits into the current hardware value. */
	new_value &= PM_OK_BITS;
	if ((old_value & PM_OK_BITS) != new_value) {
		new_value = (old_value & ~PM_OK_BITS) | new_value;
		err = pci_write_config_word(dev, offset, new_value);
		if (err)
			goto out;
	}

	/* Let pci core handle the power management change */
	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
	err = pci_set_power_state(dev, new_state);
	if (err) {
		err = PCIBIOS_SET_FAILED;
		goto out;
	}

out:
	return err;
}
146 | |||
/* Ensure PMEs are disabled */
/* init handler for the PCI_PM_CTRL field: clears PCI_PM_CTRL_PME_ENABLE
 * in hardware if set. Returns ERR_PTR(err); note ERR_PTR(0) == NULL, so
 * on success the field simply gets no private data. */
static void *pm_ctrl_init(struct pci_dev *dev, int offset)
{
	int err;
	u16 value;

	err = pci_read_config_word(dev, offset, &value);
	if (err)
		goto out;

	if (value & PCI_PM_CTRL_PME_ENABLE) {
		value &= ~PCI_PM_CTRL_PME_ENABLE;
		err = pci_write_config_word(dev, offset, value);
	}

out:
	return ERR_PTR(err);
}
165 | |||
/* Power-management capability overlay: PMC reads have the PME-support
 * bits masked (pm_caps_read), control writes are filtered through
 * pm_ctrl_write, and the remaining registers are read-only (no write
 * handler installed). */
static const struct config_field caplist_pm[] = {
	{
	 .offset    = PCI_PM_PMC,
	 .size      = 2,
	 .u.w.read  = pm_caps_read,
	},
	{
	 .offset    = PCI_PM_CTRL,
	 .size      = 2,
	 .init      = pm_ctrl_init,	/* disables PMEs up front */
	 .u.w.read  = xen_pcibk_read_config_word,
	 .u.w.write = pm_ctrl_write,
	},
	{
	 .offset    = PCI_PM_PPB_EXTENSIONS,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	},
	{
	 .offset    = PCI_PM_DATA_REGISTER,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	},
	{}
};
191 | |||
/* Descriptors tying a capability ID to its field overlay; registered
 * on the global list by xen_pcibk_config_capability_init(). */
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
	.capability = PCI_CAP_ID_PM,
	.fields = caplist_pm,
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
	.capability = PCI_CAP_ID_VPD,
	.fields = caplist_vpd,
};
200 | |||
/* Register the built-in capability overlays (VPD, then PM). Called once
 * from xen_pcibk_config_init(); always succeeds. */
int xen_pcibk_config_capability_init(void)
{
	register_capability(&xen_pcibk_config_capability_vpd);
	register_capability(&xen_pcibk_config_capability_pm);

	return 0;
}
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c new file mode 100644 index 000000000000..da3cbdfcb5dc --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_header.c | |||
@@ -0,0 +1,386 @@ | |||
1 | /* | ||
2 | * PCI Backend - Handles the virtual fields in the configuration space headers. | ||
3 | * | ||
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/pci.h> | ||
9 | #include "pciback.h" | ||
10 | #include "conf_space.h" | ||
11 | |||
/* Per-BAR virtual state kept as the field's private data. */
struct pci_bar_info {
	u32 val;	/* virtualized BAR value (address + flag bits) */
	u32 len_val;	/* value exposed while the guest sizes the BAR */
	int which;	/* nonzero: next read returns len_val, not val */
};

#define DRV_NAME	"xen-pciback"
/* Command-register bit tests used by command_write(). */
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
21 | |||
/* Read handler for PCI_COMMAND: returns the hardware value, but when
 * the device is enabled (enable_cnt > 0) it also forces the IO/MEMORY
 * decode bits on for every resource type the device has, so the guest
 * sees the device as decoding its resources. */
static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
{
	int i;
	int ret;

	ret = xen_pcibk_read_config_word(dev, offset, value, data);
	if (!atomic_read(&dev->enable_cnt))
		return ret;

	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
		if (dev->resource[i].flags & IORESOURCE_IO)
			*value |= PCI_COMMAND_IO;
		if (dev->resource[i].flags & IORESOURCE_MEM)
			*value |= PCI_COMMAND_MEMORY;
	}

	return ret;
}
40 | |||
/* Write handler for PCI_COMMAND: translates guest writes into the
 * proper kernel API calls (pci_enable_device/pci_disable_device,
 * pci_set_master, pci_set_mwi) before writing the value to hardware.
 * If enabling MWI fails, the bit is stripped from the value written. */
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
	struct xen_pcibk_dev_data *dev_data;
	int err;

	dev_data = pci_get_drvdata(dev);
	/* Transition disabled -> enabled when the guest turns on decode. */
	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
			       pci_name(dev));
		err = pci_enable_device(dev);
		if (err)
			return err;
		if (dev_data)
			dev_data->enable_intx = 1;
	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
			       pci_name(dev));
		pci_disable_device(dev);
		if (dev_data)
			dev_data->enable_intx = 0;
	}

	if (!dev->is_busmaster && is_master_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
			       pci_name(dev));
		pci_set_master(dev);
	}

	if (value & PCI_COMMAND_INVALIDATE) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG
			       DRV_NAME ": %s: enable memory-write-invalidate\n",
			       pci_name(dev));
		err = pci_set_mwi(dev);
		if (err) {
			/* Not fatal: just don't pass the bit through. */
			printk(KERN_WARNING
			       DRV_NAME ": %s: cannot enable "
			       "memory-write-invalidate (%d)\n",
			       pci_name(dev), err);
			value &= ~PCI_COMMAND_INVALIDATE;
		}
	}

	return pci_write_config_word(dev, offset, value);
}
89 | |||
/* Write handler for the expansion ROM BAR. A sizing write (all ones
 * except the enable bit) switches subsequent reads to the length value;
 * any other write only allows restoring the original BAR value. */
static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
	struct pci_bar_info *bar = data;

	if (unlikely(!bar)) {
		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
		       pci_name(dev));
		return XEN_PCI_ERR_op_failed;
	}

	/* A write to obtain the length must happen as a 32-bit write.
	 * This does not (yet) support writing individual bytes
	 */
	if (value == ~PCI_ROM_ADDRESS_ENABLE)
		bar->which = 1;
	else {
		u32 tmpval;
		pci_read_config_dword(dev, offset, &tmpval);
		if (tmpval != bar->val && value == bar->val) {
			/* Allow restoration of bar value. */
			pci_write_config_dword(dev, offset, bar->val);
		}
		bar->which = 0;
	}

	/* Do we need to support enabling/disabling the rom address here? */

	return 0;
}
119 | |||
/* For the BARs, only allow writes which write ~0 or
 * the correct resource information
 * (Needed for when the driver probes the resource usage)
 */
static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
	struct pci_bar_info *bar = data;

	if (unlikely(!bar)) {
		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
		       pci_name(dev));
		return XEN_PCI_ERR_op_failed;
	}

	/* A write to obtain the length must happen as a 32-bit write.
	 * This does not (yet) support writing individual bytes
	 */
	if (value == ~0)
		bar->which = 1;	/* sizing probe: expose len_val on next read */
	else {
		u32 tmpval;
		pci_read_config_dword(dev, offset, &tmpval);
		if (tmpval != bar->val && value == bar->val) {
			/* Allow restoration of bar value. */
			pci_write_config_dword(dev, offset, bar->val);
		}
		bar->which = 0;
	}

	return 0;
}
151 | |||
152 | static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) | ||
153 | { | ||
154 | struct pci_bar_info *bar = data; | ||
155 | |||
156 | if (unlikely(!bar)) { | ||
157 | printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", | ||
158 | pci_name(dev)); | ||
159 | return XEN_PCI_ERR_op_failed; | ||
160 | } | ||
161 | |||
162 | *value = bar->which ? bar->len_val : bar->val; | ||
163 | |||
164 | return 0; | ||
165 | } | ||
166 | |||
/* Snapshot a BAR's address and size from the kernel's resource table
 * into @bar_info. For the upper half of a 64-bit memory BAR (detected
 * via the flags of the preceding resource) the high 32 bits of
 * start/end are stored instead.
 * NOTE(review): the @len_mask parameter is currently unused — confirm
 * whether len_val should be masked with it. */
static inline void read_dev_bar(struct pci_dev *dev,
				struct pci_bar_info *bar_info, int offset,
				u32 len_mask)
{
	int pos;
	struct resource *res = dev->resource;

	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
		pos = PCI_ROM_RESOURCE;
	else {
		/* BAR registers are dword-sized and consecutive. */
		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
			/* Upper dword of a 64-bit BAR. */
			bar_info->val = res[pos - 1].start >> 32;
			bar_info->len_val = res[pos - 1].end >> 32;
			return;
		}
	}

	bar_info->val = res[pos].start |
			(res[pos].flags & PCI_REGION_FLAG_MASK);
	bar_info->len_val = res[pos].end - res[pos].start + 1;
}
192 | |||
193 | static void *bar_init(struct pci_dev *dev, int offset) | ||
194 | { | ||
195 | struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); | ||
196 | |||
197 | if (!bar) | ||
198 | return ERR_PTR(-ENOMEM); | ||
199 | |||
200 | read_dev_bar(dev, bar, offset, ~0); | ||
201 | bar->which = 0; | ||
202 | |||
203 | return bar; | ||
204 | } | ||
205 | |||
/* init handler for the expansion ROM BAR field; like bar_init() but the
 * ROM's enable bit is excluded from the length mask. Returns the new
 * pci_bar_info or ERR_PTR(-ENOMEM). */
static void *rom_init(struct pci_dev *dev, int offset)
{
	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);

	if (!bar)
		return ERR_PTR(-ENOMEM);

	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
	bar->which = 0;

	return bar;
}
218 | |||
/* reset handler: leave any pending sizing-probe state. */
static void bar_reset(struct pci_dev *dev, int offset, void *data)
{
	struct pci_bar_info *bar = data;

	bar->which = 0;
}

/* release handler: free the pci_bar_info allocated by bar_init/rom_init. */
static void bar_release(struct pci_dev *dev, int offset, void *data)
{
	kfree(data);
}
230 | |||
/* Read handlers that serve cached identity values from struct pci_dev
 * instead of touching hardware. */
static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
				 u16 *value, void *data)
{
	*value = dev->vendor;

	return 0;
}

static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
				 u16 *value, void *data)
{
	*value = dev->device;

	return 0;
}

/* Report the kernel-assigned IRQ (truncated to the 8-bit register)
 * rather than whatever the hardware register holds. */
static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
			  void *data)
{
	*value = (u8) dev->irq;

	return 0;
}
254 | |||
255 | static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) | ||
256 | { | ||
257 | u8 cur_value; | ||
258 | int err; | ||
259 | |||
260 | err = pci_read_config_byte(dev, offset, &cur_value); | ||
261 | if (err) | ||
262 | goto out; | ||
263 | |||
264 | if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) | ||
265 | || value == PCI_BIST_START) | ||
266 | err = pci_write_config_byte(dev, offset, value); | ||
267 | |||
268 | out: | ||
269 | return err; | ||
270 | } | ||
271 | |||
/* Fields of the type-independent part of the configuration header that
 * every exported device intercepts. Entries without a write handler are
 * read-only from the guest's point of view. */
static const struct config_field header_common[] = {
	{
	 .offset    = PCI_VENDOR_ID,
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_vendor,
	},
	{
	 .offset    = PCI_DEVICE_ID,
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_device,
	},
	{
	 .offset    = PCI_COMMAND,
	 .size      = 2,
	 .u.w.read  = command_read,
	 .u.w.write = command_write,
	},
	{
	 .offset    = PCI_INTERRUPT_LINE,
	 .size      = 1,
	 .u.b.read  = interrupt_read,
	},
	{
	 .offset    = PCI_INTERRUPT_PIN,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	},
	{
	 /* Any side effects of letting driver domain control cache line? */
	 .offset    = PCI_CACHE_LINE_SIZE,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	 .u.b.write = xen_pcibk_write_config_byte,
	},
	{
	 .offset    = PCI_LATENCY_TIMER,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	},
	{
	 .offset    = PCI_BIST,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	 .u.b.write = bist_write,
	},
	{}
};
319 | |||
/* Shorthand for a fully-virtualized BAR field (bar_* handlers). */
#define CFG_FIELD_BAR(reg_offset)			\
	{						\
	.offset     = reg_offset,			\
	.size       = 4,				\
	.init       = bar_init,				\
	.reset      = bar_reset,			\
	.release    = bar_release,			\
	.u.dw.read  = bar_read,				\
	.u.dw.write = bar_write,			\
	}

/* Same, but with the ROM-specific write handler. */
#define CFG_FIELD_ROM(reg_offset)			\
	{						\
	.offset     = reg_offset,			\
	.size       = 4,				\
	.init       = rom_init,				\
	.reset      = bar_reset,			\
	.release    = bar_release,			\
	.u.dw.read  = bar_read,				\
	.u.dw.write = rom_write,			\
	}

/* Type 0 (normal device) header: six BARs plus the expansion ROM. */
static const struct config_field header_0[] = {
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
	{}
};

/* Type 1 (PCI-PCI bridge) header: two BARs and its ROM register. */
static const struct config_field header_1[] = {
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
	{}
};
359 | |||
/* Install the header overlay fields for @dev: the type-independent
 * fields plus the type-specific BAR/ROM fields. Only normal devices and
 * bridges are supported; any other header type yields -EINVAL. */
int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
{
	int err;

	err = xen_pcibk_config_add_fields(dev, header_common);
	if (err)
		goto out;

	switch (dev->hdr_type) {
	case PCI_HEADER_TYPE_NORMAL:
		err = xen_pcibk_config_add_fields(dev, header_0);
		break;

	case PCI_HEADER_TYPE_BRIDGE:
		err = xen_pcibk_config_add_fields(dev, header_1);
		break;

	default:
		err = -EINVAL;
		printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n",
		       pci_name(dev), dev->hdr_type);
		break;
	}

out:
	return err;
}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c new file mode 100644 index 000000000000..921a889e65eb --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_quirks.c | |||
@@ -0,0 +1,140 @@ | |||
1 | /* | ||
2 | * PCI Backend - Handle special overlays for broken devices. | ||
3 | * | ||
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | * Author: Chris Bookholt <hap10@epoch.ncsc.mil> | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/pci.h> | ||
10 | #include "pciback.h" | ||
11 | #include "conf_space.h" | ||
12 | #include "conf_space_quirks.h" | ||
13 | |||
14 | LIST_HEAD(xen_pcibk_quirks); | ||
15 | #define DRV_NAME "xen-pciback" | ||
16 | static inline const struct pci_device_id * | ||
17 | match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) | ||
18 | { | ||
19 | if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && | ||
20 | (id->device == PCI_ANY_ID || id->device == dev->device) && | ||
21 | (id->subvendor == PCI_ANY_ID || | ||
22 | id->subvendor == dev->subsystem_vendor) && | ||
23 | (id->subdevice == PCI_ANY_ID || | ||
24 | id->subdevice == dev->subsystem_device) && | ||
25 | !((id->class ^ dev->class) & id->class_mask)) | ||
26 | return id; | ||
27 | return NULL; | ||
28 | } | ||
29 | |||
/* Look up the quirk entry registered for @dev on the global quirk list.
 * Returns the matching entry, or NULL (with a debug message) if none. */
static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
{
	struct xen_pcibk_config_quirk *tmp_quirk;

	list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list)
		if (match_one_device(&tmp_quirk->devid, dev) != NULL)
			goto out;
	tmp_quirk = NULL;
	printk(KERN_DEBUG DRV_NAME
	       ":quirk didn't match any device xen_pciback knows about\n");
out:
	return tmp_quirk;
}
43 | |||
/* Append a quirk entry to the global list searched by
 * xen_pcibk_find_quirk(). */
static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
{
	list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
}
48 | |||
49 | int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg) | ||
50 | { | ||
51 | int ret = 0; | ||
52 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); | ||
53 | struct config_field_entry *cfg_entry; | ||
54 | |||
55 | list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | ||
56 | if (OFFSET(cfg_entry) == reg) { | ||
57 | ret = 1; | ||
58 | break; | ||
59 | } | ||
60 | } | ||
61 | return ret; | ||
62 | } | ||
63 | |||
64 | int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field | ||
65 | *field) | ||
66 | { | ||
67 | int err = 0; | ||
68 | |||
69 | switch (field->size) { | ||
70 | case 1: | ||
71 | field->u.b.read = xen_pcibk_read_config_byte; | ||
72 | field->u.b.write = xen_pcibk_write_config_byte; | ||
73 | break; | ||
74 | case 2: | ||
75 | field->u.w.read = xen_pcibk_read_config_word; | ||
76 | field->u.w.write = xen_pcibk_write_config_word; | ||
77 | break; | ||
78 | case 4: | ||
79 | field->u.dw.read = xen_pcibk_read_config_dword; | ||
80 | field->u.dw.write = xen_pcibk_write_config_dword; | ||
81 | break; | ||
82 | default: | ||
83 | err = -EINVAL; | ||
84 | goto out; | ||
85 | } | ||
86 | |||
87 | xen_pcibk_config_add_field(dev, field); | ||
88 | |||
89 | out: | ||
90 | return err; | ||
91 | } | ||
92 | |||
/* Create and register a quirk entry keyed to exactly this device's
 * vendor/device/subsystem IDs (class wildcarded). Returns 0 or -ENOMEM.
 * NOTE(review): allocation uses GFP_ATOMIC — presumably because a
 * caller may hold a spinlock; confirm against the call sites. */
int xen_pcibk_config_quirks_init(struct pci_dev *dev)
{
	struct xen_pcibk_config_quirk *quirk;
	int ret = 0;

	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
	if (!quirk) {
		ret = -ENOMEM;
		goto out;
	}

	quirk->devid.vendor = dev->vendor;
	quirk->devid.device = dev->device;
	quirk->devid.subvendor = dev->subsystem_vendor;
	quirk->devid.subdevice = dev->subsystem_device;
	quirk->devid.class = 0;
	quirk->devid.class_mask = 0;
	quirk->devid.driver_data = 0UL;

	quirk->pdev = dev;

	register_quirk(quirk);
out:
	return ret;
}
118 | |||
/* Free a dynamically-allocated config_field (used as a ->clean hook). */
void xen_pcibk_config_field_free(struct config_field *field)
{
	kfree(field);
}
123 | |||
/* Unregister and free the quirk entry for @dev. Returns 0, or -ENXIO
 * if no quirk was ever registered for this device. */
int xen_pcibk_config_quirk_release(struct pci_dev *dev)
{
	struct xen_pcibk_config_quirk *quirk;
	int ret = 0;

	quirk = xen_pcibk_find_quirk(dev);
	if (!quirk) {
		ret = -ENXIO;
		goto out;
	}

	list_del(&quirk->quirks_list);
	kfree(quirk);

out:
	return ret;
}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h new file mode 100644 index 000000000000..cfcc517e4570 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_quirks.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * PCI Backend - Data structures for special overlays for broken devices. | ||
3 | * | ||
4 | * Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | * Chris Bookholt <hap10@epoch.ncsc.mil> | ||
6 | */ | ||
7 | |||
8 | #ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ | ||
9 | #define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ | ||
10 | |||
11 | #include <linux/pci.h> | ||
12 | #include <linux/list.h> | ||
13 | |||
14 | struct xen_pcibk_config_quirk { | ||
15 | struct list_head quirks_list; | ||
16 | struct pci_device_id devid; | ||
17 | struct pci_dev *pdev; | ||
18 | }; | ||
19 | |||
20 | int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field | ||
21 | *field); | ||
22 | |||
23 | int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg); | ||
24 | |||
25 | int xen_pcibk_config_quirks_init(struct pci_dev *dev); | ||
26 | |||
27 | void xen_pcibk_config_field_free(struct config_field *field); | ||
28 | |||
29 | int xen_pcibk_config_quirk_release(struct pci_dev *dev); | ||
30 | |||
31 | int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg); | ||
32 | |||
33 | #endif | ||
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c new file mode 100644 index 000000000000..1d32a9a42c01 --- /dev/null +++ b/drivers/xen/xen-pciback/passthrough.c | |||
@@ -0,0 +1,194 @@ | |||
1 | /* | ||
2 | * PCI Backend - Provides restricted access to the real PCI bus topology | ||
3 | * to the frontend | ||
4 | * | ||
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
6 | */ | ||
7 | |||
8 | #include <linux/list.h> | ||
9 | #include <linux/pci.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include "pciback.h" | ||
12 | |||
/* Per-xen_pcibk_device state for the "passthrough" backend: the flat
 * list of pci_dev_entry items exported to the frontend. */
struct passthrough_dev_data {
	/* Access to dev_list must be protected by lock */
	struct list_head dev_list;
	spinlock_t lock;	/* guards dev_list */
};
18 | |||
19 | static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, | ||
20 | unsigned int domain, | ||
21 | unsigned int bus, | ||
22 | unsigned int devfn) | ||
23 | { | ||
24 | struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | ||
25 | struct pci_dev_entry *dev_entry; | ||
26 | struct pci_dev *dev = NULL; | ||
27 | unsigned long flags; | ||
28 | |||
29 | spin_lock_irqsave(&dev_data->lock, flags); | ||
30 | |||
31 | list_for_each_entry(dev_entry, &dev_data->dev_list, list) { | ||
32 | if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) | ||
33 | && bus == (unsigned int)dev_entry->dev->bus->number | ||
34 | && devfn == dev_entry->dev->devfn) { | ||
35 | dev = dev_entry->dev; | ||
36 | break; | ||
37 | } | ||
38 | } | ||
39 | |||
40 | spin_unlock_irqrestore(&dev_data->lock, flags); | ||
41 | |||
42 | return dev; | ||
43 | } | ||
44 | |||
45 | static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, | ||
46 | struct pci_dev *dev, | ||
47 | int devid, publish_pci_dev_cb publish_cb) | ||
48 | { | ||
49 | struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | ||
50 | struct pci_dev_entry *dev_entry; | ||
51 | unsigned long flags; | ||
52 | unsigned int domain, bus, devfn; | ||
53 | int err; | ||
54 | |||
55 | dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); | ||
56 | if (!dev_entry) | ||
57 | return -ENOMEM; | ||
58 | dev_entry->dev = dev; | ||
59 | |||
60 | spin_lock_irqsave(&dev_data->lock, flags); | ||
61 | list_add_tail(&dev_entry->list, &dev_data->dev_list); | ||
62 | spin_unlock_irqrestore(&dev_data->lock, flags); | ||
63 | |||
64 | /* Publish this device. */ | ||
65 | domain = (unsigned int)pci_domain_nr(dev->bus); | ||
66 | bus = (unsigned int)dev->bus->number; | ||
67 | devfn = dev->devfn; | ||
68 | err = publish_cb(pdev, domain, bus, devfn, devid); | ||
69 | |||
70 | return err; | ||
71 | } | ||
72 | |||
73 | static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, | ||
74 | struct pci_dev *dev) | ||
75 | { | ||
76 | struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | ||
77 | struct pci_dev_entry *dev_entry, *t; | ||
78 | struct pci_dev *found_dev = NULL; | ||
79 | unsigned long flags; | ||
80 | |||
81 | spin_lock_irqsave(&dev_data->lock, flags); | ||
82 | |||
83 | list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { | ||
84 | if (dev_entry->dev == dev) { | ||
85 | list_del(&dev_entry->list); | ||
86 | found_dev = dev_entry->dev; | ||
87 | kfree(dev_entry); | ||
88 | } | ||
89 | } | ||
90 | |||
91 | spin_unlock_irqrestore(&dev_data->lock, flags); | ||
92 | |||
93 | if (found_dev) | ||
94 | pcistub_put_pci_dev(found_dev); | ||
95 | } | ||
96 | |||
97 | static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) | ||
98 | { | ||
99 | struct passthrough_dev_data *dev_data; | ||
100 | |||
101 | dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); | ||
102 | if (!dev_data) | ||
103 | return -ENOMEM; | ||
104 | |||
105 | spin_lock_init(&dev_data->lock); | ||
106 | |||
107 | INIT_LIST_HEAD(&dev_data->dev_list); | ||
108 | |||
109 | pdev->pci_dev_data = dev_data; | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
/* Publish each exported device's bus as a PCI "root" for the frontend,
 * skipping devices that sit behind an exported bridge (the frontend will
 * find those when it scans behind the bridge).  Returns 0 or the first
 * error from @publish_root_cb. */
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
					 publish_pci_root_cb publish_root_cb)
{
	int err = 0;
	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
	struct pci_dev_entry *dev_entry, *e, *tmp;
	struct pci_dev *dev;
	int found;
	unsigned int domain, bus;

	spin_lock(&dev_data->lock);

	list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
		/* Only publish this device as a root if none of its
		 * parent bridges are exported
		 */
		found = 0;
		/* Walk up the bridge chain looking for an exported parent. */
		dev = dev_entry->dev->bus->self;
		for (; !found && dev != NULL; dev = dev->bus->self) {
			list_for_each_entry(e, &dev_data->dev_list, list) {
				if (dev == e->dev) {
					found = 1;
					break;
				}
			}
		}

		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
		bus = (unsigned int)dev_entry->dev->bus->number;

		if (!found) {
			/* Drop the lock around the callback (it may block,
			 * e.g. on xenbus).  NOTE(review): the list could be
			 * mutated in this window; _safe iteration only
			 * protects against removal of dev_entry itself. */
			spin_unlock(&dev_data->lock);
			err = publish_root_cb(pdev, domain, bus);
			if (err)
				break;
			spin_lock(&dev_data->lock);
		}
	}

	/* On the error path we broke out with the lock already dropped. */
	if (!err)
		spin_unlock(&dev_data->lock);

	return err;
}
158 | |||
159 | static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) | ||
160 | { | ||
161 | struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | ||
162 | struct pci_dev_entry *dev_entry, *t; | ||
163 | |||
164 | list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { | ||
165 | list_del(&dev_entry->list); | ||
166 | pcistub_put_pci_dev(dev_entry->dev); | ||
167 | kfree(dev_entry); | ||
168 | } | ||
169 | |||
170 | kfree(dev_data); | ||
171 | pdev->pci_dev_data = NULL; | ||
172 | } | ||
173 | |||
/* Report where the frontend sees @pcidev.  In passthrough mode the guest
 * topology mirrors the host, so the real domain/bus/devfn are returned
 * unchanged.  Always returns 1 (success). */
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
					struct xen_pcibk_device *pdev,
					unsigned int *domain, unsigned int *bus,
					unsigned int *devfn)
{
	*domain = pci_domain_nr(pcidev->bus);
	*bus = pcidev->bus->number;
	*devfn = pcidev->devfn;
	return 1;
}
184 | |||
/* The "passthrough" backend: exports devices to the guest at the same
 * domain:bus:devfn they occupy on the real PCI topology. */
struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
	.name = "passthrough",
	.init = __xen_pcibk_init_devices,
	.free = __xen_pcibk_release_devices,
	.find = __xen_pcibk_get_pcifront_dev,
	.publish = __xen_pcibk_publish_pci_roots,
	.release = __xen_pcibk_release_pci_dev,
	.add = __xen_pcibk_add_pci_dev,
	.get = __xen_pcibk_get_pci_dev,
};
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c new file mode 100644 index 000000000000..aec214ac0a14 --- /dev/null +++ b/drivers/xen/xen-pciback/pci_stub.c | |||
@@ -0,0 +1,1376 @@ | |||
1 | /* | ||
2 | * PCI Stub Driver - Grabs devices in backend to be exported later | ||
3 | * | ||
4 | * Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | * Chris Bookholt <hap10@epoch.ncsc.mil> | ||
6 | */ | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/rwsem.h> | ||
10 | #include <linux/list.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/kref.h> | ||
13 | #include <linux/pci.h> | ||
14 | #include <linux/wait.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/atomic.h> | ||
17 | #include <xen/events.h> | ||
18 | #include <asm/xen/pci.h> | ||
19 | #include <asm/xen/hypervisor.h> | ||
20 | #include "pciback.h" | ||
21 | #include "conf_space.h" | ||
22 | #include "conf_space_quirks.h" | ||
23 | |||
24 | #define DRV_NAME "xen-pciback" | ||
25 | |||
/* Devices to seize, from the "hide" module parameter; the string is
 * parsed elsewhere in this file (not visible in this chunk). */
static char *pci_devs_to_hide;
/* Waited on in common_process() for pcifront AER acknowledgements. */
wait_queue_head_t xen_pcibk_aer_wait_queue;
/* Add sem for syncing AER handling and xen_pcibk remove/reconfigure ops;
 * we want to avoid xen_pcibk devices being removed in the middle of
 * AER ops. */
static DECLARE_RWSEM(pcistub_sem);
module_param_named(hide, pci_devs_to_hide, charp, 0444);

/* Identity of one device the stub should match (see pcistub_match_one). */
struct pcistub_device_id {
	struct list_head slot_list;	/* link in pcistub_device_ids */
	int domain;
	unsigned char bus;
	unsigned int devfn;
};
static LIST_HEAD(pcistub_device_ids);
static DEFINE_SPINLOCK(device_ids_lock);

/* A seized PCI device; refcounted, freed by pcistub_device_release(). */
struct pcistub_device {
	struct kref kref;
	struct list_head dev_list;	/* on pcistub_devices or seized_devices */
	spinlock_t lock;		/* guards the pdev claim below */

	struct pci_dev *dev;
	struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
};

/* Access to pcistub_devices & seized_devices lists and the initialize_devices
 * flag must be locked with pcistub_devices_lock
 */
static DEFINE_SPINLOCK(pcistub_devices_lock);
static LIST_HEAD(pcistub_devices);

/* wait for device_initcall before initializing our devices
 * (see pcistub_init_devices_late)
 */
static int initialize_devices;
static LIST_HEAD(seized_devices);
63 | |||
/* Allocate a pcistub_device wrapping @dev, taking a pci_dev reference.
 * Returns NULL on allocation failure.  GFP_ATOMIC because callers may
 * hold spinlocks. */
static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
{
	struct pcistub_device *psdev;

	dev_dbg(&dev->dev, "pcistub_device_alloc\n");

	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
	if (!psdev)
		return NULL;

	psdev->dev = pci_dev_get(dev);
	if (!psdev->dev) {
		kfree(psdev);
		return NULL;
	}

	/* Initial reference belongs to the list the caller adds us to. */
	kref_init(&psdev->kref);
	spin_lock_init(&psdev->lock);

	return psdev;
}
85 | |||
/* Don't call this directly as it's called by pcistub_device_put */
static void pcistub_device_release(struct kref *kref)
{
	struct pcistub_device *psdev;

	psdev = container_of(kref, struct pcistub_device, kref);

	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");

	xen_unregister_device_domain_owner(psdev->dev);

	/* Clean-up the device */
	xen_pcibk_reset_device(psdev->dev);
	xen_pcibk_config_free_dyn_fields(psdev->dev);
	xen_pcibk_config_free_dev(psdev->dev);
	/* drvdata holds the xen_pcibk_dev_data allocated at init time. */
	kfree(pci_get_drvdata(psdev->dev));
	pci_set_drvdata(psdev->dev, NULL);

	/* Drop the reference taken by pcistub_device_alloc(). */
	pci_dev_put(psdev->dev);

	kfree(psdev);
}
108 | |||
/* Take an extra reference on @psdev. */
static inline void pcistub_device_get(struct pcistub_device *psdev)
{
	kref_get(&psdev->kref);
}
113 | |||
/* Drop a reference; frees @psdev via pcistub_device_release() at zero. */
static inline void pcistub_device_put(struct pcistub_device *psdev)
{
	kref_put(&psdev->kref, pcistub_device_release);
}
118 | |||
119 | static struct pcistub_device *pcistub_device_find(int domain, int bus, | ||
120 | int slot, int func) | ||
121 | { | ||
122 | struct pcistub_device *psdev = NULL; | ||
123 | unsigned long flags; | ||
124 | |||
125 | spin_lock_irqsave(&pcistub_devices_lock, flags); | ||
126 | |||
127 | list_for_each_entry(psdev, &pcistub_devices, dev_list) { | ||
128 | if (psdev->dev != NULL | ||
129 | && domain == pci_domain_nr(psdev->dev->bus) | ||
130 | && bus == psdev->dev->bus->number | ||
131 | && PCI_DEVFN(slot, func) == psdev->dev->devfn) { | ||
132 | pcistub_device_get(psdev); | ||
133 | goto out; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | /* didn't find it */ | ||
138 | psdev = NULL; | ||
139 | |||
140 | out: | ||
141 | spin_unlock_irqrestore(&pcistub_devices_lock, flags); | ||
142 | return psdev; | ||
143 | } | ||
144 | |||
/* Atomically claim @psdev for @pdev.  If the stub was free, its pdev
 * field is set and the underlying pci_dev is returned with the extra
 * psdev reference transferred to the claim; if it was already claimed,
 * the reference is dropped again and NULL is returned. */
static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
						  struct pcistub_device *psdev)
{
	struct pci_dev *pci_dev = NULL;
	unsigned long flags;

	pcistub_device_get(psdev);

	spin_lock_irqsave(&psdev->lock, flags);
	if (!psdev->pdev) {
		psdev->pdev = pdev;
		pci_dev = psdev->dev;
	}
	spin_unlock_irqrestore(&psdev->lock, flags);

	/* Already in use by someone else: undo our get. */
	if (!pci_dev)
		pcistub_device_put(psdev);

	return pci_dev;
}
165 | |||
166 | struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, | ||
167 | int domain, int bus, | ||
168 | int slot, int func) | ||
169 | { | ||
170 | struct pcistub_device *psdev; | ||
171 | struct pci_dev *found_dev = NULL; | ||
172 | unsigned long flags; | ||
173 | |||
174 | spin_lock_irqsave(&pcistub_devices_lock, flags); | ||
175 | |||
176 | list_for_each_entry(psdev, &pcistub_devices, dev_list) { | ||
177 | if (psdev->dev != NULL | ||
178 | && domain == pci_domain_nr(psdev->dev->bus) | ||
179 | && bus == psdev->dev->bus->number | ||
180 | && PCI_DEVFN(slot, func) == psdev->dev->devfn) { | ||
181 | found_dev = pcistub_device_get_pci_dev(pdev, psdev); | ||
182 | break; | ||
183 | } | ||
184 | } | ||
185 | |||
186 | spin_unlock_irqrestore(&pcistub_devices_lock, flags); | ||
187 | return found_dev; | ||
188 | } | ||
189 | |||
190 | struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, | ||
191 | struct pci_dev *dev) | ||
192 | { | ||
193 | struct pcistub_device *psdev; | ||
194 | struct pci_dev *found_dev = NULL; | ||
195 | unsigned long flags; | ||
196 | |||
197 | spin_lock_irqsave(&pcistub_devices_lock, flags); | ||
198 | |||
199 | list_for_each_entry(psdev, &pcistub_devices, dev_list) { | ||
200 | if (psdev->dev == dev) { | ||
201 | found_dev = pcistub_device_get_pci_dev(pdev, psdev); | ||
202 | break; | ||
203 | } | ||
204 | } | ||
205 | |||
206 | spin_unlock_irqrestore(&pcistub_devices_lock, flags); | ||
207 | return found_dev; | ||
208 | } | ||
209 | |||
210 | void pcistub_put_pci_dev(struct pci_dev *dev) | ||
211 | { | ||
212 | struct pcistub_device *psdev, *found_psdev = NULL; | ||
213 | unsigned long flags; | ||
214 | |||
215 | spin_lock_irqsave(&pcistub_devices_lock, flags); | ||
216 | |||
217 | list_for_each_entry(psdev, &pcistub_devices, dev_list) { | ||
218 | if (psdev->dev == dev) { | ||
219 | found_psdev = psdev; | ||
220 | break; | ||
221 | } | ||
222 | } | ||
223 | |||
224 | spin_unlock_irqrestore(&pcistub_devices_lock, flags); | ||
225 | |||
226 | /*hold this lock for avoiding breaking link between | ||
227 | * pcistub and xen_pcibk when AER is in processing | ||
228 | */ | ||
229 | down_write(&pcistub_sem); | ||
230 | /* Cleanup our device | ||
231 | * (so it's ready for the next domain) | ||
232 | */ | ||
233 | xen_pcibk_reset_device(found_psdev->dev); | ||
234 | xen_pcibk_config_free_dyn_fields(found_psdev->dev); | ||
235 | xen_pcibk_config_reset_dev(found_psdev->dev); | ||
236 | |||
237 | spin_lock_irqsave(&found_psdev->lock, flags); | ||
238 | found_psdev->pdev = NULL; | ||
239 | spin_unlock_irqrestore(&found_psdev->lock, flags); | ||
240 | |||
241 | pcistub_device_put(found_psdev); | ||
242 | up_write(&pcistub_sem); | ||
243 | } | ||
244 | |||
245 | static int __devinit pcistub_match_one(struct pci_dev *dev, | ||
246 | struct pcistub_device_id *pdev_id) | ||
247 | { | ||
248 | /* Match the specified device by domain, bus, slot, func and also if | ||
249 | * any of the device's parent bridges match. | ||
250 | */ | ||
251 | for (; dev != NULL; dev = dev->bus->self) { | ||
252 | if (pci_domain_nr(dev->bus) == pdev_id->domain | ||
253 | && dev->bus->number == pdev_id->bus | ||
254 | && dev->devfn == pdev_id->devfn) | ||
255 | return 1; | ||
256 | |||
257 | /* Sometimes topmost bridge links to itself. */ | ||
258 | if (dev == dev->bus->self) | ||
259 | break; | ||
260 | } | ||
261 | |||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | static int __devinit pcistub_match(struct pci_dev *dev) | ||
266 | { | ||
267 | struct pcistub_device_id *pdev_id; | ||
268 | unsigned long flags; | ||
269 | int found = 0; | ||
270 | |||
271 | spin_lock_irqsave(&device_ids_lock, flags); | ||
272 | list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) { | ||
273 | if (pcistub_match_one(dev, pdev_id)) { | ||
274 | found = 1; | ||
275 | break; | ||
276 | } | ||
277 | } | ||
278 | spin_unlock_irqrestore(&device_ids_lock, flags); | ||
279 | |||
280 | return found; | ||
281 | } | ||
282 | |||
/* Fully initialize a seized device: allocate per-device data, build its
 * virtual config space, enable it once to let ACPI/pcibios fix up its
 * resources, then reset it so it is clean for the next domain.
 * Returns 0 or a negative errno; on failure all partial state is undone. */
static int __devinit pcistub_init_device(struct pci_dev *dev)
{
	struct xen_pcibk_dev_data *dev_data;
	int err = 0;

	dev_dbg(&dev->dev, "initializing...\n");

	/* The PCI backend is not intended to be a module (or to work with
	 * removable PCI devices (yet). If it were, xen_pcibk_config_free()
	 * would need to be called somewhere to free the memory allocated
	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
	 */
	/* Extra bytes hold the irq_name written by sprintf() below. */
	dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
				+ strlen(pci_name(dev)) + 1, GFP_ATOMIC);
	if (!dev_data) {
		err = -ENOMEM;
		goto out;
	}
	pci_set_drvdata(dev, dev_data);

	/*
	 * Setup name for fake IRQ handler. It will only be enabled
	 * once the device is turned on by the guest.
	 */
	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));

	dev_dbg(&dev->dev, "initializing config\n");

	/* NOTE(review): this re-initializes the *global* AER wait queue on
	 * every device init; a waiter queued during a later init would be
	 * lost.  Looks like it belongs in module init — confirm. */
	init_waitqueue_head(&xen_pcibk_aer_wait_queue);
	err = xen_pcibk_config_init_dev(dev);
	if (err)
		goto out;

	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
	 * must do this here because pcibios_enable_device may specify
	 * the pci device's true irq (and possibly its other resources)
	 * if they differ from what's in the configuration space.
	 * This makes the assumption that the device's resources won't
	 * change after this point (otherwise this code may break!)
	 */
	dev_dbg(&dev->dev, "enabling device\n");
	err = pci_enable_device(dev);
	if (err)
		goto config_release;

	/* Now disable the device (this also ensures some private device
	 * data is setup before we export)
	 */
	dev_dbg(&dev->dev, "reset device\n");
	xen_pcibk_reset_device(dev);

	return 0;

config_release:
	xen_pcibk_config_free_dev(dev);

out:
	pci_set_drvdata(dev, NULL);
	kfree(dev_data);
	return err;
}
344 | |||
/*
 * Because some initialization still happens on
 * devices during fs_initcall, we need to defer
 * full initialization of our devices until
 * device_initcall.
 */
static int __init pcistub_init_devices_late(void)
{
	struct pcistub_device *psdev;
	unsigned long flags;
	int err = 0;

	pr_debug(DRV_NAME ": pcistub_init_devices_late\n");

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	/* Drain seized_devices, initializing each entry.  The lock is
	 * dropped around pcistub_init_device() because it enables the
	 * device and must not run with IRQs disabled. */
	while (!list_empty(&seized_devices)) {
		psdev = container_of(seized_devices.next,
				     struct pcistub_device, dev_list);
		list_del(&psdev->dev_list);

		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		err = pcistub_init_device(psdev->dev);
		if (err) {
			dev_err(&psdev->dev->dev,
				"error %d initializing device\n", err);
			/* NOTE(review): kfree() bypasses the kref/release
			 * path, so the pci_dev_get() reference from
			 * pcistub_device_alloc() is never dropped here —
			 * looks like a leak on this error path; confirm. */
			kfree(psdev);
			psdev = NULL;
		}

		spin_lock_irqsave(&pcistub_devices_lock, flags);

		if (psdev)
			list_add_tail(&psdev->dev_list, &pcistub_devices);
	}

	/* From now on pcistub_seize() initializes devices immediately. */
	initialize_devices = 1;

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	return 0;
}
388 | |||
/* Take ownership of @dev for the stub driver.  Before the late initcall
 * has run (initialize_devices == 0) the device is only parked on
 * seized_devices; afterwards it is initialized immediately and added to
 * pcistub_devices.  Returns 0 or a negative errno. */
static int __devinit pcistub_seize(struct pci_dev *dev)
{
	struct pcistub_device *psdev;
	unsigned long flags;
	int err = 0;

	psdev = pcistub_device_alloc(dev);
	if (!psdev)
		return -ENOMEM;

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	if (initialize_devices) {
		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		/* don't want irqs disabled when calling pcistub_init_device */
		err = pcistub_init_device(psdev->dev);

		spin_lock_irqsave(&pcistub_devices_lock, flags);

		if (!err)
			list_add(&psdev->dev_list, &pcistub_devices);
	} else {
		dev_dbg(&dev->dev, "deferring initialization\n");
		list_add(&psdev->dev_list, &seized_devices);
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	/* On failure drop the list's reference, freeing psdev. */
	if (err)
		pcistub_device_put(psdev);

	return err;
}
423 | |||
424 | static int __devinit pcistub_probe(struct pci_dev *dev, | ||
425 | const struct pci_device_id *id) | ||
426 | { | ||
427 | int err = 0; | ||
428 | |||
429 | dev_dbg(&dev->dev, "probing...\n"); | ||
430 | |||
431 | if (pcistub_match(dev)) { | ||
432 | |||
433 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL | ||
434 | && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { | ||
435 | dev_err(&dev->dev, "can't export pci devices that " | ||
436 | "don't have a normal (0) or bridge (1) " | ||
437 | "header type!\n"); | ||
438 | err = -ENODEV; | ||
439 | goto out; | ||
440 | } | ||
441 | |||
442 | dev_info(&dev->dev, "seizing device\n"); | ||
443 | err = pcistub_seize(dev); | ||
444 | } else | ||
445 | /* Didn't find the device */ | ||
446 | err = -ENODEV; | ||
447 | |||
448 | out: | ||
449 | return err; | ||
450 | } | ||
451 | |||
/* PCI remove callback.  Unhook @dev from the stub: release its quirk,
 * force-release it from any frontend still using it, unlink it from
 * pcistub_devices and drop the list's reference. */
static void pcistub_remove(struct pci_dev *dev)
{
	struct pcistub_device *psdev, *found_psdev = NULL;
	unsigned long flags;

	dev_dbg(&dev->dev, "removing\n");

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	xen_pcibk_config_quirk_release(dev);

	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (psdev->dev == dev) {
			found_psdev = psdev;
			break;
		}
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	if (found_psdev) {
		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
			found_psdev->pdev);

		/* Still claimed by a frontend: warn loudly, then force the
		 * backend to let go of it. */
		if (found_psdev->pdev) {
			printk(KERN_WARNING DRV_NAME ": ****** removing device "
			       "%s while still in-use! ******\n",
			       pci_name(found_psdev->dev));
			printk(KERN_WARNING DRV_NAME ": ****** driver domain may"
			       " still access this device's i/o resources!\n");
			printk(KERN_WARNING DRV_NAME ": ****** shutdown driver "
			       "domain before binding device\n");
			printk(KERN_WARNING DRV_NAME ": ****** to other drivers "
			       "or domains\n");

			xen_pcibk_release_pci_dev(found_psdev->pdev,
						  found_psdev->dev);
		}

		spin_lock_irqsave(&pcistub_devices_lock, flags);
		list_del(&found_psdev->dev_list);
		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		/* the final put for releasing from the list */
		pcistub_device_put(found_psdev);
	}
}
499 | |||
/* Match every PCI device; the real filtering is done in pcistub_probe()
 * via pcistub_match() against the "hide" module parameter. */
static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
	{
	 .vendor = PCI_ANY_ID,
	 .device = PCI_ANY_ID,
	 .subvendor = PCI_ANY_ID,
	 .subdevice = PCI_ANY_ID,
	 },
	{0,},
};
509 | |||
510 | #define PCI_NODENAME_MAX 40 | ||
511 | static void kill_domain_by_device(struct pcistub_device *psdev) | ||
512 | { | ||
513 | struct xenbus_transaction xbt; | ||
514 | int err; | ||
515 | char nodename[PCI_NODENAME_MAX]; | ||
516 | |||
517 | if (!psdev) | ||
518 | dev_err(&psdev->dev->dev, | ||
519 | "device is NULL when do AER recovery/kill_domain\n"); | ||
520 | snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", | ||
521 | psdev->pdev->xdev->otherend_id); | ||
522 | nodename[strlen(nodename)] = '\0'; | ||
523 | |||
524 | again: | ||
525 | err = xenbus_transaction_start(&xbt); | ||
526 | if (err) { | ||
527 | dev_err(&psdev->dev->dev, | ||
528 | "error %d when start xenbus transaction\n", err); | ||
529 | return; | ||
530 | } | ||
531 | /*PV AER handlers will set this flag*/ | ||
532 | xenbus_printf(xbt, nodename, "aerState" , "aerfail"); | ||
533 | err = xenbus_transaction_end(xbt, 0); | ||
534 | if (err) { | ||
535 | if (err == -EAGAIN) | ||
536 | goto again; | ||
537 | dev_err(&psdev->dev->dev, | ||
538 | "error %d when end xenbus transaction\n", err); | ||
539 | return; | ||
540 | } | ||
541 | } | ||
542 | |||
/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
 * backend need to have cooperation. In xen_pcibk, those steps will do similar
 * jobs: send service request and waiting for front_end response.
 */
static pci_ers_result_t common_process(struct pcistub_device *psdev,
				       pci_channel_state_t state, int aer_cmd,
				       pci_ers_result_t result)
{
	pci_ers_result_t res = result;
	struct xen_pcie_aer_op *aer_op;
	int ret;

	/*with PV AER drivers*/
	/* Fill the AER request in the ring page shared with pcifront. */
	aer_op = &(psdev->pdev->sh_info->aer_op);
	aer_op->cmd = aer_cmd ;
	/*useful for error_detected callback*/
	aer_op->err = state;
	/*pcifront_end BDF*/
	ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
					 &aer_op->domain, &aer_op->bus, &aer_op->devfn);
	if (!ret) {
		dev_err(&psdev->dev->dev,
			DRV_NAME ": failed to get pcifront device\n");
		return PCI_ERS_RESULT_NONE;
	}
	/* Make the request visible before any flag the frontend may see. */
	wmb();

	dev_dbg(&psdev->dev->dev,
		DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
		aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
	/*local flag to mark there's aer request, xen_pcibk callback will use
	 * this flag to judge whether we need to check pci-front give aer
	 * service ack signal
	 */
	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);

	/*It is possible that a pcifront conf_read_write ops request invokes
	 * the callback which cause the spurious execution of wake_up.
	 * Yet it is harmless and better than a spinlock here
	 */
	set_bit(_XEN_PCIB_active,
		(unsigned long *)&psdev->pdev->sh_info->flags);
	/* Flag must be visible before the event channel kick. */
	wmb();
	notify_remote_via_irq(psdev->pdev->evtchn_irq);

	/* Wait (up to 300 s) for pcifront to clear _XEN_PCIB_active. */
	ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
				 !(test_bit(_XEN_PCIB_active, (unsigned long *)
				 &psdev->pdev->sh_info->flags)), 300*HZ);

	if (!ret) {
		/* Timed out: if the flag is still set the frontend never
		 * answered — give up and report NONE via aer_op->err. */
		if (test_bit(_XEN_PCIB_active,
			(unsigned long *)&psdev->pdev->sh_info->flags)) {
			dev_err(&psdev->dev->dev,
				"pcifront aer process not responding!\n");
			clear_bit(_XEN_PCIB_active,
			  (unsigned long *)&psdev->pdev->sh_info->flags);
			aer_op->err = PCI_ERS_RESULT_NONE;
			return res;
		}
	}
	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);

	/* A normal pci_conf request may have arrived while we held the
	 * ring for AER; schedule it now. */
	if (test_bit(_XEN_PCIF_active,
		(unsigned long *)&psdev->pdev->sh_info->flags)) {
		dev_dbg(&psdev->dev->dev,
			"schedule pci_conf service in xen_pcibk\n");
		xen_pcibk_test_and_schedule_op(psdev->pdev);
	}

	/* The frontend's verdict was written back into aer_op->err. */
	res = (pci_ers_result_t)aer_op->err;
	return res;
}
615 | |||
616 | /* | ||
617 | * xen_pcibk_slot_reset: it will send the slot_reset request to pcifront in case | ||
618 | * of the device driver could provide this service, and then wait for pcifront | ||
619 | * ack. | ||
620 | * @dev: pointer to PCI devices | ||
621 | * return value is used by aer_core do_recovery policy | ||
622 | */ | ||
623 | static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) | ||
624 | { | ||
625 | struct pcistub_device *psdev; | ||
626 | pci_ers_result_t result; | ||
627 | |||
628 | result = PCI_ERS_RESULT_RECOVERED; | ||
629 | dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n", | ||
630 | dev->bus->number, dev->devfn); | ||
631 | |||
632 | down_write(&pcistub_sem); | ||
633 | psdev = pcistub_device_find(pci_domain_nr(dev->bus), | ||
634 | dev->bus->number, | ||
635 | PCI_SLOT(dev->devfn), | ||
636 | PCI_FUNC(dev->devfn)); | ||
637 | |||
638 | if (!psdev || !psdev->pdev) { | ||
639 | dev_err(&dev->dev, | ||
640 | DRV_NAME " device is not found/assigned\n"); | ||
641 | goto end; | ||
642 | } | ||
643 | |||
644 | if (!psdev->pdev->sh_info) { | ||
645 | dev_err(&dev->dev, DRV_NAME " device is not connected or owned" | ||
646 | " by HVM, kill it\n"); | ||
647 | kill_domain_by_device(psdev); | ||
648 | goto release; | ||
649 | } | ||
650 | |||
651 | if (!test_bit(_XEN_PCIB_AERHANDLER, | ||
652 | (unsigned long *)&psdev->pdev->sh_info->flags)) { | ||
653 | dev_err(&dev->dev, | ||
654 | "guest with no AER driver should have been killed\n"); | ||
655 | goto release; | ||
656 | } | ||
657 | result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); | ||
658 | |||
659 | if (result == PCI_ERS_RESULT_NONE || | ||
660 | result == PCI_ERS_RESULT_DISCONNECT) { | ||
661 | dev_dbg(&dev->dev, | ||
662 | "No AER slot_reset service or disconnected!\n"); | ||
663 | kill_domain_by_device(psdev); | ||
664 | } | ||
665 | release: | ||
666 | pcistub_device_put(psdev); | ||
667 | end: | ||
668 | up_write(&pcistub_sem); | ||
669 | return result; | ||
670 | |||
671 | } | ||
672 | |||
673 | |||
674 | /*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to pcifront | ||
675 | * in case of the device driver could provide this service, and then wait | ||
676 | * for pcifront ack | ||
677 | * @dev: pointer to PCI devices | ||
678 | * return value is used by aer_core do_recovery policy | ||
679 | */ | ||
680 | |||
681 | static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) | ||
682 | { | ||
683 | struct pcistub_device *psdev; | ||
684 | pci_ers_result_t result; | ||
685 | |||
686 | result = PCI_ERS_RESULT_RECOVERED; | ||
687 | dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n", | ||
688 | dev->bus->number, dev->devfn); | ||
689 | |||
690 | down_write(&pcistub_sem); | ||
691 | psdev = pcistub_device_find(pci_domain_nr(dev->bus), | ||
692 | dev->bus->number, | ||
693 | PCI_SLOT(dev->devfn), | ||
694 | PCI_FUNC(dev->devfn)); | ||
695 | |||
696 | if (!psdev || !psdev->pdev) { | ||
697 | dev_err(&dev->dev, | ||
698 | DRV_NAME " device is not found/assigned\n"); | ||
699 | goto end; | ||
700 | } | ||
701 | |||
702 | if (!psdev->pdev->sh_info) { | ||
703 | dev_err(&dev->dev, DRV_NAME " device is not connected or owned" | ||
704 | " by HVM, kill it\n"); | ||
705 | kill_domain_by_device(psdev); | ||
706 | goto release; | ||
707 | } | ||
708 | |||
709 | if (!test_bit(_XEN_PCIB_AERHANDLER, | ||
710 | (unsigned long *)&psdev->pdev->sh_info->flags)) { | ||
711 | dev_err(&dev->dev, | ||
712 | "guest with no AER driver should have been killed\n"); | ||
713 | goto release; | ||
714 | } | ||
715 | result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); | ||
716 | |||
717 | if (result == PCI_ERS_RESULT_NONE || | ||
718 | result == PCI_ERS_RESULT_DISCONNECT) { | ||
719 | dev_dbg(&dev->dev, | ||
720 | "No AER mmio_enabled service or disconnected!\n"); | ||
721 | kill_domain_by_device(psdev); | ||
722 | } | ||
723 | release: | ||
724 | pcistub_device_put(psdev); | ||
725 | end: | ||
726 | up_write(&pcistub_sem); | ||
727 | return result; | ||
728 | } | ||
729 | |||
730 | /*xen_pcibk_error_detected: it will send the error_detected request to pcifront | ||
731 | * in case of the device driver could provide this service, and then wait | ||
732 | * for pcifront ack. | ||
733 | * @dev: pointer to PCI devices | ||
734 | * @error: the current PCI connection state | ||
735 | * return value is used by aer_core do_recovery policy | ||
736 | */ | ||
737 | |||
738 | static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, | ||
739 | pci_channel_state_t error) | ||
740 | { | ||
741 | struct pcistub_device *psdev; | ||
742 | pci_ers_result_t result; | ||
743 | |||
744 | result = PCI_ERS_RESULT_CAN_RECOVER; | ||
745 | dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n", | ||
746 | dev->bus->number, dev->devfn); | ||
747 | |||
748 | down_write(&pcistub_sem); | ||
749 | psdev = pcistub_device_find(pci_domain_nr(dev->bus), | ||
750 | dev->bus->number, | ||
751 | PCI_SLOT(dev->devfn), | ||
752 | PCI_FUNC(dev->devfn)); | ||
753 | |||
754 | if (!psdev || !psdev->pdev) { | ||
755 | dev_err(&dev->dev, | ||
756 | DRV_NAME " device is not found/assigned\n"); | ||
757 | goto end; | ||
758 | } | ||
759 | |||
760 | if (!psdev->pdev->sh_info) { | ||
761 | dev_err(&dev->dev, DRV_NAME " device is not connected or owned" | ||
762 | " by HVM, kill it\n"); | ||
763 | kill_domain_by_device(psdev); | ||
764 | goto release; | ||
765 | } | ||
766 | |||
767 | /*Guest owns the device yet no aer handler regiested, kill guest*/ | ||
768 | if (!test_bit(_XEN_PCIB_AERHANDLER, | ||
769 | (unsigned long *)&psdev->pdev->sh_info->flags)) { | ||
770 | dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); | ||
771 | kill_domain_by_device(psdev); | ||
772 | goto release; | ||
773 | } | ||
774 | result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); | ||
775 | |||
776 | if (result == PCI_ERS_RESULT_NONE || | ||
777 | result == PCI_ERS_RESULT_DISCONNECT) { | ||
778 | dev_dbg(&dev->dev, | ||
779 | "No AER error_detected service or disconnected!\n"); | ||
780 | kill_domain_by_device(psdev); | ||
781 | } | ||
782 | release: | ||
783 | pcistub_device_put(psdev); | ||
784 | end: | ||
785 | up_write(&pcistub_sem); | ||
786 | return result; | ||
787 | } | ||
788 | |||
789 | /*xen_pcibk_error_resume: it will send the error_resume request to pcifront | ||
790 | * in case of the device driver could provide this service, and then wait | ||
791 | * for pcifront ack. | ||
792 | * @dev: pointer to PCI devices | ||
793 | */ | ||
794 | |||
795 | static void xen_pcibk_error_resume(struct pci_dev *dev) | ||
796 | { | ||
797 | struct pcistub_device *psdev; | ||
798 | |||
799 | dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n", | ||
800 | dev->bus->number, dev->devfn); | ||
801 | |||
802 | down_write(&pcistub_sem); | ||
803 | psdev = pcistub_device_find(pci_domain_nr(dev->bus), | ||
804 | dev->bus->number, | ||
805 | PCI_SLOT(dev->devfn), | ||
806 | PCI_FUNC(dev->devfn)); | ||
807 | |||
808 | if (!psdev || !psdev->pdev) { | ||
809 | dev_err(&dev->dev, | ||
810 | DRV_NAME " device is not found/assigned\n"); | ||
811 | goto end; | ||
812 | } | ||
813 | |||
814 | if (!psdev->pdev->sh_info) { | ||
815 | dev_err(&dev->dev, DRV_NAME " device is not connected or owned" | ||
816 | " by HVM, kill it\n"); | ||
817 | kill_domain_by_device(psdev); | ||
818 | goto release; | ||
819 | } | ||
820 | |||
821 | if (!test_bit(_XEN_PCIB_AERHANDLER, | ||
822 | (unsigned long *)&psdev->pdev->sh_info->flags)) { | ||
823 | dev_err(&dev->dev, | ||
824 | "guest with no AER driver should have been killed\n"); | ||
825 | kill_domain_by_device(psdev); | ||
826 | goto release; | ||
827 | } | ||
828 | common_process(psdev, 1, XEN_PCI_OP_aer_resume, | ||
829 | PCI_ERS_RESULT_RECOVERED); | ||
830 | release: | ||
831 | pcistub_device_put(psdev); | ||
832 | end: | ||
833 | up_write(&pcistub_sem); | ||
834 | return; | ||
835 | } | ||
836 | |||
/* xen_pcibk AER handling: callbacks through which the AER core drives
 * error recovery for PCI devices bound to this stub driver. */
static struct pci_error_handlers xen_pcibk_error_handler = {
	.error_detected = xen_pcibk_error_detected,
	.mmio_enabled = xen_pcibk_mmio_enabled,
	.slot_reset = xen_pcibk_slot_reset,	/* defined earlier in this file */
	.resume = xen_pcibk_error_resume,
};
844 | |||
/*
 * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
 * for a normal device. I don't want it to be loaded automatically.
 */

static struct pci_driver xen_pcibk_pci_driver = {
	/* The name should be xen_pciback, but until the tools are updated
	 * we will keep it as pciback. */
	.name = "pciback",
	.id_table = pcistub_ids,	/* ID table defined elsewhere in this file */
	.probe = pcistub_probe,
	.remove = pcistub_remove,
	.err_handler = &xen_pcibk_error_handler,	/* AER callbacks above */
};
859 | |||
/* Parse a PCI address of the form "dddd:bb:ss.f" (all hex) or "bb:ss.f"
 * (domain defaults to 0). Returns 0 on success, -EINVAL otherwise. */
static inline int str_to_slot(const char *buf, int *domain, int *bus,
			      int *slot, int *func)
{
	int fields;

	/* Preferred form with an explicit domain. */
	fields = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
	if (fields == 4)
		return 0;
	if (fields < 0)
		return -EINVAL;

	/* Fall back to the domain-less form with an implicit domain of 0. */
	*domain = 0;
	if (sscanf(buf, " %x:%x.%x", bus, slot, func) == 3)
		return 0;

	return -EINVAL;
}
879 | |||
/* Parse a quirk specification "dddd:bb:ss.f-reg:size:mask" (all hex).
 * Returns 0 when all seven fields parsed, -EINVAL otherwise. */
static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
			       *slot, int *func, int *reg, int *size, int *mask)
{
	int fields = sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x",
			    domain, bus, slot, func, reg, size, mask);

	return (fields == 7) ? 0 : -EINVAL;
}
892 | |||
893 | static int pcistub_device_id_add(int domain, int bus, int slot, int func) | ||
894 | { | ||
895 | struct pcistub_device_id *pci_dev_id; | ||
896 | unsigned long flags; | ||
897 | |||
898 | pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); | ||
899 | if (!pci_dev_id) | ||
900 | return -ENOMEM; | ||
901 | |||
902 | pci_dev_id->domain = domain; | ||
903 | pci_dev_id->bus = bus; | ||
904 | pci_dev_id->devfn = PCI_DEVFN(slot, func); | ||
905 | |||
906 | pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n", | ||
907 | domain, bus, slot, func); | ||
908 | |||
909 | spin_lock_irqsave(&device_ids_lock, flags); | ||
910 | list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids); | ||
911 | spin_unlock_irqrestore(&device_ids_lock, flags); | ||
912 | |||
913 | return 0; | ||
914 | } | ||
915 | |||
/* Remove every "seize" list entry matching domain:bus:slot.func.
 * Returns 0 if at least one entry was removed, -ENOENT otherwise. */
static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
{
	struct pcistub_device_id *pci_dev_id, *t;
	int devfn = PCI_DEVFN(slot, func);
	int err = -ENOENT;
	unsigned long flags;

	spin_lock_irqsave(&device_ids_lock, flags);
	/* _safe variant: entries are deleted while walking. */
	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
				 slot_list) {
		if (pci_dev_id->domain == domain
		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
			/* Don't break; here because it's possible the same
			 * slot could be in the list more than once
			 */
			list_del(&pci_dev_id->slot_list);
			kfree(pci_dev_id);

			err = 0;

			pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from "
				 "seize list\n", domain, bus, slot, func);
		}
	}
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return err;
}
944 | |||
945 | static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, | ||
946 | int size, int mask) | ||
947 | { | ||
948 | int err = 0; | ||
949 | struct pcistub_device *psdev; | ||
950 | struct pci_dev *dev; | ||
951 | struct config_field *field; | ||
952 | |||
953 | psdev = pcistub_device_find(domain, bus, slot, func); | ||
954 | if (!psdev || !psdev->dev) { | ||
955 | err = -ENODEV; | ||
956 | goto out; | ||
957 | } | ||
958 | dev = psdev->dev; | ||
959 | |||
960 | field = kzalloc(sizeof(*field), GFP_ATOMIC); | ||
961 | if (!field) { | ||
962 | err = -ENOMEM; | ||
963 | goto out; | ||
964 | } | ||
965 | |||
966 | field->offset = reg; | ||
967 | field->size = size; | ||
968 | field->mask = mask; | ||
969 | field->init = NULL; | ||
970 | field->reset = NULL; | ||
971 | field->release = NULL; | ||
972 | field->clean = xen_pcibk_config_field_free; | ||
973 | |||
974 | err = xen_pcibk_config_quirks_add_field(dev, field); | ||
975 | if (err) | ||
976 | kfree(field); | ||
977 | out: | ||
978 | return err; | ||
979 | } | ||
980 | |||
981 | static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, | ||
982 | size_t count) | ||
983 | { | ||
984 | int domain, bus, slot, func; | ||
985 | int err; | ||
986 | |||
987 | err = str_to_slot(buf, &domain, &bus, &slot, &func); | ||
988 | if (err) | ||
989 | goto out; | ||
990 | |||
991 | err = pcistub_device_id_add(domain, bus, slot, func); | ||
992 | |||
993 | out: | ||
994 | if (!err) | ||
995 | err = count; | ||
996 | return err; | ||
997 | } | ||
998 | |||
999 | DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); | ||
1000 | |||
1001 | static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, | ||
1002 | size_t count) | ||
1003 | { | ||
1004 | int domain, bus, slot, func; | ||
1005 | int err; | ||
1006 | |||
1007 | err = str_to_slot(buf, &domain, &bus, &slot, &func); | ||
1008 | if (err) | ||
1009 | goto out; | ||
1010 | |||
1011 | err = pcistub_device_id_remove(domain, bus, slot, func); | ||
1012 | |||
1013 | out: | ||
1014 | if (!err) | ||
1015 | err = count; | ||
1016 | return err; | ||
1017 | } | ||
1018 | |||
1019 | DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); | ||
1020 | |||
/* Sysfs show for "slots": list every BDF on the seize list, one
 * "dddd:bb:ss.f" per line, truncated at PAGE_SIZE. */
static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
{
	struct pcistub_device_id *pci_dev_id;
	size_t count = 0;
	unsigned long flags;

	spin_lock_irqsave(&device_ids_lock, flags);
	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
		if (count >= PAGE_SIZE)
			break;

		count += scnprintf(buf + count, PAGE_SIZE - count,
				   "%04x:%02x:%02x.%01x\n",
				   pci_dev_id->domain, pci_dev_id->bus,
				   PCI_SLOT(pci_dev_id->devfn),
				   PCI_FUNC(pci_dev_id->devfn));
	}
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return count;
}

DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
1044 | |||
1045 | static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) | ||
1046 | { | ||
1047 | struct pcistub_device *psdev; | ||
1048 | struct xen_pcibk_dev_data *dev_data; | ||
1049 | size_t count = 0; | ||
1050 | unsigned long flags; | ||
1051 | |||
1052 | spin_lock_irqsave(&pcistub_devices_lock, flags); | ||
1053 | list_for_each_entry(psdev, &pcistub_devices, dev_list) { | ||
1054 | if (count >= PAGE_SIZE) | ||
1055 | break; | ||
1056 | if (!psdev->dev) | ||
1057 | continue; | ||
1058 | dev_data = pci_get_drvdata(psdev->dev); | ||
1059 | if (!dev_data) | ||
1060 | continue; | ||
1061 | count += | ||
1062 | scnprintf(buf + count, PAGE_SIZE - count, | ||
1063 | "%s:%s:%sing:%ld\n", | ||
1064 | pci_name(psdev->dev), | ||
1065 | dev_data->isr_on ? "on" : "off", | ||
1066 | dev_data->ack_intr ? "ack" : "not ack", | ||
1067 | dev_data->handled); | ||
1068 | } | ||
1069 | spin_unlock_irqrestore(&pcistub_devices_lock, flags); | ||
1070 | return count; | ||
1071 | } | ||
1072 | |||
1073 | DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); | ||
1074 | |||
1075 | static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, | ||
1076 | const char *buf, | ||
1077 | size_t count) | ||
1078 | { | ||
1079 | struct pcistub_device *psdev; | ||
1080 | struct xen_pcibk_dev_data *dev_data; | ||
1081 | int domain, bus, slot, func; | ||
1082 | int err = -ENOENT; | ||
1083 | |||
1084 | err = str_to_slot(buf, &domain, &bus, &slot, &func); | ||
1085 | if (err) | ||
1086 | goto out; | ||
1087 | |||
1088 | psdev = pcistub_device_find(domain, bus, slot, func); | ||
1089 | |||
1090 | if (!psdev) | ||
1091 | goto out; | ||
1092 | |||
1093 | dev_data = pci_get_drvdata(psdev->dev); | ||
1094 | if (!dev_data) | ||
1095 | goto out; | ||
1096 | |||
1097 | dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n", | ||
1098 | dev_data->irq_name, dev_data->isr_on, | ||
1099 | !dev_data->isr_on); | ||
1100 | |||
1101 | dev_data->isr_on = !(dev_data->isr_on); | ||
1102 | if (dev_data->isr_on) | ||
1103 | dev_data->ack_intr = 1; | ||
1104 | out: | ||
1105 | if (!err) | ||
1106 | err = count; | ||
1107 | return err; | ||
1108 | } | ||
1109 | DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); | ||
1110 | |||
1111 | static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, | ||
1112 | size_t count) | ||
1113 | { | ||
1114 | int domain, bus, slot, func, reg, size, mask; | ||
1115 | int err; | ||
1116 | |||
1117 | err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size, | ||
1118 | &mask); | ||
1119 | if (err) | ||
1120 | goto out; | ||
1121 | |||
1122 | err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); | ||
1123 | |||
1124 | out: | ||
1125 | if (!err) | ||
1126 | err = count; | ||
1127 | return err; | ||
1128 | } | ||
1129 | |||
/* Sysfs show half of the "quirks" attribute: dump every registered quirk —
 * one header line per device (BDF then vendor:device:subvendor:subdevice)
 * followed by one indented line per overridden field (offset:size:mask).
 * device_ids_lock is held across the whole walk. */
static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
{
	int count = 0;
	unsigned long flags;
	struct xen_pcibk_config_quirk *quirk;
	struct xen_pcibk_dev_data *dev_data;
	const struct config_field *field;
	const struct config_field_entry *cfg_entry;

	spin_lock_irqsave(&device_ids_lock, flags);
	list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) {
		if (count >= PAGE_SIZE)
			goto out;

		count += scnprintf(buf + count, PAGE_SIZE - count,
				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
				   quirk->pdev->bus->number,
				   PCI_SLOT(quirk->pdev->devfn),
				   PCI_FUNC(quirk->pdev->devfn),
				   quirk->devid.vendor, quirk->devid.device,
				   quirk->devid.subvendor,
				   quirk->devid.subdevice);

		dev_data = pci_get_drvdata(quirk->pdev);

		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
			field = cfg_entry->field;
			if (count >= PAGE_SIZE)
				goto out;

			count += scnprintf(buf + count, PAGE_SIZE - count,
					   "\t\t%08x:%01x:%08x\n",
					   cfg_entry->base_offset +
					   field->offset, field->size,
					   field->mask);
		}
	}

out:
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return count;
}

DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
1175 | |||
/* Sysfs store for "permissive": allow the guest unrestricted config-space
 * writes to the named device. There is no sysfs path here to clear the
 * flag again once set. Returns @count on success. */
static ssize_t permissive_add(struct device_driver *drv, const char *buf,
			      size_t count)
{
	int domain, bus, slot, func;
	int err;
	struct pcistub_device *psdev;
	struct xen_pcibk_dev_data *dev_data;
	err = str_to_slot(buf, &domain, &bus, &slot, &func);
	if (err)
		goto out;
	psdev = pcistub_device_find(domain, bus, slot, func);
	if (!psdev) {
		err = -ENODEV;
		goto out;
	}
	if (!psdev->dev) {
		err = -ENODEV;
		goto release;
	}
	dev_data = pci_get_drvdata(psdev->dev);
	/* the driver data for a device should never be null at this point */
	if (!dev_data) {
		err = -ENXIO;
		goto release;
	}
	if (!dev_data->permissive) {
		dev_data->permissive = 1;
		/* Let user know that what they're doing could be unsafe */
		dev_warn(&psdev->dev->dev, "enabling permissive mode "
			 "configuration space accesses!\n");
		dev_warn(&psdev->dev->dev,
			 "permissive mode is potentially unsafe!\n");
	}
release:
	pcistub_device_put(psdev);
out:
	if (!err)
		err = count;
	return err;
}
1216 | |||
1217 | static ssize_t permissive_show(struct device_driver *drv, char *buf) | ||
1218 | { | ||
1219 | struct pcistub_device *psdev; | ||
1220 | struct xen_pcibk_dev_data *dev_data; | ||
1221 | size_t count = 0; | ||
1222 | unsigned long flags; | ||
1223 | spin_lock_irqsave(&pcistub_devices_lock, flags); | ||
1224 | list_for_each_entry(psdev, &pcistub_devices, dev_list) { | ||
1225 | if (count >= PAGE_SIZE) | ||
1226 | break; | ||
1227 | if (!psdev->dev) | ||
1228 | continue; | ||
1229 | dev_data = pci_get_drvdata(psdev->dev); | ||
1230 | if (!dev_data || !dev_data->permissive) | ||
1231 | continue; | ||
1232 | count += | ||
1233 | scnprintf(buf + count, PAGE_SIZE - count, "%s\n", | ||
1234 | pci_name(psdev->dev)); | ||
1235 | } | ||
1236 | spin_unlock_irqrestore(&pcistub_devices_lock, flags); | ||
1237 | return count; | ||
1238 | } | ||
1239 | |||
1240 | DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); | ||
1241 | |||
1242 | static void pcistub_exit(void) | ||
1243 | { | ||
1244 | driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot); | ||
1245 | driver_remove_file(&xen_pcibk_pci_driver.driver, | ||
1246 | &driver_attr_remove_slot); | ||
1247 | driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots); | ||
1248 | driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks); | ||
1249 | driver_remove_file(&xen_pcibk_pci_driver.driver, | ||
1250 | &driver_attr_permissive); | ||
1251 | driver_remove_file(&xen_pcibk_pci_driver.driver, | ||
1252 | &driver_attr_irq_handlers); | ||
1253 | driver_remove_file(&xen_pcibk_pci_driver.driver, | ||
1254 | &driver_attr_irq_handler_state); | ||
1255 | pci_unregister_driver(&xen_pcibk_pci_driver); | ||
1256 | } | ||
1257 | |||
/* Parse the pci_devs_to_hide parameter, register the stub PCI driver, and
 * create its sysfs control files. Runs via fs_initcall when built in, or
 * from xen_pcibk_init() when built as a module. */
static int __init pcistub_init(void)
{
	int pos = 0;
	int err = 0;
	int domain, bus, slot, func;
	int parsed;

	if (pci_devs_to_hide && *pci_devs_to_hide) {
		do {
			parsed = 0;

			/* Accept "(dddd:bb:ss.f)" or "(bb:ss.f)" with an
			 * implicit domain of 0; %n records how many input
			 * characters were consumed. */
			err = sscanf(pci_devs_to_hide + pos,
				     " (%x:%x:%x.%x) %n",
				     &domain, &bus, &slot, &func, &parsed);
			if (err != 4) {
				domain = 0;
				err = sscanf(pci_devs_to_hide + pos,
					     " (%x:%x.%x) %n",
					     &bus, &slot, &func, &parsed);
				if (err != 3)
					goto parse_error;
			}

			err = pcistub_device_id_add(domain, bus, slot, func);
			if (err)
				goto out;

			/* if parsed<=0, we've reached the end of the string */
			pos += parsed;
		} while (parsed > 0 && pci_devs_to_hide[pos]);
	}

	/* If we're the first PCI Device Driver to register, we're the
	 * first one to get offered PCI devices as they become
	 * available (and thus we can be the first to grab them)
	 */
	err = pci_register_driver(&xen_pcibk_pci_driver);
	if (err < 0)
		goto out;

	/* Create all sysfs control files; on any failure tear everything
	 * down again via pcistub_exit(). */
	err = driver_create_file(&xen_pcibk_pci_driver.driver,
				 &driver_attr_new_slot);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_remove_slot);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_slots);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_quirks);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_permissive);

	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_irq_handlers);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_irq_handler_state);
	if (err)
		pcistub_exit();

out:
	return err;

parse_error:
	printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n",
	       pci_devs_to_hide + pos);
	return -EINVAL;
}
1330 | |||
#ifndef MODULE
/*
 * fs_initcall happens before device_initcall
 * so xen_pcibk *should* get called first (b/c we
 * want to suck up any device before other drivers
 * get a chance by being the first pci device
 * driver to register)
 */
/* Built-in case only; the module build calls pcistub_init() from
 * xen_pcibk_init() instead. */
fs_initcall(pcistub_init);
#endif
1341 | |||
/* Driver entry point: only meaningful in the initial (dom0) domain.
 * Initializes virtual config-space handling, the stub driver (module
 * build only — when built in, pcistub_init already ran via fs_initcall),
 * and finally registers with xenbus. */
static int __init xen_pcibk_init(void)
{
	int err;

	if (!xen_initial_domain())
		return -ENODEV;

	err = xen_pcibk_config_init();
	if (err)
		return err;

#ifdef MODULE
	err = pcistub_init();
	if (err < 0)
		return err;
#endif

	pcistub_init_devices_late();
	err = xen_pcibk_xenbus_register();
	if (err)
		/* Undo the pcistub registration on xenbus failure. */
		pcistub_exit();

	return err;
}
1366 | |||
/* Module unload: tear down in reverse order of xen_pcibk_init(). */
static void __exit xen_pcibk_cleanup(void)
{
	xen_pcibk_xenbus_unregister();
	pcistub_exit();
}

module_init(xen_pcibk_init);
module_exit(xen_pcibk_cleanup);

MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h new file mode 100644 index 000000000000..a0e131a81503 --- /dev/null +++ b/drivers/xen/xen-pciback/pciback.h | |||
@@ -0,0 +1,183 @@ | |||
1 | /* | ||
2 | * PCI Backend Common Data Structures & Function Declarations | ||
3 | * | ||
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | */ | ||
6 | #ifndef __XEN_PCIBACK_H__ | ||
7 | #define __XEN_PCIBACK_H__ | ||
8 | |||
9 | #include <linux/pci.h> | ||
10 | #include <linux/interrupt.h> | ||
11 | #include <xen/xenbus.h> | ||
12 | #include <linux/list.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <linux/workqueue.h> | ||
15 | #include <linux/atomic.h> | ||
16 | #include <xen/interface/io/pciif.h> | ||
17 | |||
/* List node wrapping a pci_dev pointer (used to track exported devices). */
struct pci_dev_entry {
	struct list_head list;
	struct pci_dev *dev;
};

/* Bit positions / masks — presumably tested against
 * xen_pcibk_device.flags (TODO confirm against pciback_ops.c). */
#define _PDEVF_op_active	(0)
#define PDEVF_op_active		(1<<(_PDEVF_op_active))
#define _PCIB_op_pending	(1)
#define PCIB_op_pending		(1<<(_PCIB_op_pending))
27 | |||
/* Per-frontend backend instance state. */
struct xen_pcibk_device {
	void *pci_dev_data;		/* backend-private store (vpci/passthrough) -- TODO confirm */
	spinlock_t dev_lock;
	struct xenbus_device *xdev;
	struct xenbus_watch be_watch;	/* watch on the backend xenstore dir -- TODO confirm path */
	u8 be_watching;			/* non-zero once be_watch is registered */
	int evtchn_irq;			/* IRQ bound to the frontend event channel */
	struct xen_pci_sharedinfo *sh_info;	/* page shared with pcifront */
	unsigned long flags;		/* PDEVF_op_active / PCIB_op_pending bits */
	struct work_struct op_work;	/* deferred processing of frontend ops */
};
39 | |||
/* Per-PCI-device driver data, attached via pci_set_drvdata(). */
struct xen_pcibk_dev_data {
	struct list_head config_fields;	/* virtual config-space field overrides */
	unsigned int permissive:1;	/* allow unfiltered config writes */
	unsigned int warned_on_write:1;
	unsigned int enable_intx:1;
	unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
	unsigned int ack_intr:1; /* .. and ACK-ing */
	unsigned long handled;		/* count of interrupts handled */
	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
	char irq_name[0]; /* xen-pcibk[000:04:00.0] */
};
51 | |||
/* Used by XenBus and xen_pcibk_ops.c */
extern wait_queue_head_t xen_pcibk_aer_wait_queue;
extern struct workqueue_struct *xen_pcibk_wq;
/* Used by pcistub.c and conf_space_quirks.c */
extern struct list_head xen_pcibk_quirks;

/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
					    int domain, int bus,
					    int slot, int func);
struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
				    struct pci_dev *dev);
void pcistub_put_pci_dev(struct pci_dev *dev);

/* Ensure a device is turned off or reset */
void xen_pcibk_reset_device(struct pci_dev *pdev);

/* Access a virtual configuration space for a PCI device */
int xen_pcibk_config_init(void);
int xen_pcibk_config_init_dev(struct pci_dev *dev);
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
void xen_pcibk_config_reset_dev(struct pci_dev *dev);
void xen_pcibk_config_free_dev(struct pci_dev *dev);
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
			  u32 *ret_val);
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
			   u32 value);

/* Handle requests for specific devices from the frontend.
 * publish_pci_dev_cb announces one device; publish_pci_root_cb announces
 * one root bus. Both return 0 on success -- TODO confirm convention. */
typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
				   unsigned int domain, unsigned int bus,
				   unsigned int devfn, unsigned int devid);
typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
				    unsigned int domain, unsigned int bus);
/* Backend registration for the two types of BDF representation:
 *  vpci - BDFs start at 00
 *  passthrough - BDFs are exactly like in the host.
 */
struct xen_pcibk_backend {
	char *name;	/* "vpci" or "passthrough" -- TODO confirm strings */
	int (*init)(struct xen_pcibk_device *pdev);
	void (*free)(struct xen_pcibk_device *pdev);
	int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
		    unsigned int *domain, unsigned int *bus,
		    unsigned int *devfn);
	int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
	void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
	int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
		   int devid, publish_pci_dev_cb publish_cb);
	struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
			       unsigned int domain, unsigned int bus,
			       unsigned int devfn);
};

extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
/* The currently selected backend -- set elsewhere; may be NULL. */
extern struct xen_pcibk_backend *xen_pcibk_backend;
110 | |||
111 | static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, | ||
112 | struct pci_dev *dev, | ||
113 | int devid, | ||
114 | publish_pci_dev_cb publish_cb) | ||
115 | { | ||
116 | if (xen_pcibk_backend && xen_pcibk_backend->add) | ||
117 | return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); | ||
118 | return -1; | ||
119 | }; | ||
120 | static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, | ||
121 | struct pci_dev *dev) | ||
122 | { | ||
123 | if (xen_pcibk_backend && xen_pcibk_backend->free) | ||
124 | return xen_pcibk_backend->release(pdev, dev); | ||
125 | }; | ||
126 | |||
127 | static inline struct pci_dev * | ||
128 | xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, | ||
129 | unsigned int bus, unsigned int devfn) | ||
130 | { | ||
131 | if (xen_pcibk_backend && xen_pcibk_backend->get) | ||
132 | return xen_pcibk_backend->get(pdev, domain, bus, devfn); | ||
133 | return NULL; | ||
134 | }; | ||
/**
 * Added for dom0 PCIE-AER handling. Get the guest domain/bus/devfn in
 * xen_pcibk before sending an AER request to pcifront, so that the guest
 * can identify the device and cooperate with xen_pcibk to finish the AER
 * recovery job, if the device driver has the capability.
 * Returns -1 when no backend (or no find op) is registered.
 */
static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
					     struct xen_pcibk_device *pdev,
					     unsigned int *domain,
					     unsigned int *bus,
					     unsigned int *devfn)
{
	if (xen_pcibk_backend && xen_pcibk_backend->find)
		return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
					       devfn);
	return -1;
};
152 | static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) | ||
153 | { | ||
154 | if (xen_pcibk_backend && xen_pcibk_backend->init) | ||
155 | return xen_pcibk_backend->init(pdev); | ||
156 | return -1; | ||
157 | }; | ||
158 | static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, | ||
159 | publish_pci_root_cb cb) | ||
160 | { | ||
161 | if (xen_pcibk_backend && xen_pcibk_backend->publish) | ||
162 | return xen_pcibk_backend->publish(pdev, cb); | ||
163 | return -1; | ||
164 | }; | ||
165 | static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) | ||
166 | { | ||
167 | if (xen_pcibk_backend && xen_pcibk_backend->free) | ||
168 | return xen_pcibk_backend->free(pdev); | ||
169 | }; | ||
170 | /* Handles events from front-end */ | ||
171 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); | ||
172 | void xen_pcibk_do_op(struct work_struct *data); | ||
173 | |||
174 | int xen_pcibk_xenbus_register(void); | ||
175 | void xen_pcibk_xenbus_unregister(void); | ||
176 | |||
177 | extern int verbose_request; | ||
178 | |||
179 | void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); | ||
180 | #endif | ||
181 | |||
182 | /* Handles shared IRQs that can to device domain and control domain. */ | ||
183 | void xen_pcibk_irq_handler(struct pci_dev *dev, int reset); | ||
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c new file mode 100644 index 000000000000..8c95c3415b75 --- /dev/null +++ b/drivers/xen/xen-pciback/pciback_ops.c | |||
@@ -0,0 +1,384 @@ | |||
1 | /* | ||
2 | * PCI Backend Operations - respond to PCI requests from Frontend | ||
3 | * | ||
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | */ | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/wait.h> | ||
8 | #include <linux/bitops.h> | ||
9 | #include <xen/events.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include "pciback.h" | ||
12 | |||
13 | #define DRV_NAME "xen-pciback" | ||
14 | int verbose_request; | ||
15 | module_param(verbose_request, int, 0644); | ||
16 | |||
17 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); | ||
18 | |||
/* Ensure a device has the fake IRQ handler "turned on/off" and is
 * ready to be exported. This MUST be run after xen_pcibk_reset_device
 * which does the actual PCI device enable/disable.
 *
 * @dev:   passed-through PCI device (its drvdata holds our dev_data)
 * @reset: non-zero when called from device reset; forces enable_intx
 *         and ack_intr off before deciding what to do.
 */
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
	struct xen_pcibk_dev_data *dev_data;
	int rc;
	int enable = 0;

	dev_data = pci_get_drvdata(dev);
	if (!dev_data)
		return;

	/* We don't deal with bridges */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return;

	if (reset) {
		dev_data->enable_intx = 0;
		dev_data->ack_intr = 0;
	}
	enable = dev_data->enable_intx;

	/* Asked to disable, but ISR isn't running - nothing to do. */
	if (!enable && !dev_data->isr_on)
		return;

	/* Squirrel away the IRQs in the dev_data. We need this
	 * b/c when device transitions to MSI, the dev->irq is
	 * overwritten with the MSI vector.
	 */
	if (enable)
		dev_data->irq = dev->irq;

	/*
	 * SR-IOV devices in all use MSI-X and have no legacy
	 * interrupts, so inhibit creating a fake IRQ handler for them.
	 */
	if (dev_data->irq == 0)
		goto out;

	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		dev_data->isr_on ? "enable" : "disable",
		enable ? "enable" : "disable");

	if (enable) {
		/* IRQF_SHARED: the line may also be in use by dom0. */
		rc = request_irq(dev_data->irq,
				xen_pcibk_guest_interrupt, IRQF_SHARED,
				dev_data->irq_name, dev);
		if (rc) {
			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
				"handler for IRQ %d! (rc:%d)\n",
				dev_data->irq_name, dev_data->irq, rc);
			goto out;
		}
	} else {
		free_irq(dev_data->irq, dev);
		dev_data->irq = 0;
	}
	/* isr_on/ack_intr only flip once the request/free succeeded. */
	dev_data->isr_on = enable;
	dev_data->ack_intr = enable;
out:
	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
		dev_data->irq_name,
		dev_data->irq,
		pci_is_enabled(dev) ? "on" : "off",
		dev->msi_enabled ? "MSI" : "",
		dev->msix_enabled ? "MSI/X" : "",
		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
			(dev_data->isr_on ? "failed to disable" : "disabled"));
}
96 | |||
/* Ensure a device is "turned off" and ready to be exported.
 * (Also see xen_pcibk_config_reset to ensure virtual configuration space is
 * ready to be re-exported)
 */
void xen_pcibk_reset_device(struct pci_dev *dev)
{
	u16 cmd;

	/* Tear down the fake IRQ handler before touching the device. */
	xen_pcibk_control_isr(dev, 1 /* reset device */);

	/* Disable devices (but not bridges) */
	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
		/* The guest could have been abruptly killed without
		 * disabling MSI/MSI-X interrupts.*/
		if (dev->msix_enabled)
			pci_disable_msix(dev);
		if (dev->msi_enabled)
			pci_disable_msi(dev);
#endif
		pci_disable_device(dev);

		/* Clear the whole command register: no I/O, memory or
		 * bus-master access until the device is re-exported. */
		pci_write_config_word(dev, PCI_COMMAND, 0);

		dev->is_busmaster = 0;
	} else {
		/* For bridges only clear PCI_COMMAND_INVALIDATE
		 * (memory-write-and-invalidate); the rest of the command
		 * register is left intact. */
		pci_read_config_word(dev, PCI_COMMAND, &cmd);
		if (cmd & (PCI_COMMAND_INVALIDATE)) {
			cmd &= ~(PCI_COMMAND_INVALIDATE);
			pci_write_config_word(dev, PCI_COMMAND, cmd);

			dev->is_busmaster = 0;
		}
	}
}
132 | |||
133 | #ifdef CONFIG_PCI_MSI | ||
134 | static | ||
135 | int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, | ||
136 | struct pci_dev *dev, struct xen_pci_op *op) | ||
137 | { | ||
138 | struct xen_pcibk_dev_data *dev_data; | ||
139 | int otherend = pdev->xdev->otherend_id; | ||
140 | int status; | ||
141 | |||
142 | if (unlikely(verbose_request)) | ||
143 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); | ||
144 | |||
145 | status = pci_enable_msi(dev); | ||
146 | |||
147 | if (status) { | ||
148 | printk(KERN_ERR "error enable msi for guest %x status %x\n", | ||
149 | otherend, status); | ||
150 | op->value = 0; | ||
151 | return XEN_PCI_ERR_op_failed; | ||
152 | } | ||
153 | |||
154 | /* The value the guest needs is actually the IDT vector, not the | ||
155 | * the local domain's IRQ number. */ | ||
156 | |||
157 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | ||
158 | if (unlikely(verbose_request)) | ||
159 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | ||
160 | op->value); | ||
161 | |||
162 | dev_data = pci_get_drvdata(dev); | ||
163 | if (dev_data) | ||
164 | dev_data->ack_intr = 0; | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | static | ||
170 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | ||
171 | struct pci_dev *dev, struct xen_pci_op *op) | ||
172 | { | ||
173 | struct xen_pcibk_dev_data *dev_data; | ||
174 | |||
175 | if (unlikely(verbose_request)) | ||
176 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", | ||
177 | pci_name(dev)); | ||
178 | pci_disable_msi(dev); | ||
179 | |||
180 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | ||
181 | if (unlikely(verbose_request)) | ||
182 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | ||
183 | op->value); | ||
184 | dev_data = pci_get_drvdata(dev); | ||
185 | if (dev_data) | ||
186 | dev_data->ack_intr = 1; | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static | ||
191 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | ||
192 | struct pci_dev *dev, struct xen_pci_op *op) | ||
193 | { | ||
194 | struct xen_pcibk_dev_data *dev_data; | ||
195 | int i, result; | ||
196 | struct msix_entry *entries; | ||
197 | |||
198 | if (unlikely(verbose_request)) | ||
199 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", | ||
200 | pci_name(dev)); | ||
201 | if (op->value > SH_INFO_MAX_VEC) | ||
202 | return -EINVAL; | ||
203 | |||
204 | entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); | ||
205 | if (entries == NULL) | ||
206 | return -ENOMEM; | ||
207 | |||
208 | for (i = 0; i < op->value; i++) { | ||
209 | entries[i].entry = op->msix_entries[i].entry; | ||
210 | entries[i].vector = op->msix_entries[i].vector; | ||
211 | } | ||
212 | |||
213 | result = pci_enable_msix(dev, entries, op->value); | ||
214 | |||
215 | if (result == 0) { | ||
216 | for (i = 0; i < op->value; i++) { | ||
217 | op->msix_entries[i].entry = entries[i].entry; | ||
218 | if (entries[i].vector) | ||
219 | op->msix_entries[i].vector = | ||
220 | xen_pirq_from_irq(entries[i].vector); | ||
221 | if (unlikely(verbose_request)) | ||
222 | printk(KERN_DEBUG DRV_NAME ": %s: " \ | ||
223 | "MSI-X[%d]: %d\n", | ||
224 | pci_name(dev), i, | ||
225 | op->msix_entries[i].vector); | ||
226 | } | ||
227 | } else { | ||
228 | printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n", | ||
229 | pci_name(dev), result); | ||
230 | } | ||
231 | kfree(entries); | ||
232 | |||
233 | op->value = result; | ||
234 | dev_data = pci_get_drvdata(dev); | ||
235 | if (dev_data) | ||
236 | dev_data->ack_intr = 0; | ||
237 | |||
238 | return result; | ||
239 | } | ||
240 | |||
241 | static | ||
242 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | ||
243 | struct pci_dev *dev, struct xen_pci_op *op) | ||
244 | { | ||
245 | struct xen_pcibk_dev_data *dev_data; | ||
246 | if (unlikely(verbose_request)) | ||
247 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", | ||
248 | pci_name(dev)); | ||
249 | pci_disable_msix(dev); | ||
250 | |||
251 | /* | ||
252 | * SR-IOV devices (which don't have any legacy IRQ) have | ||
253 | * an undefined IRQ value of zero. | ||
254 | */ | ||
255 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | ||
256 | if (unlikely(verbose_request)) | ||
257 | printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), | ||
258 | op->value); | ||
259 | dev_data = pci_get_drvdata(dev); | ||
260 | if (dev_data) | ||
261 | dev_data->ack_intr = 1; | ||
262 | return 0; | ||
263 | } | ||
264 | #endif | ||
/*
 * Now the same evtchn is used for both pcifront conf_read_write request
 * as well as pcie aer front end ack. We use a new work_queue to schedule
 * xen_pcibk conf_read_write service for avoiding conflict with aer_core
 * do_recovery job which also use the system default work_queue
 */
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
	/* Check that frontend is requesting an operation and that we are not
	 * already processing a request */
	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
		/* _PDEVF_op_active now set: we own the op until
		 * xen_pcibk_do_op() clears it again. */
		queue_work(xen_pcibk_wq, &pdev->op_work);
	}
	/*_XEN_PCIB_active should have been cleared by pcifront. And also make
	sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
	if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
	    && test_bit(_PCIB_op_pending, &pdev->flags)) {
		wake_up(&xen_pcibk_aer_wait_queue);
	}
}
286 | |||
/* Performing the configuration space reads/writes must not be done in atomic
 * context because some of the pci_* functions can sleep (mostly due to ACPI
 * use of semaphores). This function is intended to be called from a work
 * queue in process context taking a struct xen_pcibk_device as a parameter */

void xen_pcibk_do_op(struct work_struct *data)
{
	struct xen_pcibk_device *pdev =
		container_of(data, struct xen_pcibk_device, op_work);
	struct pci_dev *dev;
	struct xen_pcibk_dev_data *dev_data = NULL;
	/* The op lives in the page shared with the frontend. */
	struct xen_pci_op *op = &pdev->sh_info->op;
	int test_intx = 0;

	dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);

	if (dev == NULL)
		op->err = XEN_PCI_ERR_dev_not_found;
	else {
		/* Snapshot enable_intx so a change made by the op below
		 * (e.g. a config write) can be detected afterwards. */
		dev_data = pci_get_drvdata(dev);
		if (dev_data)
			test_intx = dev_data->enable_intx;
		switch (op->cmd) {
		case XEN_PCI_OP_conf_read:
			op->err = xen_pcibk_config_read(dev,
				  op->offset, op->size, &op->value);
			break;
		case XEN_PCI_OP_conf_write:
			op->err = xen_pcibk_config_write(dev,
				  op->offset, op->size,	op->value);
			break;
#ifdef CONFIG_PCI_MSI
		case XEN_PCI_OP_enable_msi:
			op->err = xen_pcibk_enable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msi:
			op->err = xen_pcibk_disable_msi(pdev, dev, op);
			break;
		case XEN_PCI_OP_enable_msix:
			op->err = xen_pcibk_enable_msix(pdev, dev, op);
			break;
		case XEN_PCI_OP_disable_msix:
			op->err = xen_pcibk_disable_msix(pdev, dev, op);
			break;
#endif
		default:
			op->err = XEN_PCI_ERR_not_implemented;
			break;
		}
	}
	if (!op->err && dev && dev_data) {
		/* Transition detected */
		if ((dev_data->enable_intx != test_intx))
			xen_pcibk_control_isr(dev, 0 /* no reset */);
	}
	/* Tell the driver domain that we're done. */
	wmb();
	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
	notify_remote_via_irq(pdev->evtchn_irq);

	/* Mark that we're done. */
	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
	clear_bit(_PDEVF_op_active, &pdev->flags);
	smp_mb__after_clear_bit(); /* /before/ final check for work */

	/* Check to see if the driver domain tried to start another request in
	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
	 */
	xen_pcibk_test_and_schedule_op(pdev);
}
357 | |||
358 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) | ||
359 | { | ||
360 | struct xen_pcibk_device *pdev = dev_id; | ||
361 | |||
362 | xen_pcibk_test_and_schedule_op(pdev); | ||
363 | |||
364 | return IRQ_HANDLED; | ||
365 | } | ||
/* Fake handler installed on a legacy IRQ line shared with a guest: while
 * isr_on && ack_intr it claims the interrupt (IRQ_HANDLED) and counts it;
 * every 1000 interrupts it re-checks whether the line is still shared and
 * stops claiming interrupts once it no longer is.
 */
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
	struct pci_dev *dev = (struct pci_dev *)dev_id;
	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);

	if (dev_data->isr_on && dev_data->ack_intr) {
		dev_data->handled++;
		/* Periodic, cheap re-check instead of querying the
		 * hypervisor on every interrupt. */
		if ((dev_data->handled % 1000) == 0) {
			if (xen_test_irq_shared(irq)) {
				printk(KERN_INFO "%s IRQ line is not shared "
					"with other domains. Turning ISR off\n",
					 dev_data->irq_name);
				dev_data->ack_intr = 0;
			}
		}
		return IRQ_HANDLED;
	}
	return IRQ_NONE;
}
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c new file mode 100644 index 000000000000..4a42cfb0959d --- /dev/null +++ b/drivers/xen/xen-pciback/vpci.c | |||
@@ -0,0 +1,259 @@ | |||
1 | /* | ||
2 | * PCI Backend - Provides a Virtual PCI bus (with real devices) | ||
3 | * to the frontend | ||
4 | * | ||
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
6 | */ | ||
7 | |||
8 | #include <linux/list.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/pci.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include "pciback.h" | ||
13 | |||
14 | #define PCI_SLOT_MAX 32 | ||
15 | #define DRV_NAME "xen-pciback" | ||
16 | |||
17 | struct vpci_dev_data { | ||
18 | /* Access to dev_list must be protected by lock */ | ||
19 | struct list_head dev_list[PCI_SLOT_MAX]; | ||
20 | spinlock_t lock; | ||
21 | }; | ||
22 | |||
23 | static inline struct list_head *list_first(struct list_head *head) | ||
24 | { | ||
25 | return head->next; | ||
26 | } | ||
27 | |||
28 | static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, | ||
29 | unsigned int domain, | ||
30 | unsigned int bus, | ||
31 | unsigned int devfn) | ||
32 | { | ||
33 | struct pci_dev_entry *entry; | ||
34 | struct pci_dev *dev = NULL; | ||
35 | struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | ||
36 | unsigned long flags; | ||
37 | |||
38 | if (domain != 0 || bus != 0) | ||
39 | return NULL; | ||
40 | |||
41 | if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { | ||
42 | spin_lock_irqsave(&vpci_dev->lock, flags); | ||
43 | |||
44 | list_for_each_entry(entry, | ||
45 | &vpci_dev->dev_list[PCI_SLOT(devfn)], | ||
46 | list) { | ||
47 | if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { | ||
48 | dev = entry->dev; | ||
49 | break; | ||
50 | } | ||
51 | } | ||
52 | |||
53 | spin_unlock_irqrestore(&vpci_dev->lock, flags); | ||
54 | } | ||
55 | return dev; | ||
56 | } | ||
57 | |||
58 | static inline int match_slot(struct pci_dev *l, struct pci_dev *r) | ||
59 | { | ||
60 | if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) | ||
61 | && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) | ||
62 | return 1; | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | |||
/* Place @dev on the virtual PCI bus: functions of a multi-function device
 * share one virtual slot, otherwise the first free slot is used. On
 * success the chosen slot/func is published to xenstore via @publish_cb.
 * Returns 0 or a negative errno (also reported via xenbus_dev_fatal).
 */
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
				   struct pci_dev *dev, int devid,
				   publish_pci_dev_cb publish_cb)
{
	int err = 0, slot, func = -1;
	struct pci_dev_entry *t, *dev_entry;
	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
	unsigned long flags;

	/* Bridges cannot be flattened onto the single virtual bus. */
	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
		err = -EFAULT;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Can't export bridges on the virtual PCI bus");
		goto out;
	}

	/* Allocate outside the lock; inserted below under it. */
	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
	if (!dev_entry) {
		err = -ENOMEM;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error adding entry to virtual PCI bus");
		goto out;
	}

	dev_entry->dev = dev;

	spin_lock_irqsave(&vpci_dev->lock, flags);

	/* Keep multi-function devices together on the virtual PCI bus */
	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
		if (!list_empty(&vpci_dev->dev_list[slot])) {
			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
				       struct pci_dev_entry, list);

			if (match_slot(dev, t->dev)) {
				pr_info(DRV_NAME ": vpci: %s: "
					"assign to virtual slot %d func %d\n",
					pci_name(dev), slot,
					PCI_FUNC(dev->devfn));
				list_add_tail(&dev_entry->list,
					      &vpci_dev->dev_list[slot]);
				func = PCI_FUNC(dev->devfn);
				goto unlock;
			}
		}
	}

	/* Assign to a new slot on the virtual PCI bus */
	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
		if (list_empty(&vpci_dev->dev_list[slot])) {
			printk(KERN_INFO DRV_NAME
			       ": vpci: %s: assign to virtual slot %d\n",
			       pci_name(dev), slot);
			list_add_tail(&dev_entry->list,
				      &vpci_dev->dev_list[slot]);
			func = PCI_FUNC(dev->devfn);
			goto unlock;
		}
	}

	/* All 32 virtual slots are occupied. */
	err = -ENOMEM;
	xenbus_dev_fatal(pdev->xdev, err,
			 "No more space on root virtual PCI bus");

unlock:
	spin_unlock_irqrestore(&vpci_dev->lock, flags);

	/* Publish this device. */
	if (!err)
		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);

out:
	return err;
}
141 | |||
142 | static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, | ||
143 | struct pci_dev *dev) | ||
144 | { | ||
145 | int slot; | ||
146 | struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | ||
147 | struct pci_dev *found_dev = NULL; | ||
148 | unsigned long flags; | ||
149 | |||
150 | spin_lock_irqsave(&vpci_dev->lock, flags); | ||
151 | |||
152 | for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | ||
153 | struct pci_dev_entry *e, *tmp; | ||
154 | list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], | ||
155 | list) { | ||
156 | if (e->dev == dev) { | ||
157 | list_del(&e->list); | ||
158 | found_dev = e->dev; | ||
159 | kfree(e); | ||
160 | goto out; | ||
161 | } | ||
162 | } | ||
163 | } | ||
164 | |||
165 | out: | ||
166 | spin_unlock_irqrestore(&vpci_dev->lock, flags); | ||
167 | |||
168 | if (found_dev) | ||
169 | pcistub_put_pci_dev(found_dev); | ||
170 | } | ||
171 | |||
172 | static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) | ||
173 | { | ||
174 | int slot; | ||
175 | struct vpci_dev_data *vpci_dev; | ||
176 | |||
177 | vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); | ||
178 | if (!vpci_dev) | ||
179 | return -ENOMEM; | ||
180 | |||
181 | spin_lock_init(&vpci_dev->lock); | ||
182 | |||
183 | for (slot = 0; slot < PCI_SLOT_MAX; slot++) | ||
184 | INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); | ||
185 | |||
186 | pdev->pci_dev_data = vpci_dev; | ||
187 | |||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, | ||
192 | publish_pci_root_cb publish_cb) | ||
193 | { | ||
194 | /* The Virtual PCI bus has only one root */ | ||
195 | return publish_cb(pdev, 0, 0); | ||
196 | } | ||
197 | |||
198 | static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) | ||
199 | { | ||
200 | int slot; | ||
201 | struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | ||
202 | |||
203 | for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | ||
204 | struct pci_dev_entry *e, *tmp; | ||
205 | list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], | ||
206 | list) { | ||
207 | list_del(&e->list); | ||
208 | pcistub_put_pci_dev(e->dev); | ||
209 | kfree(e); | ||
210 | } | ||
211 | } | ||
212 | |||
213 | kfree(vpci_dev); | ||
214 | pdev->pci_dev_data = NULL; | ||
215 | } | ||
216 | |||
217 | static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, | ||
218 | struct xen_pcibk_device *pdev, | ||
219 | unsigned int *domain, unsigned int *bus, | ||
220 | unsigned int *devfn) | ||
221 | { | ||
222 | struct pci_dev_entry *entry; | ||
223 | struct pci_dev *dev = NULL; | ||
224 | struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | ||
225 | unsigned long flags; | ||
226 | int found = 0, slot; | ||
227 | |||
228 | spin_lock_irqsave(&vpci_dev->lock, flags); | ||
229 | for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | ||
230 | list_for_each_entry(entry, | ||
231 | &vpci_dev->dev_list[slot], | ||
232 | list) { | ||
233 | dev = entry->dev; | ||
234 | if (dev && dev->bus->number == pcidev->bus->number | ||
235 | && pci_domain_nr(dev->bus) == | ||
236 | pci_domain_nr(pcidev->bus) | ||
237 | && dev->devfn == pcidev->devfn) { | ||
238 | found = 1; | ||
239 | *domain = 0; | ||
240 | *bus = 0; | ||
241 | *devfn = PCI_DEVFN(slot, | ||
242 | PCI_FUNC(pcidev->devfn)); | ||
243 | } | ||
244 | } | ||
245 | } | ||
246 | spin_unlock_irqrestore(&vpci_dev->lock, flags); | ||
247 | return found; | ||
248 | } | ||
249 | |||
/* Ops table for the "vpci" backend: exported devices from arbitrary
 * physical locations are flattened onto a single virtual PCI bus
 * (domain 0, bus 0) presented to the frontend.
 */
struct xen_pcibk_backend xen_pcibk_vpci_backend = {
	.name = "vpci",
	.init = __xen_pcibk_init_devices,
	.free = __xen_pcibk_release_devices,
	.find = __xen_pcibk_get_pcifront_dev,
	.publish = __xen_pcibk_publish_pci_roots,
	.release = __xen_pcibk_release_pci_dev,
	.add = __xen_pcibk_add_pci_dev,
	.get = __xen_pcibk_get_pci_dev,
};
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c new file mode 100644 index 000000000000..206c4ce030bc --- /dev/null +++ b/drivers/xen/xen-pciback/xenbus.c | |||
@@ -0,0 +1,749 @@ | |||
1 | /* | ||
2 | * PCI Backend Xenbus Setup - handles setup with frontend and xend | ||
3 | * | ||
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | ||
5 | */ | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/list.h> | ||
9 | #include <linux/vmalloc.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | #include <xen/xenbus.h> | ||
12 | #include <xen/events.h> | ||
13 | #include <asm/xen/pci.h> | ||
14 | #include <linux/workqueue.h> | ||
15 | #include "pciback.h" | ||
16 | |||
17 | #define DRV_NAME "xen-pciback" | ||
18 | #define INVALID_EVTCHN_IRQ (-1) | ||
19 | struct workqueue_struct *xen_pcibk_wq; | ||
20 | |||
21 | static int __read_mostly passthrough; | ||
22 | module_param(passthrough, bool, S_IRUGO); | ||
23 | MODULE_PARM_DESC(passthrough, | ||
24 | "Option to specify how to export PCI topology to guest:\n"\ | ||
25 | " 0 - (default) Hide the true PCI topology and makes the frontend\n"\ | ||
26 | " there is a single PCI bus with only the exported devices on it.\n"\ | ||
27 | " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\ | ||
28 | " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\ | ||
29 | " 1 - Passthrough provides a real view of the PCI topology to the\n"\ | ||
30 | " frontend (for example, a device at 06:01.b will still appear at\n"\ | ||
31 | " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\ | ||
32 | " exposed PCI devices to its driver domains. This may be required\n"\ | ||
33 | " for drivers which depend on finding their hardward in certain\n"\ | ||
34 | " bus/slot locations."); | ||
35 | |||
36 | static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) | ||
37 | { | ||
38 | struct xen_pcibk_device *pdev; | ||
39 | |||
40 | pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL); | ||
41 | if (pdev == NULL) | ||
42 | goto out; | ||
43 | dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); | ||
44 | |||
45 | pdev->xdev = xdev; | ||
46 | dev_set_drvdata(&xdev->dev, pdev); | ||
47 | |||
48 | spin_lock_init(&pdev->dev_lock); | ||
49 | |||
50 | pdev->sh_info = NULL; | ||
51 | pdev->evtchn_irq = INVALID_EVTCHN_IRQ; | ||
52 | pdev->be_watching = 0; | ||
53 | |||
54 | INIT_WORK(&pdev->op_work, xen_pcibk_do_op); | ||
55 | |||
56 | if (xen_pcibk_init_devices(pdev)) { | ||
57 | kfree(pdev); | ||
58 | pdev = NULL; | ||
59 | } | ||
60 | out: | ||
61 | return pdev; | ||
62 | } | ||
63 | |||
/* Tear down the connection to the frontend: unbind the event channel,
 * drain any in-flight op work, then unmap the shared page. dev_lock
 * guards evtchn_irq and sh_info; the workqueue flush must happen with
 * the lock dropped (the work item sleeps).
 */
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
	spin_lock(&pdev->dev_lock);

	/* Ensure the guest can't trigger our handler before removing devices */
	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
	}
	spin_unlock(&pdev->dev_lock);

	/* If the driver domain started an op, make sure we complete it
	 * before releasing the shared memory */

	/* Note, the workqueue does not use spinlocks at all.*/
	flush_workqueue(xen_pcibk_wq);

	spin_lock(&pdev->dev_lock);
	if (pdev->sh_info != NULL) {
		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
		pdev->sh_info = NULL;
	}
	spin_unlock(&pdev->dev_lock);

}
89 | |||
/* Free a pdev allocated by alloc_pdev(). Order matters: stop the xenbus
 * watch first so no new events arrive, disconnect from the frontend,
 * then release the exported devices before freeing the structure.
 */
static void free_pdev(struct xen_pcibk_device *pdev)
{
	if (pdev->be_watching) {
		unregister_xenbus_watch(&pdev->be_watch);
		pdev->be_watching = 0;
	}

	xen_pcibk_disconnect(pdev);

	xen_pcibk_release_devices(pdev);

	/* Sever the xenbus-device link before freeing. */
	dev_set_drvdata(&pdev->xdev->dev, NULL);
	pdev->xdev = NULL;

	kfree(pdev);
}
106 | |||
/* Map the frontend's shared ring page (@gnt_ref) and bind @remote_evtchn
 * to xen_pcibk_handle_event(). On success pdev->sh_info and
 * pdev->evtchn_irq are set (under dev_lock). Returns 0 or a negative
 * errno (also reported via xenbus_dev_fatal).
 */
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
			       int remote_evtchn)
{
	int err = 0;
	void *vaddr;

	dev_dbg(&pdev->xdev->dev,
		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
		gnt_ref, remote_evtchn);

	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
	if (err < 0) {
		xenbus_dev_fatal(pdev->xdev, err,
				"Error mapping other domain page in ours.");
		goto out;
	}

	spin_lock(&pdev->dev_lock);
	pdev->sh_info = vaddr;
	spin_unlock(&pdev->dev_lock);

	err = bind_interdomain_evtchn_to_irqhandler(
		pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
		0, DRV_NAME, pdev);
	if (err < 0) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error binding event channel to IRQ");
		goto out;
	}

	/* On success the bind call returns the IRQ number. */
	spin_lock(&pdev->dev_lock);
	pdev->evtchn_irq = err;
	spin_unlock(&pdev->dev_lock);
	err = 0;

	dev_dbg(&pdev->xdev->dev, "Attached!\n");
out:
	return err;
}
146 | |||
/* Complete the handshake with the frontend: once both ends are in
 * Initialised, read the grant ref / event channel / magic from the
 * frontend's xenstore area, attach to them and switch this end to
 * Connected. Silently returns 0 when either side is not yet ready.
 */
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
{
	int err = 0;
	int gnt_ref, remote_evtchn;
	char *magic = NULL;


	/* Make sure we only do this setup once */
	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateInitialised)
		goto out;

	/* Wait for frontend to state that it has published the configuration */
	if (xenbus_read_driver_state(pdev->xdev->otherend) !=
	    XenbusStateInitialised)
		goto out;

	dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");

	err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
			    "pci-op-ref", "%u", &gnt_ref,
			    "event-channel", "%u", &remote_evtchn,
			    "magic", NULL, &magic, NULL);
	if (err) {
		/* If configuration didn't get read correctly, wait longer */
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading configuration from frontend");
		goto out;
	}

	/* Protocol version check. */
	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
		xenbus_dev_fatal(pdev->xdev, -EFAULT,
				 "version mismatch (%s/%s) with pcifront - "
				 "halting xen_pcibk",
				 magic, XEN_PCI_MAGIC);
		goto out;
	}

	err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
	if (err)
		goto out;

	dev_dbg(&pdev->xdev->dev, "Connecting...\n");

	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error switching to connected state!");

	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
	/* magic was allocated by xenbus_gather(); safe on NULL. */
	kfree(magic);

	return err;
}
203 | |||
204 | static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev, | ||
205 | unsigned int domain, unsigned int bus, | ||
206 | unsigned int devfn, unsigned int devid) | ||
207 | { | ||
208 | int err; | ||
209 | int len; | ||
210 | char str[64]; | ||
211 | |||
212 | len = snprintf(str, sizeof(str), "vdev-%d", devid); | ||
213 | if (unlikely(len >= (sizeof(str) - 1))) { | ||
214 | err = -ENOMEM; | ||
215 | goto out; | ||
216 | } | ||
217 | |||
218 | err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, | ||
219 | "%04x:%02x:%02x.%02x", domain, bus, | ||
220 | PCI_SLOT(devfn), PCI_FUNC(devfn)); | ||
221 | |||
222 | out: | ||
223 | return err; | ||
224 | } | ||
225 | |||
/*
 * Export one PCI device to the frontend domain.
 *
 * Looks the device up in the pcistub list by its slot, hands it to the
 * active backend under virtual device id @devid (which also publishes the
 * "vdev-<devid>" node via xen_pcibk_publish_pci_dev), and records the
 * frontend domain as the device's owner.
 *
 * Returns 0 on success or a negative errno; lookup failure is also
 * reported to the toolstack through xenbus_dev_fatal().
 */
static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
				 int domain, int bus, int slot, int func,
				 int devid)
{
	struct pci_dev *dev;
	int err = 0;

	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
		domain, bus, slot, func);

	/* The device must already have been seized by pcistub. */
	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
	if (!dev) {
		err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Couldn't locate PCI device "
				 "(%04x:%02x:%02x.%01x)! "
				 "perhaps already in-use?",
				 domain, bus, slot, func);
		goto out;
	}

	err = xen_pcibk_add_pci_dev(pdev, dev, devid,
				    xen_pcibk_publish_pci_dev);
	if (err)
		goto out;

	dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
	/*
	 * If another domain is already recorded as owner, complain but
	 * forcibly re-register ownership for our frontend.
	 */
	if (xen_register_device_domain_owner(dev,
					     pdev->xdev->otherend_id) != 0) {
		dev_err(&dev->dev, "device has been assigned to another " \
			"domain! Over-writting the ownership, but beware.\n");
		xen_unregister_device_domain_owner(dev);
		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
	}

	/* TODO: It'd be nice to export a bridge and have all of its children
	 * get exported with it. This may be best done in xend (which will
	 * have to calculate resource usage anyway) but we probably want to
	 * put something in here to ensure that if a bridge gets given to a
	 * driver domain, that all devices under that bridge are not given
	 * to other driver domains (as he who controls the bridge can disable
	 * it and stop the other devices from working).
	 */
out:
	return err;
}
272 | |||
273 | static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, | ||
274 | int domain, int bus, int slot, int func) | ||
275 | { | ||
276 | int err = 0; | ||
277 | struct pci_dev *dev; | ||
278 | |||
279 | dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", | ||
280 | domain, bus, slot, func); | ||
281 | |||
282 | dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); | ||
283 | if (!dev) { | ||
284 | err = -EINVAL; | ||
285 | dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " | ||
286 | "(%04x:%02x:%02x.%01x)! not owned by this domain\n", | ||
287 | domain, bus, slot, func); | ||
288 | goto out; | ||
289 | } | ||
290 | |||
291 | dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); | ||
292 | xen_unregister_device_domain_owner(dev); | ||
293 | |||
294 | xen_pcibk_release_pci_dev(pdev, dev); | ||
295 | |||
296 | out: | ||
297 | return err; | ||
298 | } | ||
299 | |||
/*
 * Record a PCI root bus ("dddd:bb") in xenstore so the frontend knows
 * which root buses to scan.
 *
 * Reads "root_num" (a missing node counts as zero), walks the existing
 * "root-N" entries to avoid publishing a duplicate, then appends a new
 * "root-<root_num>" entry and bumps "root_num".
 *
 * Returns 0 on success (including the already-published case), negative
 * errno on a xenstore or formatting failure.
 */
static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
				      unsigned int domain, unsigned int bus)
{
	unsigned int d, b;
	int i, root_num, len, err;
	char str[64];

	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");

	/* No "root_num" node simply means no roots published yet. */
	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
			   "root_num", "%d", &root_num);
	if (err == 0 || err == -ENOENT)
		root_num = 0;
	else if (err < 0)
		goto out;

	/* Verify that we haven't already published this pci root */
	for (i = 0; i < root_num; i++) {
		len = snprintf(str, sizeof(str), "root-%d", i);
		if (unlikely(len >= (sizeof(str) - 1))) {
			err = -ENOMEM;
			goto out;
		}

		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
				   str, "%x:%x", &d, &b);
		if (err < 0)
			goto out;
		/* Malformed entry: expected exactly "<dom>:<bus>". */
		if (err != 2) {
			err = -EINVAL;
			goto out;
		}

		/* Already published; nothing more to do. */
		if (d == domain && b == bus) {
			err = 0;
			goto out;
		}
	}

	len = snprintf(str, sizeof(str), "root-%d", root_num);
	if (unlikely(len >= (sizeof(str) - 1))) {
		err = -ENOMEM;
		goto out;
	}

	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
		root_num, domain, bus);

	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
			    "%04x:%02x", domain, bus);
	if (err)
		goto out;

	/* Bump the published root count to include the new entry. */
	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
			    "root_num", "%d", (root_num + 1));

out:
	return err;
}
359 | |||
/*
 * Handle a frontend-initiated reconfiguration (PCI hot-plug/unplug).
 *
 * For every "state-N" entry under our xenstore node:
 *   - XenbusStateInitialising: read "dev-N" (dddd:bb:ss.f), export the
 *     device to the frontend, publish its root bus, and flip the per-device
 *     substate to Initialised (hot-add).
 *   - XenbusStateClosing: read "vdev-N" and withdraw the device (hot-unplug).
 * Finally move the backend itself to XenbusStateReconfigured.
 *
 * Only runs while the backend is in XenbusStateReconfiguring, so a repeat
 * invocation is a no-op.
 *
 * NOTE: always returns 0, even after a failure; errors are reported to the
 * toolstack through xenbus_dev_fatal() instead.
 */
static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
{
	int err = 0;
	int num_devs;
	int domain, bus, slot, func;
	int substate;
	int i, len;
	char state_str[64];
	char dev_str[64];


	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");

	/* Make sure we only reconfigure once */
	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateReconfiguring)
		goto out;

	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
			   &num_devs);
	if (err != 1) {
		if (err >= 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of devices");
		goto out;
	}

	for (i = 0; i < num_devs; i++) {
		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
		if (unlikely(len >= (sizeof(state_str) - 1))) {
			err = -ENOMEM;
			xenbus_dev_fatal(pdev->xdev, err,
					 "String overflow while reading "
					 "configuration");
			goto out;
		}
		/* A missing or unparsable substate falls to "default" below. */
		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
				   "%d", &substate);
		if (err != 1)
			substate = XenbusStateUnknown;

		switch (substate) {
		case XenbusStateInitialising:
			/* Hot-add: frontend wants this virtual slot filled. */
			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);

			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
			if (unlikely(len >= (sizeof(dev_str) - 1))) {
				err = -ENOMEM;
				xenbus_dev_fatal(pdev->xdev, err,
						 "String overflow while "
						 "reading configuration");
				goto out;
			}
			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
					   dev_str, "%x:%x:%x.%x",
					   &domain, &bus, &slot, &func);
			if (err < 0) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error reading device "
						 "configuration");
				goto out;
			}
			/* All four BDF fields must have been parsed. */
			if (err != 4) {
				err = -EINVAL;
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error parsing pci device "
						 "configuration");
				goto out;
			}

			err = xen_pcibk_export_device(pdev, domain, bus, slot,
						      func, i);
			if (err)
				goto out;

			/* Publish pci roots. */
			err = xen_pcibk_publish_pci_roots(pdev,
						xen_pcibk_publish_pci_root);
			if (err) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error while publish PCI root"
						 "buses for frontend");
				goto out;
			}

			/* Acknowledge the hot-add in the per-device state. */
			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
					    state_str, "%d",
					    XenbusStateInitialised);
			if (err) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error switching substate of "
						 "dev-%d\n", i);
				goto out;
			}
			break;

		case XenbusStateClosing:
			/* Hot-unplug: frontend is done with this device. */
			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);

			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
			if (unlikely(len >= (sizeof(dev_str) - 1))) {
				err = -ENOMEM;
				xenbus_dev_fatal(pdev->xdev, err,
						 "String overflow while "
						 "reading configuration");
				goto out;
			}
			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
					   dev_str, "%x:%x:%x.%x",
					   &domain, &bus, &slot, &func);
			if (err < 0) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error reading device "
						 "configuration");
				goto out;
			}
			if (err != 4) {
				err = -EINVAL;
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error parsing pci device "
						 "configuration");
				goto out;
			}

			err = xen_pcibk_remove_device(pdev, domain, bus, slot,
						      func);
			if (err)
				goto out;

			/* TODO: If at some point we implement support for pci
			 * root hot-remove on pcifront side, we'll need to
			 * remove unnecessary xenstore nodes of pci roots here.
			 */

			break;

		default:
			break;
		}
	}

	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
	if (err) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error switching to reconfigured state!");
		goto out;
	}

out:
	return 0;
}
512 | |||
/*
 * xenbus ->otherend_changed callback: react to pcifront state changes.
 *
 *  - Initialised:   frontend finished its setup; try to attach.
 *  - Reconfiguring: frontend requests device hot-plug/unplug.
 *  - Connected:     frontend came back from a reconfigure; mirror it.
 *  - Closing:       disconnect and advertise Closing.
 *  - Closed:        disconnect; if the device is no longer marked online,
 *                   fall through to Unknown and unregister it entirely.
 */
static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
				     enum xenbus_state fe_state)
{
	struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);

	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);

	switch (fe_state) {
	case XenbusStateInitialised:
		xen_pcibk_attach(pdev);
		break;

	case XenbusStateReconfiguring:
		xen_pcibk_reconfigure(pdev);
		break;

	case XenbusStateConnected:
		/* pcifront switched its state from reconfiguring to connected.
		 * Then switch to connected state.
		 */
		xenbus_switch_state(xdev, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		xen_pcibk_disconnect(pdev);
		xenbus_switch_state(xdev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_pcibk_disconnect(pdev);
		xenbus_switch_state(xdev, XenbusStateClosed);
		if (xenbus_dev_is_online(xdev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
		device_unregister(&xdev->dev);
		break;

	default:
		break;
	}
}
556 | |||
/*
 * Read the device list the toolstack wrote under our node and export each
 * PCI device to the frontend.
 *
 * Does nothing unless the backend is still in XenbusStateInitWait (the
 * watch may fire more than once).  For each "dev-N" entry (dddd:bb:ss.f)
 * the device is exported and its "state-N" node advanced to Initialised;
 * afterwards the PCI root buses are published and the backend itself
 * switches to Initialised.  On full success we also try to attach right
 * away in case pcifront is already configured.
 *
 * Returns 0 on success or a negative errno (also reported via
 * xenbus_dev_fatal()).
 */
static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
{
	/* Get configuration from xend (if available now) */
	int domain, bus, slot, func;
	int err = 0;
	int i, num_devs;
	char dev_str[64];
	char state_str[64];

	/* It's possible we could get the call to setup twice, so make sure
	 * we're not already connected.
	 */
	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateInitWait)
		goto out;

	dev_dbg(&pdev->xdev->dev, "getting be setup\n");

	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
			   &num_devs);
	if (err != 1) {
		if (err >= 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of devices");
		goto out;
	}

	for (i = 0; i < num_devs; i++) {
		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
		if (unlikely(l >= (sizeof(dev_str) - 1))) {
			err = -ENOMEM;
			xenbus_dev_fatal(pdev->xdev, err,
					 "String overflow while reading "
					 "configuration");
			goto out;
		}

		/* Parse the BDF ("dddd:bb:ss.f") for this virtual slot. */
		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
		if (err < 0) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error reading device configuration");
			goto out;
		}
		/* All four BDF fields must have been parsed. */
		if (err != 4) {
			err = -EINVAL;
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error parsing pci device "
					 "configuration");
			goto out;
		}

		err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
		if (err)
			goto out;

		/* Switch substate of this device. */
		l = snprintf(state_str, sizeof(state_str), "state-%d", i);
		if (unlikely(l >= (sizeof(state_str) - 1))) {
			err = -ENOMEM;
			xenbus_dev_fatal(pdev->xdev, err,
					 "String overflow while reading "
					 "configuration");
			goto out;
		}
		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
				    "%d", XenbusStateInitialised);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
					 "substate of dev-%d\n", i);
			goto out;
		}
	}

	err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
	if (err) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error while publish PCI root buses "
				 "for frontend");
		goto out;
	}

	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
	if (err)
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error switching to initialised state!");

out:
	if (!err)
		/* see if pcifront is already configured (if not, we'll wait) */
		xen_pcibk_attach(pdev);

	return err;
}
652 | |||
653 | static void xen_pcibk_be_watch(struct xenbus_watch *watch, | ||
654 | const char **vec, unsigned int len) | ||
655 | { | ||
656 | struct xen_pcibk_device *pdev = | ||
657 | container_of(watch, struct xen_pcibk_device, be_watch); | ||
658 | |||
659 | switch (xenbus_read_driver_state(pdev->xdev->nodename)) { | ||
660 | case XenbusStateInitWait: | ||
661 | xen_pcibk_setup_backend(pdev); | ||
662 | break; | ||
663 | |||
664 | default: | ||
665 | break; | ||
666 | } | ||
667 | } | ||
668 | |||
/*
 * xenbus probe: a new "pci" backend node appeared for this driver.
 *
 * Allocates the per-instance xen_pcibk_device, moves the device to
 * InitWait so the toolstack knows it may write configuration, and sets a
 * watch on our own xenstore node so xen_pcibk_be_watch() picks that
 * configuration up.  The watch callback is then invoked once by hand in
 * case the toolstack configured us before the watch was registered.
 *
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): on the error paths after alloc_pdev() succeeds, pdev is
 * not released here -- confirm a failed probe still reaches free_pdev()
 * somewhere, otherwise this leaks the structure.
 */
static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
				const struct xenbus_device_id *id)
{
	int err = 0;
	struct xen_pcibk_device *pdev = alloc_pdev(dev);

	if (pdev == NULL) {
		err = -ENOMEM;
		xenbus_dev_fatal(dev, err,
				 "Error allocating xen_pcibk_device struct");
		goto out;
	}

	/* wait for xend to configure us */
	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto out;

	/* watch the backend node for backend configuration information */
	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
				xen_pcibk_be_watch);
	if (err)
		goto out;

	pdev->be_watching = 1;

	/* We need to force a call to our callback here in case
	 * xend already configured us!
	 */
	xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);

out:
	return err;
}
703 | |||
704 | static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) | ||
705 | { | ||
706 | struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); | ||
707 | |||
708 | if (pdev != NULL) | ||
709 | free_pdev(pdev); | ||
710 | |||
711 | return 0; | ||
712 | } | ||
713 | |||
/* xenbus device type this driver binds to: "pci" backend nodes. */
static const struct xenbus_device_id xenpci_ids[] = {
	{"pci"},
	{""},
};

/* Glue between the xenbus core and this backend's probe/remove/state code. */
static struct xenbus_driver xenbus_xen_pcibk_driver = {
	.name = DRV_NAME,
	.owner = THIS_MODULE,
	.ids = xenpci_ids,
	.probe = xen_pcibk_xenbus_probe,
	.remove = xen_pcibk_xenbus_remove,
	.otherend_changed = xen_pcibk_frontend_changed,
};

/* Active device-model ops; selected in xen_pcibk_xenbus_register(). */
struct xen_pcibk_backend *xen_pcibk_backend;
729 | |||
730 | int __init xen_pcibk_xenbus_register(void) | ||
731 | { | ||
732 | xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); | ||
733 | if (!xen_pcibk_wq) { | ||
734 | printk(KERN_ERR "%s: create" | ||
735 | "xen_pciback_workqueue failed\n", __func__); | ||
736 | return -EFAULT; | ||
737 | } | ||
738 | xen_pcibk_backend = &xen_pcibk_vpci_backend; | ||
739 | if (passthrough) | ||
740 | xen_pcibk_backend = &xen_pcibk_passthrough_backend; | ||
741 | pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); | ||
742 | return xenbus_register_backend(&xenbus_xen_pcibk_driver); | ||
743 | } | ||
744 | |||
745 | void __exit xen_pcibk_xenbus_unregister(void) | ||
746 | { | ||
747 | destroy_workqueue(xen_pcibk_wq); | ||
748 | xenbus_unregister_driver(&xenbus_xen_pcibk_driver); | ||
749 | } | ||
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c new file mode 100644 index 000000000000..010937b5a7c9 --- /dev/null +++ b/drivers/xen/xen-selfballoon.c | |||
@@ -0,0 +1,485 @@ | |||
1 | /****************************************************************************** | ||
2 | * Xen selfballoon driver (and optional frontswap self-shrinking driver) | ||
3 | * | ||
4 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. | ||
5 | * | ||
6 | * This code complements the cleancache and frontswap patchsets to optimize | ||
7 | * support for Xen Transcendent Memory ("tmem"). The policy it implements | ||
8 | * is rudimentary and will likely improve over time, but it does work well | ||
9 | * enough today. | ||
10 | * | ||
11 | * Two functionalities are implemented here which both use "control theory" | ||
12 | * (feedback) to optimize memory utilization. In a virtualized environment | ||
13 | * such as Xen, RAM is often a scarce resource and we would like to ensure | ||
14 | * that each of a possibly large number of virtual machines is using RAM | ||
15 | * efficiently, i.e. using as little as possible when under light load | ||
16 | * and obtaining as much as possible when memory demands are high. | ||
17 | * Since RAM needs vary highly dynamically and sometimes dramatically, | ||
18 | * "hysteresis" is used, that is, memory target is determined not just | ||
19 | * on current data but also on past data stored in the system. | ||
20 | * | ||
21 | * "Selfballooning" creates memory pressure by managing the Xen balloon | ||
22 | * driver to decrease and increase available kernel memory, driven | ||
23 | * largely by the target value of "Committed_AS" (see /proc/meminfo). | ||
24 | * Since Committed_AS does not account for clean mapped pages (i.e. pages | ||
25 | * in RAM that are identical to pages on disk), selfballooning has the | ||
 * effect of pushing less frequently used clean pagecache pages out of
27 | * kernel RAM and, presumably using cleancache, into Xen tmem where | ||
28 | * Xen can more efficiently optimize RAM utilization for such pages. | ||
29 | * | ||
30 | * When kernel memory demand unexpectedly increases faster than Xen, via | ||
31 | * the selfballoon driver, is able to (or chooses to) provide usable RAM, | ||
32 | * the kernel may invoke swapping. In most cases, frontswap is able | ||
33 | * to absorb this swapping into Xen tmem. However, due to the fact | ||
34 | * that the kernel swap subsystem assumes swapping occurs to a disk, | ||
35 | * swapped pages may sit on the disk for a very long time; even if | ||
36 | * the kernel knows the page will never be used again. This is because | ||
37 | * the disk space costs very little and can be overwritten when | ||
38 | * necessary. When such stale pages are in frontswap, however, they | ||
39 | * are taking up valuable real estate. "Frontswap selfshrinking" works | ||
40 | * to resolve this: When frontswap activity is otherwise stable | ||
41 | * and the guest kernel is not under memory pressure, the "frontswap | ||
42 | * selfshrinking" accounts for this by providing pressure to remove some | ||
43 | * pages from frontswap and return them to kernel memory. | ||
44 | * | ||
45 | * For both "selfballooning" and "frontswap-selfshrinking", a worker | ||
46 | * thread is used and sysfs tunables are provided to adjust the frequency | ||
47 | * and rate of adjustments to achieve the goal, as well as to disable one | ||
48 | * or both functions independently. | ||
49 | * | ||
50 | * While some argue that this functionality can and should be implemented | ||
51 | * in userspace, it has been observed that bad things happen (e.g. OOMs). | ||
52 | * | ||
53 | * System configuration note: Selfballooning should not be enabled on | ||
54 | * systems without a sufficiently large swap device configured; for best | ||
55 | * results, it is recommended that total swap be increased by the size | ||
56 | * of the guest memory. Also, while technically not required to be | ||
57 | * configured, it is highly recommended that frontswap also be configured | ||
58 | * and enabled when selfballooning is running. So, selfballooning | ||
59 | * is disabled by default if frontswap is not configured and can only | ||
60 | * be enabled with the "selfballooning" kernel boot option; similarly | ||
61 | * selfballooning is enabled by default if frontswap is configured and | ||
62 | * can be disabled with the "noselfballooning" kernel boot option. Finally, | ||
63 | * when frontswap is configured, frontswap-selfshrinking can be disabled | ||
64 | * with the "noselfshrink" kernel boot option. | ||
65 | * | ||
66 | * Selfballooning is disallowed in domain0 and force-disabled. | ||
67 | * | ||
68 | */ | ||
69 | |||
70 | #include <linux/kernel.h> | ||
71 | #include <linux/mm.h> | ||
72 | #include <linux/mman.h> | ||
73 | |||
74 | #include <xen/balloon.h> | ||
75 | |||
76 | #include <xen/tmem.h> | ||
77 | |||
/* Enable/disable with sysfs; 0 (off) until something turns it on. */
static int xen_selfballooning_enabled __read_mostly;

/*
 * Controls rate at which memory target (this iteration) approaches
 * ultimate goal when memory need is increasing (up-hysteresis) or
 * decreasing (down-hysteresis). Higher values of hysteresis cause
 * slower increases/decreases. The default values for the various
 * parameters were deemed reasonable by experimentation, may be
 * workload-dependent, and can all be adjusted via sysfs.
 */
static unsigned int selfballoon_downhysteresis __read_mostly = 8;
static unsigned int selfballoon_uphysteresis __read_mostly = 1;

/* Worker period in seconds (multiplied by HZ when (re)scheduling). */
static unsigned int selfballoon_interval __read_mostly = 5;

/* One delayed work item drives both selfballooning and selfshrinking. */
static void selfballoon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
97 | |||
98 | #ifdef CONFIG_FRONTSWAP | ||
99 | #include <linux/frontswap.h> | ||
100 | |||
/* Enable/disable with sysfs; off until enabled. */
static bool frontswap_selfshrinking __read_mostly;

/* Enable/disable with kernel boot option ("noselfshrink" clears it). */
static bool use_frontswap_selfshrink __initdata = true;

/*
 * The default values for the following parameters were deemed reasonable
 * by experimentation, may be workload-dependent, and can all be
 * adjusted via sysfs.
 */

/* Control rate for frontswap shrinking. Higher hysteresis is slower.
 * Also acts as the floor below which frontswap is emptied entirely. */
static unsigned int frontswap_hysteresis __read_mostly = 20;

/*
 * Number of selfballoon worker invocations to wait before observing that
 * frontswap selfshrinking should commence. Note that selfshrinking does
 * not use a separate worker thread.
 */
static unsigned int frontswap_inertia __read_mostly = 3;

/* Countdown to next invocation of frontswap_shrink() */
static unsigned long frontswap_inertia_counter;
125 | |||
/*
 * Invoked by the selfballoon worker thread, uses current number of pages
 * in frontswap (frontswap_curr_pages()), previous status, and control
 * values (hysteresis and inertia) to determine if frontswap should be
 * shrunk and what the new frontswap size should be. Note that
 * frontswap_shrink is essentially a partial swapoff that immediately
 * transfers pages from the "swap device" (frontswap) back into kernel
 * RAM; despite the name, frontswap "shrinking" is very different from
 * the "shrinker" interface used by the kernel MM subsystem to reclaim
 * memory.
 */
static void frontswap_selfshrink(void)
{
	/* State carried across invocations of the worker. */
	static unsigned long cur_frontswap_pages;
	static unsigned long last_frontswap_pages;
	static unsigned long tgt_frontswap_pages;

	last_frontswap_pages = cur_frontswap_pages;
	cur_frontswap_pages = frontswap_curr_pages();
	/* Frontswap empty or still growing: re-arm the inertia countdown. */
	if (!cur_frontswap_pages ||
			(cur_frontswap_pages > last_frontswap_pages)) {
		frontswap_inertia_counter = frontswap_inertia;
		return;
	}
	/* Wait out the inertia countdown before actually shrinking. */
	if (frontswap_inertia_counter && --frontswap_inertia_counter)
		return;
	/* Shrink by 1/hysteresis; below the threshold, empty it entirely. */
	if (cur_frontswap_pages <= frontswap_hysteresis)
		tgt_frontswap_pages = 0;
	else
		tgt_frontswap_pages = cur_frontswap_pages -
			(cur_frontswap_pages / frontswap_hysteresis);
	frontswap_shrink(tgt_frontswap_pages);
}
159 | |||
/* "noselfshrink" boot option: disable frontswap-selfshrinking. */
static int __init xen_nofrontswap_selfshrink_setup(char *s)
{
	use_frontswap_selfshrink = false;
	return 1;	/* option handled */
}

__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);
167 | |||
/* Disable with kernel boot option (on by default when frontswap is built). */
static bool use_selfballooning __initdata = true;

/* "noselfballooning" boot option: opt out of selfballooning. */
static int __init xen_noselfballooning_setup(char *s)
{
	use_selfballooning = false;
	return 1;	/* option handled */
}

__setup("noselfballooning", xen_noselfballooning_setup);
178 | #else /* !CONFIG_FRONTSWAP */ | ||
/* Enable with kernel boot option (off by default without frontswap). */
static bool use_selfballooning __initdata = false;

/* "selfballooning" boot option: opt in to selfballooning. */
static int __init xen_selfballooning_setup(char *s)
{
	use_selfballooning = true;
	return 1;	/* option handled */
}

__setup("selfballooning", xen_selfballooning_setup);
189 | #endif /* CONFIG_FRONTSWAP */ | ||
190 | |||
/*
 * Use current balloon size, the goal (vm_committed_as), and hysteresis
 * parameters to set a new target balloon size.
 *
 * Runs as delayed work and re-arms itself only while at least one of
 * selfballooning or frontswap-selfshrinking is enabled; once both are
 * off the worker simply stops rescheduling.
 */
static void selfballoon_process(struct work_struct *work)
{
	unsigned long cur_pages, goal_pages, tgt_pages;
	bool reset_timer = false;

	if (xen_selfballooning_enabled) {
		cur_pages = balloon_stats.current_pages;
		tgt_pages = cur_pages; /* default is no change */
		/* Goal: committed address space plus the ballooned-out delta. */
		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
			balloon_stats.current_pages - totalram_pages;
#ifdef CONFIG_FRONTSWAP
		/* allow space for frontswap pages to be repatriated */
		if (frontswap_selfshrinking && frontswap_enabled)
			goal_pages += frontswap_curr_pages();
#endif
		/* Move a hysteresis-damped fraction of the way to the goal. */
		if (cur_pages > goal_pages)
			tgt_pages = cur_pages -
				((cur_pages - goal_pages) /
				  selfballoon_downhysteresis);
		else if (cur_pages < goal_pages)
			tgt_pages = cur_pages +
				((goal_pages - cur_pages) /
				  selfballoon_uphysteresis);
		/* else if cur_pages == goal_pages, no change */
		balloon_set_new_target(tgt_pages);
		reset_timer = true;
	}
#ifdef CONFIG_FRONTSWAP
	if (frontswap_selfshrinking && frontswap_enabled) {
		frontswap_selfshrink();
		reset_timer = true;
	}
#endif
	if (reset_timer)
		schedule_delayed_work(&selfballoon_worker,
			selfballoon_interval * HZ);
}
232 | |||
233 | #ifdef CONFIG_SYSFS | ||
234 | |||
235 | #include <linux/sysdev.h> | ||
236 | #include <linux/capability.h> | ||
237 | |||
/*
 * Generate a sysdev "show" handler, show_<name>(), that sprintf()s the
 * given format/args into the sysfs buffer; args are evaluated at show
 * time, so current values are reported.
 */
#define SELFBALLOON_SHOW(name, format, args...)				\
	static ssize_t show_##name(struct sys_device *dev,	\
				   struct sysdev_attribute *attr, \
				   char *buf) \
	{ \
		return sprintf(buf, format, ##args); \
	}
245 | |||
246 | SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); | ||
247 | |||
248 | static ssize_t store_selfballooning(struct sys_device *dev, | ||
249 | struct sysdev_attribute *attr, | ||
250 | const char *buf, | ||
251 | size_t count) | ||
252 | { | ||
253 | bool was_enabled = xen_selfballooning_enabled; | ||
254 | unsigned long tmp; | ||
255 | int err; | ||
256 | |||
257 | if (!capable(CAP_SYS_ADMIN)) | ||
258 | return -EPERM; | ||
259 | |||
260 | err = strict_strtoul(buf, 10, &tmp); | ||
261 | if (err || ((tmp != 0) && (tmp != 1))) | ||
262 | return -EINVAL; | ||
263 | |||
264 | xen_selfballooning_enabled = !!tmp; | ||
265 | if (!was_enabled && xen_selfballooning_enabled) | ||
266 | schedule_delayed_work(&selfballoon_worker, | ||
267 | selfballoon_interval * HZ); | ||
268 | |||
269 | return count; | ||
270 | } | ||
271 | |||
272 | static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR, | ||
273 | show_selfballooning, store_selfballooning); | ||
274 | |||
275 | SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); | ||
276 | |||
277 | static ssize_t store_selfballoon_interval(struct sys_device *dev, | ||
278 | struct sysdev_attribute *attr, | ||
279 | const char *buf, | ||
280 | size_t count) | ||
281 | { | ||
282 | unsigned long val; | ||
283 | int err; | ||
284 | |||
285 | if (!capable(CAP_SYS_ADMIN)) | ||
286 | return -EPERM; | ||
287 | err = strict_strtoul(buf, 10, &val); | ||
288 | if (err || val == 0) | ||
289 | return -EINVAL; | ||
290 | selfballoon_interval = val; | ||
291 | return count; | ||
292 | } | ||
293 | |||
294 | static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, | ||
295 | show_selfballoon_interval, store_selfballoon_interval); | ||
296 | |||
297 | SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); | ||
298 | |||
299 | static ssize_t store_selfballoon_downhys(struct sys_device *dev, | ||
300 | struct sysdev_attribute *attr, | ||
301 | const char *buf, | ||
302 | size_t count) | ||
303 | { | ||
304 | unsigned long val; | ||
305 | int err; | ||
306 | |||
307 | if (!capable(CAP_SYS_ADMIN)) | ||
308 | return -EPERM; | ||
309 | err = strict_strtoul(buf, 10, &val); | ||
310 | if (err || val == 0) | ||
311 | return -EINVAL; | ||
312 | selfballoon_downhysteresis = val; | ||
313 | return count; | ||
314 | } | ||
315 | |||
316 | static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, | ||
317 | show_selfballoon_downhys, store_selfballoon_downhys); | ||
318 | |||
319 | |||
320 | SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); | ||
321 | |||
322 | static ssize_t store_selfballoon_uphys(struct sys_device *dev, | ||
323 | struct sysdev_attribute *attr, | ||
324 | const char *buf, | ||
325 | size_t count) | ||
326 | { | ||
327 | unsigned long val; | ||
328 | int err; | ||
329 | |||
330 | if (!capable(CAP_SYS_ADMIN)) | ||
331 | return -EPERM; | ||
332 | err = strict_strtoul(buf, 10, &val); | ||
333 | if (err || val == 0) | ||
334 | return -EINVAL; | ||
335 | selfballoon_uphysteresis = val; | ||
336 | return count; | ||
337 | } | ||
338 | |||
339 | static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, | ||
340 | show_selfballoon_uphys, store_selfballoon_uphys); | ||
341 | |||
342 | #ifdef CONFIG_FRONTSWAP | ||
343 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); | ||
344 | |||
345 | static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, | ||
346 | struct sysdev_attribute *attr, | ||
347 | const char *buf, | ||
348 | size_t count) | ||
349 | { | ||
350 | bool was_enabled = frontswap_selfshrinking; | ||
351 | unsigned long tmp; | ||
352 | int err; | ||
353 | |||
354 | if (!capable(CAP_SYS_ADMIN)) | ||
355 | return -EPERM; | ||
356 | err = strict_strtoul(buf, 10, &tmp); | ||
357 | if (err || ((tmp != 0) && (tmp != 1))) | ||
358 | return -EINVAL; | ||
359 | frontswap_selfshrinking = !!tmp; | ||
360 | if (!was_enabled && !xen_selfballooning_enabled && | ||
361 | frontswap_selfshrinking) | ||
362 | schedule_delayed_work(&selfballoon_worker, | ||
363 | selfballoon_interval * HZ); | ||
364 | |||
365 | return count; | ||
366 | } | ||
367 | |||
368 | static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, | ||
369 | show_frontswap_selfshrinking, store_frontswap_selfshrinking); | ||
370 | |||
371 | SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); | ||
372 | |||
373 | static ssize_t store_frontswap_inertia(struct sys_device *dev, | ||
374 | struct sysdev_attribute *attr, | ||
375 | const char *buf, | ||
376 | size_t count) | ||
377 | { | ||
378 | unsigned long val; | ||
379 | int err; | ||
380 | |||
381 | if (!capable(CAP_SYS_ADMIN)) | ||
382 | return -EPERM; | ||
383 | err = strict_strtoul(buf, 10, &val); | ||
384 | if (err || val == 0) | ||
385 | return -EINVAL; | ||
386 | frontswap_inertia = val; | ||
387 | frontswap_inertia_counter = val; | ||
388 | return count; | ||
389 | } | ||
390 | |||
391 | static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, | ||
392 | show_frontswap_inertia, store_frontswap_inertia); | ||
393 | |||
394 | SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); | ||
395 | |||
396 | static ssize_t store_frontswap_hysteresis(struct sys_device *dev, | ||
397 | struct sysdev_attribute *attr, | ||
398 | const char *buf, | ||
399 | size_t count) | ||
400 | { | ||
401 | unsigned long val; | ||
402 | int err; | ||
403 | |||
404 | if (!capable(CAP_SYS_ADMIN)) | ||
405 | return -EPERM; | ||
406 | err = strict_strtoul(buf, 10, &val); | ||
407 | if (err || val == 0) | ||
408 | return -EINVAL; | ||
409 | frontswap_hysteresis = val; | ||
410 | return count; | ||
411 | } | ||
412 | |||
413 | static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, | ||
414 | show_frontswap_hysteresis, store_frontswap_hysteresis); | ||
415 | |||
416 | #endif /* CONFIG_FRONTSWAP */ | ||
417 | |||
/* Every selfballoon tunable defined above, in sysfs-display order. */
static struct attribute *selfballoon_attrs[] = {
	&attr_selfballooning.attr,
	&attr_selfballoon_interval.attr,
	&attr_selfballoon_downhysteresis.attr,
	&attr_selfballoon_uphysteresis.attr,
#ifdef CONFIG_FRONTSWAP
	&attr_frontswap_selfshrinking.attr,
	&attr_frontswap_hysteresis.attr,
	&attr_frontswap_inertia.attr,
#endif
	NULL
};

/* Registered as a "selfballoon" subdirectory by register_xen_selfballooning(). */
static struct attribute_group selfballoon_group = {
	.name = "selfballoon",
	.attrs = selfballoon_attrs
};
#endif
436 | |||
437 | int register_xen_selfballooning(struct sys_device *sysdev) | ||
438 | { | ||
439 | int error = -1; | ||
440 | |||
441 | #ifdef CONFIG_SYSFS | ||
442 | error = sysfs_create_group(&sysdev->kobj, &selfballoon_group); | ||
443 | #endif | ||
444 | return error; | ||
445 | } | ||
446 | EXPORT_SYMBOL(register_xen_selfballooning); | ||
447 | |||
448 | static int __init xen_selfballoon_init(void) | ||
449 | { | ||
450 | bool enable = false; | ||
451 | |||
452 | if (!xen_domain()) | ||
453 | return -ENODEV; | ||
454 | |||
455 | if (xen_initial_domain()) { | ||
456 | pr_info("xen/balloon: Xen selfballooning driver " | ||
457 | "disabled for domain0.\n"); | ||
458 | return -ENODEV; | ||
459 | } | ||
460 | |||
461 | xen_selfballooning_enabled = tmem_enabled && use_selfballooning; | ||
462 | if (xen_selfballooning_enabled) { | ||
463 | pr_info("xen/balloon: Initializing Xen " | ||
464 | "selfballooning driver.\n"); | ||
465 | enable = true; | ||
466 | } | ||
467 | #ifdef CONFIG_FRONTSWAP | ||
468 | frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; | ||
469 | if (frontswap_selfshrinking) { | ||
470 | pr_info("xen/balloon: Initializing frontswap " | ||
471 | "selfshrinking driver.\n"); | ||
472 | enable = true; | ||
473 | } | ||
474 | #endif | ||
475 | if (!enable) | ||
476 | return -ENODEV; | ||
477 | |||
478 | schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); | ||
479 | |||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | subsys_initcall(xen_selfballoon_init); | ||
484 | |||
485 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 739769551e33..bd2f90c9ac8b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev) | |||
378 | kfree(to_xenbus_device(dev)); | 378 | kfree(to_xenbus_device(dev)); |
379 | } | 379 | } |
380 | 380 | ||
381 | static ssize_t xendev_show_nodename(struct device *dev, | 381 | static ssize_t nodename_show(struct device *dev, |
382 | struct device_attribute *attr, char *buf) | 382 | struct device_attribute *attr, char *buf) |
383 | { | 383 | { |
384 | return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); | 384 | return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); |
385 | } | 385 | } |
386 | static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); | ||
387 | 386 | ||
388 | static ssize_t xendev_show_devtype(struct device *dev, | 387 | static ssize_t devtype_show(struct device *dev, |
389 | struct device_attribute *attr, char *buf) | 388 | struct device_attribute *attr, char *buf) |
390 | { | 389 | { |
391 | return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); | 390 | return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); |
392 | } | 391 | } |
393 | static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); | ||
394 | 392 | ||
395 | static ssize_t xendev_show_modalias(struct device *dev, | 393 | static ssize_t modalias_show(struct device *dev, |
396 | struct device_attribute *attr, char *buf) | 394 | struct device_attribute *attr, char *buf) |
397 | { | 395 | { |
398 | return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); | 396 | return sprintf(buf, "%s:%s\n", dev->bus->name, |
397 | to_xenbus_device(dev)->devicetype); | ||
399 | } | 398 | } |
400 | static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); | 399 | |
400 | struct device_attribute xenbus_dev_attrs[] = { | ||
401 | __ATTR_RO(nodename), | ||
402 | __ATTR_RO(devtype), | ||
403 | __ATTR_RO(modalias), | ||
404 | __ATTR_NULL | ||
405 | }; | ||
406 | EXPORT_SYMBOL_GPL(xenbus_dev_attrs); | ||
401 | 407 | ||
402 | int xenbus_probe_node(struct xen_bus_type *bus, | 408 | int xenbus_probe_node(struct xen_bus_type *bus, |
403 | const char *type, | 409 | const char *type, |
@@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, | |||
449 | if (err) | 455 | if (err) |
450 | goto fail; | 456 | goto fail; |
451 | 457 | ||
452 | err = device_create_file(&xendev->dev, &dev_attr_nodename); | ||
453 | if (err) | ||
454 | goto fail_unregister; | ||
455 | |||
456 | err = device_create_file(&xendev->dev, &dev_attr_devtype); | ||
457 | if (err) | ||
458 | goto fail_remove_nodename; | ||
459 | |||
460 | err = device_create_file(&xendev->dev, &dev_attr_modalias); | ||
461 | if (err) | ||
462 | goto fail_remove_devtype; | ||
463 | |||
464 | return 0; | 458 | return 0; |
465 | fail_remove_devtype: | ||
466 | device_remove_file(&xendev->dev, &dev_attr_devtype); | ||
467 | fail_remove_nodename: | ||
468 | device_remove_file(&xendev->dev, &dev_attr_nodename); | ||
469 | fail_unregister: | ||
470 | device_unregister(&xendev->dev); | ||
471 | fail: | 459 | fail: |
472 | kfree(xendev); | 460 | kfree(xendev); |
473 | return err; | 461 | return err; |
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 888b9900ca08..b814935378c7 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h | |||
@@ -48,6 +48,8 @@ struct xen_bus_type | |||
48 | struct bus_type bus; | 48 | struct bus_type bus; |
49 | }; | 49 | }; |
50 | 50 | ||
51 | extern struct device_attribute xenbus_dev_attrs[]; | ||
52 | |||
51 | extern int xenbus_match(struct device *_dev, struct device_driver *_drv); | 53 | extern int xenbus_match(struct device *_dev, struct device_driver *_drv); |
52 | extern int xenbus_dev_probe(struct device *_dev); | 54 | extern int xenbus_dev_probe(struct device *_dev); |
53 | extern int xenbus_dev_remove(struct device *_dev); | 55 | extern int xenbus_dev_remove(struct device *_dev); |
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 6cf467bf63ec..60adf919d78d 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c | |||
@@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev, | |||
107 | if (xdev == NULL) | 107 | if (xdev == NULL) |
108 | return -ENODEV; | 108 | return -ENODEV; |
109 | 109 | ||
110 | if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) | ||
111 | return -ENOMEM; | ||
112 | |||
110 | /* stuff we want to pass to /sbin/hotplug */ | 113 | /* stuff we want to pass to /sbin/hotplug */ |
111 | if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) | 114 | if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) |
112 | return -ENOMEM; | 115 | return -ENOMEM; |
@@ -183,10 +186,6 @@ static void frontend_changed(struct xenbus_watch *watch, | |||
183 | xenbus_otherend_changed(watch, vec, len, 0); | 186 | xenbus_otherend_changed(watch, vec, len, 0); |
184 | } | 187 | } |
185 | 188 | ||
186 | static struct device_attribute xenbus_backend_dev_attrs[] = { | ||
187 | __ATTR_NULL | ||
188 | }; | ||
189 | |||
190 | static struct xen_bus_type xenbus_backend = { | 189 | static struct xen_bus_type xenbus_backend = { |
191 | .root = "backend", | 190 | .root = "backend", |
192 | .levels = 3, /* backend/type/<frontend>/<id> */ | 191 | .levels = 3, /* backend/type/<frontend>/<id> */ |
@@ -200,7 +199,7 @@ static struct xen_bus_type xenbus_backend = { | |||
200 | .probe = xenbus_dev_probe, | 199 | .probe = xenbus_dev_probe, |
201 | .remove = xenbus_dev_remove, | 200 | .remove = xenbus_dev_remove, |
202 | .shutdown = xenbus_dev_shutdown, | 201 | .shutdown = xenbus_dev_shutdown, |
203 | .dev_attrs = xenbus_backend_dev_attrs, | 202 | .dev_attrs = xenbus_dev_attrs, |
204 | }, | 203 | }, |
205 | }; | 204 | }; |
206 | 205 | ||
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index b6a2690c9d49..ed2ba474a560 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c | |||
@@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch, | |||
81 | xenbus_otherend_changed(watch, vec, len, 1); | 81 | xenbus_otherend_changed(watch, vec, len, 1); |
82 | } | 82 | } |
83 | 83 | ||
84 | static struct device_attribute xenbus_frontend_dev_attrs[] = { | ||
85 | __ATTR_NULL | ||
86 | }; | ||
87 | |||
88 | static const struct dev_pm_ops xenbus_pm_ops = { | 84 | static const struct dev_pm_ops xenbus_pm_ops = { |
89 | .suspend = xenbus_dev_suspend, | 85 | .suspend = xenbus_dev_suspend, |
90 | .resume = xenbus_dev_resume, | 86 | .resume = xenbus_dev_resume, |
@@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = { | |||
106 | .probe = xenbus_dev_probe, | 102 | .probe = xenbus_dev_probe, |
107 | .remove = xenbus_dev_remove, | 103 | .remove = xenbus_dev_remove, |
108 | .shutdown = xenbus_dev_shutdown, | 104 | .shutdown = xenbus_dev_shutdown, |
109 | .dev_attrs = xenbus_frontend_dev_attrs, | 105 | .dev_attrs = xenbus_dev_attrs, |
110 | 106 | ||
111 | .pm = &xenbus_pm_ops, | 107 | .pm = &xenbus_pm_ops, |
112 | }, | 108 | }, |