aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-07-22 16:45:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-07-22 16:45:15 -0400
commit111ad119d1765b1bbef2629a5f2bd825caeb7e74 (patch)
tree167ee4a4e6e9276bb7178ddcce85d6860543cfb4 /drivers
parent997271cf5e12c1b38aec0764187094663501c984 (diff)
parent3a6d28b11a895d08b6b4fc6f16dd9ff995844b45 (diff)
Merge branch 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: xen/pciback: Have 'passthrough' option instead of XEN_PCIDEV_BACKEND_PASS and XEN_PCIDEV_BACKEND_VPCI xen/pciback: Remove the DEBUG option. xen/pciback: Drop two backends, squash and cleanup some code. xen/pciback: Print out the MSI/MSI-X (PIRQ) values xen/pciback: Don't setup an fake IRQ handler for SR-IOV devices. xen: rename pciback module to xen-pciback. xen/pciback: Fine-grain the spinlocks and fix BUG: scheduling while atomic cases. xen/pciback: Allocate IRQ handler for device that is shared with guest. xen/pciback: Disable MSI/MSI-X when reseting a device xen/pciback: guest SR-IOV support for PV guest xen/pciback: Register the owner (domain) of the PCI device. xen/pciback: Cleanup the driver based on checkpatch warnings and errors. xen/pciback: xen pci backend driver. xen: tmem: self-ballooning and frontswap-selfshrinking xen: Add module alias to autoload backend drivers xen: Populate xenbus device attributes xen: Add __attribute__((format(printf... where appropriate xen: prepare tmem shim to handle frontswap xen: allow enable use of VGA console on dom0
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/xen-blkback/xenbus.c2
-rw-r--r--drivers/xen/Kconfig46
-rw-r--r--drivers/xen/Makefile4
-rw-r--r--drivers/xen/tmem.c170
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--drivers/xen/xen-pciback/Makefile7
-rw-r--r--drivers/xen/xen-pciback/conf_space.c438
-rw-r--r--drivers/xen/xen-pciback/conf_space.h126
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c207
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c386
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.c140
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.h33
-rw-r--r--drivers/xen/xen-pciback/passthrough.c194
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c1376
-rw-r--r--drivers/xen/xen-pciback/pciback.h183
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c384
-rw-r--r--drivers/xen/xen-pciback/vpci.c259
-rw-r--r--drivers/xen/xen-pciback/xenbus.c749
-rw-r--r--drivers/xen/xen-selfballoon.c485
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c44
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h2
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c9
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c6
23 files changed, 5200 insertions, 52 deletions
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 6cc0db1bf522..3f129b45451a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -684,7 +684,7 @@ again:
684 684
685 err = xenbus_switch_state(dev, XenbusStateConnected); 685 err = xenbus_switch_state(dev, XenbusStateConnected);
686 if (err) 686 if (err)
687 xenbus_dev_fatal(dev, err, "switching to Connected state", 687 xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
688 dev->nodename); 688 dev->nodename);
689 689
690 return; 690 return;
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a59638b37c1a..03bc471c3eed 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -9,6 +9,23 @@ config XEN_BALLOON
9 the system to expand the domain's memory allocation, or alternatively 9 the system to expand the domain's memory allocation, or alternatively
10 return unneeded memory to the system. 10 return unneeded memory to the system.
11 11
12config XEN_SELFBALLOONING
13 bool "Dynamically self-balloon kernel memory to target"
14 depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP
15 default n
16 help
17 Self-ballooning dynamically balloons available kernel memory driven
18 by the current usage of anonymous memory ("committed AS") and
19 controlled by various sysfs-settable parameters. Configuring
20 FRONTSWAP is highly recommended; if it is not configured, self-
21 ballooning is disabled by default but can be enabled with the
22 'selfballooning' kernel boot parameter. If FRONTSWAP is configured,
23 frontswap-selfshrinking is enabled by default but can be disabled
24 with the 'noselfshrink' kernel boot parameter; and self-ballooning
25 is enabled by default but can be disabled with the 'noselfballooning'
26 kernel boot parameter. Note that systems without a sufficiently
27 large swap device should not enable self-ballooning.
28
12config XEN_SCRUB_PAGES 29config XEN_SCRUB_PAGES
13 bool "Scrub pages before returning them to system" 30 bool "Scrub pages before returning them to system"
14 depends on XEN_BALLOON 31 depends on XEN_BALLOON
@@ -105,4 +122,33 @@ config SWIOTLB_XEN
105 depends on PCI 122 depends on PCI
106 select SWIOTLB 123 select SWIOTLB
107 124
125config XEN_TMEM
126 bool
127 default y if (CLEANCACHE || FRONTSWAP)
128 help
129 Shim to interface in-kernel Transcendent Memory hooks
130 (e.g. cleancache and frontswap) to Xen tmem hypercalls.
131
132config XEN_PCIDEV_BACKEND
133 tristate "Xen PCI-device backend driver"
134 depends on PCI && X86 && XEN
135 depends on XEN_BACKEND
136 default m
137 help
138 The PCI device backend driver allows the kernel to export arbitrary
139 PCI devices to other guests. If you select this to be a module, you
140 will need to make sure no other driver has bound to the device(s)
141 you want to make visible to other guests.
142
143 The parameter "passthrough" allows you specify how you want the PCI
144 devices to appear in the guest. You can choose the default (0) where
145 PCI topology starts at 00.00.0, or (1) for passthrough if you want
146 the PCI devices topology appear the same as in the host.
147
148 The "hide" parameter (only applicable if backend driver is compiled
149 into the kernel) allows you to bind the PCI devices to this module
150 from the default device drivers. The argument is the list of PCI BDFs:
151 xen-pciback.hide=(03:00.0)(04:00.0)
152
153 If in doubt, say m.
108endmenu 154endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index bbc18258ecc5..72bbb27d7a68 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,6 +1,5 @@
1obj-y += grant-table.o features.o events.o manage.o balloon.o 1obj-y += grant-table.o features.o events.o manage.o balloon.o
2obj-y += xenbus/ 2obj-y += xenbus/
3obj-y += tmem.o
4 3
5nostackp := $(call cc-option, -fno-stack-protector) 4nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_features.o := $(nostackp) 5CFLAGS_features.o := $(nostackp)
@@ -9,14 +8,17 @@ obj-$(CONFIG_BLOCK) += biomerge.o
9obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 8obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
10obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 9obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
11obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o 10obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
11obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
12obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o 12obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
13obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o 13obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
14obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o 14obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
15obj-$(CONFIG_XENFS) += xenfs/ 15obj-$(CONFIG_XENFS) += xenfs/
16obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o 16obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
17obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o 17obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
18obj-$(CONFIG_XEN_TMEM) += tmem.o
18obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o 19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
19obj-$(CONFIG_XEN_DOM0) += pci.o 20obj-$(CONFIG_XEN_DOM0) += pci.o
21obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
20 22
21xen-evtchn-y := evtchn.o 23xen-evtchn-y := evtchn.o
22xen-gntdev-y := gntdev.o 24xen-gntdev-y := gntdev.o
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 816a44959ef0..d369965e8f8a 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Xen implementation for transcendent memory (tmem) 2 * Xen implementation for transcendent memory (tmem)
3 * 3 *
4 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. 4 * Copyright (C) 2009-2011 Oracle Corp. All rights reserved.
5 * Author: Dan Magenheimer 5 * Author: Dan Magenheimer
6 */ 6 */
7 7
@@ -9,8 +9,14 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/module.h>
12#include <linux/cleancache.h> 13#include <linux/cleancache.h>
13 14
15/* temporary ifdef until include/linux/frontswap.h is upstream */
16#ifdef CONFIG_FRONTSWAP
17#include <linux/frontswap.h>
18#endif
19
14#include <xen/xen.h> 20#include <xen/xen.h>
15#include <xen/interface/xen.h> 21#include <xen/interface/xen.h>
16#include <asm/xen/hypercall.h> 22#include <asm/xen/hypercall.h>
@@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
122 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); 128 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
123} 129}
124 130
125static int xen_tmem_destroy_pool(u32 pool_id) 131int tmem_enabled __read_mostly;
126{ 132EXPORT_SYMBOL(tmem_enabled);
127 struct tmem_oid oid = { { 0 } };
128
129 return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
130}
131
132int tmem_enabled;
133 133
134static int __init enable_tmem(char *s) 134static int __init enable_tmem(char *s)
135{ 135{
@@ -139,6 +139,14 @@ static int __init enable_tmem(char *s)
139 139
140__setup("tmem", enable_tmem); 140__setup("tmem", enable_tmem);
141 141
142#ifdef CONFIG_CLEANCACHE
143static int xen_tmem_destroy_pool(u32 pool_id)
144{
145 struct tmem_oid oid = { { 0 } };
146
147 return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
148}
149
142/* cleancache ops */ 150/* cleancache ops */
143 151
144static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, 152static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
@@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = {
240 .init_shared_fs = tmem_cleancache_init_shared_fs, 248 .init_shared_fs = tmem_cleancache_init_shared_fs,
241 .init_fs = tmem_cleancache_init_fs 249 .init_fs = tmem_cleancache_init_fs
242}; 250};
251#endif
243 252
244static int __init xen_tmem_init(void) 253#ifdef CONFIG_FRONTSWAP
254/* frontswap tmem operations */
255
256/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
257static int tmem_frontswap_poolid;
258
259/*
260 * Swizzling increases objects per swaptype, increasing tmem concurrency
261 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
262 */
263#define SWIZ_BITS 4
264#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
265#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
266#define iswiz(_ind) (_ind >> SWIZ_BITS)
267
268static inline struct tmem_oid oswiz(unsigned type, u32 ind)
245{ 269{
246 struct cleancache_ops old_ops; 270 struct tmem_oid oid = { .oid = { 0 } };
271 oid.oid[0] = _oswiz(type, ind);
272 return oid;
273}
247 274
275/* returns 0 if the page was successfully put into frontswap, -1 if not */
276static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
277 struct page *page)
278{
279 u64 ind64 = (u64)offset;
280 u32 ind = (u32)offset;
281 unsigned long pfn = page_to_pfn(page);
282 int pool = tmem_frontswap_poolid;
283 int ret;
284
285 if (pool < 0)
286 return -1;
287 if (ind64 != ind)
288 return -1;
289 mb(); /* ensure page is quiescent; tmem may address it with an alias */
290 ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
291 /* translate Xen tmem return values to linux semantics */
292 if (ret == 1)
293 return 0;
294 else
295 return -1;
296}
297
298/*
299 * returns 0 if the page was successfully gotten from frontswap, -1 if
300 * was not present (should never happen!)
301 */
302static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
303 struct page *page)
304{
305 u64 ind64 = (u64)offset;
306 u32 ind = (u32)offset;
307 unsigned long pfn = page_to_pfn(page);
308 int pool = tmem_frontswap_poolid;
309 int ret;
310
311 if (pool < 0)
312 return -1;
313 if (ind64 != ind)
314 return -1;
315 ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
316 /* translate Xen tmem return values to linux semantics */
317 if (ret == 1)
318 return 0;
319 else
320 return -1;
321}
322
323/* flush a single page from frontswap */
324static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
325{
326 u64 ind64 = (u64)offset;
327 u32 ind = (u32)offset;
328 int pool = tmem_frontswap_poolid;
329
330 if (pool < 0)
331 return;
332 if (ind64 != ind)
333 return;
334 (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
335}
336
337/* flush all pages from the passed swaptype */
338static void tmem_frontswap_flush_area(unsigned type)
339{
340 int pool = tmem_frontswap_poolid;
341 int ind;
342
343 if (pool < 0)
344 return;
345 for (ind = SWIZ_MASK; ind >= 0; ind--)
346 (void)xen_tmem_flush_object(pool, oswiz(type, ind));
347}
348
349static void tmem_frontswap_init(unsigned ignored)
350{
351 struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
352
353 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
354 if (tmem_frontswap_poolid < 0)
355 tmem_frontswap_poolid =
356 xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
357}
358
359static int __initdata use_frontswap = 1;
360
361static int __init no_frontswap(char *s)
362{
363 use_frontswap = 0;
364 return 1;
365}
366
367__setup("nofrontswap", no_frontswap);
368
369static struct frontswap_ops tmem_frontswap_ops = {
370 .put_page = tmem_frontswap_put_page,
371 .get_page = tmem_frontswap_get_page,
372 .flush_page = tmem_frontswap_flush_page,
373 .flush_area = tmem_frontswap_flush_area,
374 .init = tmem_frontswap_init
375};
376#endif
377
378static int __init xen_tmem_init(void)
379{
248 if (!xen_domain()) 380 if (!xen_domain())
249 return 0; 381 return 0;
382#ifdef CONFIG_FRONTSWAP
383 if (tmem_enabled && use_frontswap) {
384 char *s = "";
385 struct frontswap_ops old_ops =
386 frontswap_register_ops(&tmem_frontswap_ops);
387
388 tmem_frontswap_poolid = -1;
389 if (old_ops.init != NULL)
390 s = " (WARNING: frontswap_ops overridden)";
391 printk(KERN_INFO "frontswap enabled, RAM provided by "
392 "Xen Transcendent Memory\n");
393 }
394#endif
250#ifdef CONFIG_CLEANCACHE 395#ifdef CONFIG_CLEANCACHE
251 BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); 396 BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
252 if (tmem_enabled && use_cleancache) { 397 if (tmem_enabled && use_cleancache) {
253 char *s = ""; 398 char *s = "";
254 old_ops = cleancache_register_ops(&tmem_cleancache_ops); 399 struct cleancache_ops old_ops =
400 cleancache_register_ops(&tmem_cleancache_ops);
255 if (old_ops.init_fs != NULL) 401 if (old_ops.init_fs != NULL)
256 s = " (WARNING: cleancache_ops overridden)"; 402 s = " (WARNING: cleancache_ops overridden)";
257 printk(KERN_INFO "cleancache enabled, RAM provided by " 403 printk(KERN_INFO "cleancache enabled, RAM provided by "
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index a4ff225ee868..5c9dc43c1e94 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -98,6 +98,8 @@ static int __init balloon_init(void)
98 98
99 register_balloon(&balloon_sysdev); 99 register_balloon(&balloon_sysdev);
100 100
101 register_xen_selfballooning(&balloon_sysdev);
102
101 target_watch.callback = watch_target; 103 target_watch.callback = watch_target;
102 xenstore_notifier.notifier_call = balloon_init_watcher; 104 xenstore_notifier.notifier_call = balloon_init_watcher;
103 105
diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile
new file mode 100644
index 000000000000..ffe0ad3438bd
--- /dev/null
+++ b/drivers/xen/xen-pciback/Makefile
@@ -0,0 +1,7 @@
1obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
2
3xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
4xen-pciback-y += conf_space.o conf_space_header.o \
5 conf_space_capability.o \
6 conf_space_quirks.o vpci.o \
7 passthrough.o
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
new file mode 100644
index 000000000000..a8031445d94e
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -0,0 +1,438 @@
1/*
2 * PCI Backend - Functions for creating a virtual configuration space for
3 * exported PCI Devices.
4 * It's dangerous to allow PCI Driver Domains to change their
5 * device's resources (memory, i/o ports, interrupts). We need to
6 * restrict changes to certain PCI Configuration registers:
7 * BARs, INTERRUPT_PIN, most registers in the header...
8 *
9 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
10 */
11
12#include <linux/kernel.h>
13#include <linux/pci.h>
14#include "pciback.h"
15#include "conf_space.h"
16#include "conf_space_quirks.h"
17
18#define DRV_NAME "xen-pciback"
19static int permissive;
20module_param(permissive, bool, 0644);
21
22/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
23 * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */
24#define DEFINE_PCI_CONFIG(op, size, type) \
25int xen_pcibk_##op##_config_##size \
26(struct pci_dev *dev, int offset, type value, void *data) \
27{ \
28 return pci_##op##_config_##size(dev, offset, value); \
29}
30
31DEFINE_PCI_CONFIG(read, byte, u8 *)
32DEFINE_PCI_CONFIG(read, word, u16 *)
33DEFINE_PCI_CONFIG(read, dword, u32 *)
34
35DEFINE_PCI_CONFIG(write, byte, u8)
36DEFINE_PCI_CONFIG(write, word, u16)
37DEFINE_PCI_CONFIG(write, dword, u32)
38
39static int conf_space_read(struct pci_dev *dev,
40 const struct config_field_entry *entry,
41 int offset, u32 *value)
42{
43 int ret = 0;
44 const struct config_field *field = entry->field;
45
46 *value = 0;
47
48 switch (field->size) {
49 case 1:
50 if (field->u.b.read)
51 ret = field->u.b.read(dev, offset, (u8 *) value,
52 entry->data);
53 break;
54 case 2:
55 if (field->u.w.read)
56 ret = field->u.w.read(dev, offset, (u16 *) value,
57 entry->data);
58 break;
59 case 4:
60 if (field->u.dw.read)
61 ret = field->u.dw.read(dev, offset, value, entry->data);
62 break;
63 }
64 return ret;
65}
66
67static int conf_space_write(struct pci_dev *dev,
68 const struct config_field_entry *entry,
69 int offset, u32 value)
70{
71 int ret = 0;
72 const struct config_field *field = entry->field;
73
74 switch (field->size) {
75 case 1:
76 if (field->u.b.write)
77 ret = field->u.b.write(dev, offset, (u8) value,
78 entry->data);
79 break;
80 case 2:
81 if (field->u.w.write)
82 ret = field->u.w.write(dev, offset, (u16) value,
83 entry->data);
84 break;
85 case 4:
86 if (field->u.dw.write)
87 ret = field->u.dw.write(dev, offset, value,
88 entry->data);
89 break;
90 }
91 return ret;
92}
93
94static inline u32 get_mask(int size)
95{
96 if (size == 1)
97 return 0xff;
98 else if (size == 2)
99 return 0xffff;
100 else
101 return 0xffffffff;
102}
103
104static inline int valid_request(int offset, int size)
105{
106 /* Validate request (no un-aligned requests) */
107 if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
108 return 1;
109 return 0;
110}
111
112static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
113 int offset)
114{
115 if (offset >= 0) {
116 new_val_mask <<= (offset * 8);
117 new_val <<= (offset * 8);
118 } else {
119 new_val_mask >>= (offset * -8);
120 new_val >>= (offset * -8);
121 }
122 val = (val & ~new_val_mask) | (new_val & new_val_mask);
123
124 return val;
125}
126
127static int pcibios_err_to_errno(int err)
128{
129 switch (err) {
130 case PCIBIOS_SUCCESSFUL:
131 return XEN_PCI_ERR_success;
132 case PCIBIOS_DEVICE_NOT_FOUND:
133 return XEN_PCI_ERR_dev_not_found;
134 case PCIBIOS_BAD_REGISTER_NUMBER:
135 return XEN_PCI_ERR_invalid_offset;
136 case PCIBIOS_FUNC_NOT_SUPPORTED:
137 return XEN_PCI_ERR_not_implemented;
138 case PCIBIOS_SET_FAILED:
139 return XEN_PCI_ERR_access_denied;
140 }
141 return err;
142}
143
144int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
145 u32 *ret_val)
146{
147 int err = 0;
148 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
149 const struct config_field_entry *cfg_entry;
150 const struct config_field *field;
151 int req_start, req_end, field_start, field_end;
152 /* if read fails for any reason, return 0
153 * (as if device didn't respond) */
154 u32 value = 0, tmp_val;
155
156 if (unlikely(verbose_request))
157 printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
158 pci_name(dev), size, offset);
159
160 if (!valid_request(offset, size)) {
161 err = XEN_PCI_ERR_invalid_offset;
162 goto out;
163 }
164
165 /* Get the real value first, then modify as appropriate */
166 switch (size) {
167 case 1:
168 err = pci_read_config_byte(dev, offset, (u8 *) &value);
169 break;
170 case 2:
171 err = pci_read_config_word(dev, offset, (u16 *) &value);
172 break;
173 case 4:
174 err = pci_read_config_dword(dev, offset, &value);
175 break;
176 }
177
178 list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
179 field = cfg_entry->field;
180
181 req_start = offset;
182 req_end = offset + size;
183 field_start = OFFSET(cfg_entry);
184 field_end = OFFSET(cfg_entry) + field->size;
185
186 if ((req_start >= field_start && req_start < field_end)
187 || (req_end > field_start && req_end <= field_end)) {
188 err = conf_space_read(dev, cfg_entry, field_start,
189 &tmp_val);
190 if (err)
191 goto out;
192
193 value = merge_value(value, tmp_val,
194 get_mask(field->size),
195 field_start - req_start);
196 }
197 }
198
199out:
200 if (unlikely(verbose_request))
201 printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
202 pci_name(dev), size, offset, value);
203
204 *ret_val = value;
205 return pcibios_err_to_errno(err);
206}
207
208int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
209{
210 int err = 0, handled = 0;
211 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
212 const struct config_field_entry *cfg_entry;
213 const struct config_field *field;
214 u32 tmp_val;
215 int req_start, req_end, field_start, field_end;
216
217 if (unlikely(verbose_request))
218 printk(KERN_DEBUG
219 DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
220 pci_name(dev), size, offset, value);
221
222 if (!valid_request(offset, size))
223 return XEN_PCI_ERR_invalid_offset;
224
225 list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
226 field = cfg_entry->field;
227
228 req_start = offset;
229 req_end = offset + size;
230 field_start = OFFSET(cfg_entry);
231 field_end = OFFSET(cfg_entry) + field->size;
232
233 if ((req_start >= field_start && req_start < field_end)
234 || (req_end > field_start && req_end <= field_end)) {
235 tmp_val = 0;
236
237 err = xen_pcibk_config_read(dev, field_start,
238 field->size, &tmp_val);
239 if (err)
240 break;
241
242 tmp_val = merge_value(tmp_val, value, get_mask(size),
243 req_start - field_start);
244
245 err = conf_space_write(dev, cfg_entry, field_start,
246 tmp_val);
247
248 /* handled is set true here, but not every byte
249 * may have been written! Properly detecting if
250 * every byte is handled is unnecessary as the
251 * flag is used to detect devices that need
252 * special helpers to work correctly.
253 */
254 handled = 1;
255 }
256 }
257
258 if (!handled && !err) {
259 /* By default, anything not specificially handled above is
260 * read-only. The permissive flag changes this behavior so
261 * that anything not specifically handled above is writable.
262 * This means that some fields may still be read-only because
263 * they have entries in the config_field list that intercept
264 * the write and do nothing. */
265 if (dev_data->permissive || permissive) {
266 switch (size) {
267 case 1:
268 err = pci_write_config_byte(dev, offset,
269 (u8) value);
270 break;
271 case 2:
272 err = pci_write_config_word(dev, offset,
273 (u16) value);
274 break;
275 case 4:
276 err = pci_write_config_dword(dev, offset,
277 (u32) value);
278 break;
279 }
280 } else if (!dev_data->warned_on_write) {
281 dev_data->warned_on_write = 1;
282 dev_warn(&dev->dev, "Driver tried to write to a "
283 "read-only configuration space field at offset"
284 " 0x%x, size %d. This may be harmless, but if "
285 "you have problems with your device:\n"
286 "1) see permissive attribute in sysfs\n"
287 "2) report problems to the xen-devel "
288 "mailing list along with details of your "
289 "device obtained from lspci.\n", offset, size);
290 }
291 }
292
293 return pcibios_err_to_errno(err);
294}
295
296void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
297{
298 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
299 struct config_field_entry *cfg_entry, *t;
300 const struct config_field *field;
301
302 dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
303 "configuration space fields\n");
304 if (!dev_data)
305 return;
306
307 list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
308 field = cfg_entry->field;
309
310 if (field->clean) {
311 field->clean((struct config_field *)field);
312
313 kfree(cfg_entry->data);
314
315 list_del(&cfg_entry->list);
316 kfree(cfg_entry);
317 }
318
319 }
320}
321
322void xen_pcibk_config_reset_dev(struct pci_dev *dev)
323{
324 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
325 const struct config_field_entry *cfg_entry;
326 const struct config_field *field;
327
328 dev_dbg(&dev->dev, "resetting virtual configuration space\n");
329 if (!dev_data)
330 return;
331
332 list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
333 field = cfg_entry->field;
334
335 if (field->reset)
336 field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
337 }
338}
339
340void xen_pcibk_config_free_dev(struct pci_dev *dev)
341{
342 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
343 struct config_field_entry *cfg_entry, *t;
344 const struct config_field *field;
345
346 dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
347 if (!dev_data)
348 return;
349
350 list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
351 list_del(&cfg_entry->list);
352
353 field = cfg_entry->field;
354
355 if (field->release)
356 field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
357
358 kfree(cfg_entry);
359 }
360}
361
362int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
363 const struct config_field *field,
364 unsigned int base_offset)
365{
366 int err = 0;
367 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
368 struct config_field_entry *cfg_entry;
369 void *tmp;
370
371 cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
372 if (!cfg_entry) {
373 err = -ENOMEM;
374 goto out;
375 }
376
377 cfg_entry->data = NULL;
378 cfg_entry->field = field;
379 cfg_entry->base_offset = base_offset;
380
381 /* silently ignore duplicate fields */
382 err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry));
383 if (err)
384 goto out;
385
386 if (field->init) {
387 tmp = field->init(dev, OFFSET(cfg_entry));
388
389 if (IS_ERR(tmp)) {
390 err = PTR_ERR(tmp);
391 goto out;
392 }
393
394 cfg_entry->data = tmp;
395 }
396
397 dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
398 OFFSET(cfg_entry));
399 list_add_tail(&cfg_entry->list, &dev_data->config_fields);
400
401out:
402 if (err)
403 kfree(cfg_entry);
404
405 return err;
406}
407
408/* This sets up the device's virtual configuration space to keep track of
409 * certain registers (like the base address registers (BARs) so that we can
410 * keep the client from manipulating them directly.
411 */
412int xen_pcibk_config_init_dev(struct pci_dev *dev)
413{
414 int err = 0;
415 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
416
417 dev_dbg(&dev->dev, "initializing virtual configuration space\n");
418
419 INIT_LIST_HEAD(&dev_data->config_fields);
420
421 err = xen_pcibk_config_header_add_fields(dev);
422 if (err)
423 goto out;
424
425 err = xen_pcibk_config_capability_add_fields(dev);
426 if (err)
427 goto out;
428
429 err = xen_pcibk_config_quirks_init(dev);
430
431out:
432 return err;
433}
434
435int xen_pcibk_config_init(void)
436{
437 return xen_pcibk_config_capability_init();
438}
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
new file mode 100644
index 000000000000..e56c934ad137
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -0,0 +1,126 @@
1/*
2 * PCI Backend - Common data structures for overriding the configuration space
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 */
6
7#ifndef __XEN_PCIBACK_CONF_SPACE_H__
8#define __XEN_PCIBACK_CONF_SPACE_H__
9
10#include <linux/list.h>
11#include <linux/err.h>
12
13/* conf_field_init can return an errno in a ptr with ERR_PTR() */
14typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
15typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
16typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
17
18typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
19 void *data);
20typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
21 void *data);
22typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
23 void *data);
24typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
25 void *data);
26typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
27 void *data);
28typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
29 void *data);
30
31/* These are the fields within the configuration space which we
32 * are interested in intercepting reads/writes to and changing their
33 * values.
34 */
35struct config_field {
36 unsigned int offset;
37 unsigned int size;
38 unsigned int mask;
39 conf_field_init init;
40 conf_field_reset reset;
41 conf_field_free release;
42 void (*clean) (struct config_field *field);
43 union {
44 struct {
45 conf_dword_write write;
46 conf_dword_read read;
47 } dw;
48 struct {
49 conf_word_write write;
50 conf_word_read read;
51 } w;
52 struct {
53 conf_byte_write write;
54 conf_byte_read read;
55 } b;
56 } u;
57 struct list_head list;
58};
59
60struct config_field_entry {
61 struct list_head list;
62 const struct config_field *field;
63 unsigned int base_offset;
64 void *data;
65};
66
67#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
68
69/* Add fields to a device - the add_fields macro expects to get a pointer to
70 * the first entry in an array (of which the ending is marked by size==0)
71 */
72int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
73 const struct config_field *field,
74 unsigned int offset);
75
76static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
77 const struct config_field *field)
78{
79 return xen_pcibk_config_add_field_offset(dev, field, 0);
80}
81
82static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
83 const struct config_field *field)
84{
85 int i, err = 0;
86 for (i = 0; field[i].size != 0; i++) {
87 err = xen_pcibk_config_add_field(dev, &field[i]);
88 if (err)
89 break;
90 }
91 return err;
92}
93
94static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
95 const struct config_field *field,
96 unsigned int offset)
97{
98 int i, err = 0;
99 for (i = 0; field[i].size != 0; i++) {
100 err = xen_pcibk_config_add_field_offset(dev, &field[i], offset);
101 if (err)
102 break;
103 }
104 return err;
105}
106
107/* Read/Write the real configuration space */
108int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
109 void *data);
110int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
111 void *data);
112int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
113 void *data);
114int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
115 void *data);
116int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
117 void *data);
118int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
119 void *data);
120
121int xen_pcibk_config_capability_init(void);
122
123int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
124int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
125
126#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
new file mode 100644
index 000000000000..7f83e9083e9d
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -0,0 +1,207 @@
1/*
2 * PCI Backend - Handles the virtual fields found on the capability lists
3 * in the configuration space.
4 *
5 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
6 */
7
8#include <linux/kernel.h>
9#include <linux/pci.h>
10#include "pciback.h"
11#include "conf_space.h"
12
/* All capability overlays known to the backend, linked via cap_list. */
static LIST_HEAD(capabilities);

/* One overlay: a capability ID plus the virtual fields to install when
 * a device carries that capability. */
struct xen_pcibk_config_capability {
	struct list_head cap_list;

	/* PCI capability ID (PCI_CAP_ID_*) this overlay applies to */
	int capability;

	/* If the device has the capability found above, add these fields */
	const struct config_field *fields;
};
22
/* Overlay placed at the start of every matched capability: the guest
 * may read the capability ID/next pointer but never write them
 * (.u.w.write == NULL). */
static const struct config_field caplist_header[] = {
	{
	 .offset    = PCI_CAP_LIST_ID,
	 /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_config_word,
	 .u.w.write = NULL,
	},
	{}
};
32
/* Append a capability overlay to the global list.  No locking: the only
 * visible caller is xen_pcibk_config_capability_init(). */
static inline void register_capability(struct xen_pcibk_config_capability *cap)
{
	list_add_tail(&cap->cap_list, &capabilities);
}
37
38int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
39{
40 int err = 0;
41 struct xen_pcibk_config_capability *cap;
42 int cap_offset;
43
44 list_for_each_entry(cap, &capabilities, cap_list) {
45 cap_offset = pci_find_capability(dev, cap->capability);
46 if (cap_offset) {
47 dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
48 cap->capability, cap_offset);
49
50 err = xen_pcibk_config_add_fields_offset(dev,
51 caplist_header,
52 cap_offset);
53 if (err)
54 goto out;
55 err = xen_pcibk_config_add_fields_offset(dev,
56 cap->fields,
57 cap_offset);
58 if (err)
59 goto out;
60 }
61 }
62
63out:
64 return err;
65}
66
67static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
68 void *data)
69{
70 /* Disallow writes to the vital product data */
71 if (value & PCI_VPD_ADDR_F)
72 return PCIBIOS_SET_FAILED;
73 else
74 return pci_write_config_word(dev, offset, value);
75}
76
/* VPD capability overlay: address-register writes are filtered by
 * vpd_address_write(); the data register is read-only for the guest. */
static const struct config_field caplist_vpd[] = {
	{
	 .offset    = PCI_VPD_ADDR,
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_config_word,
	 .u.w.write = vpd_address_write,
	},
	{
	 .offset     = PCI_VPD_DATA,
	 .size       = 4,
	 .u.dw.read  = xen_pcibk_read_config_dword,
	 .u.dw.write = NULL,
	},
	{}
};
92
93static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
94 void *data)
95{
96 int err;
97 u16 real_value;
98
99 err = pci_read_config_word(dev, offset, &real_value);
100 if (err)
101 goto out;
102
103 *value = real_value & ~PCI_PM_CAP_PME_MASK;
104
105out:
106 return err;
107}
108
109/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
110 * Can't allow driver domain to enable PMEs - they're shared */
111#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
112
/* Filter a guest write to the PM control/status register: only the
 * whitelisted PM_OK_BITS are written through, then the requested power
 * state change is delegated to the PCI core instead of letting the
 * guest poke PCI_PM_CTRL_STATE_MASK directly. */
static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
			 void *data)
{
	int err;
	u16 old_value;
	pci_power_t new_state, old_state;

	err = pci_read_config_word(dev, offset, &old_value);
	if (err)
		goto out;

	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);

	/* Merge the allowed bits into the current value; skip the config
	 * write entirely when nothing whitelisted actually changed. */
	new_value &= PM_OK_BITS;
	if ((old_value & PM_OK_BITS) != new_value) {
		new_value = (old_value & ~PM_OK_BITS) | new_value;
		err = pci_write_config_word(dev, offset, new_value);
		if (err)
			goto out;
	}

	/* Let pci core handle the power management change */
	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
	err = pci_set_power_state(dev, new_state);
	if (err) {
		err = PCIBIOS_SET_FAILED;
		goto out;
	}

	/* NOTE(review): old_state is computed but never used — confirm
	 * whether a state-transition check was intended here. */
 out:
	return err;
}
146
/* Field init hook for PCI_PM_CTRL: ensure PMEs are disabled before the
 * register is exposed to the guest.  Returns ERR_PTR(err) — note that
 * ERR_PTR(0) == NULL, so no private data is attached on success. */
static void *pm_ctrl_init(struct pci_dev *dev, int offset)
{
	int err;
	u16 value;

	err = pci_read_config_word(dev, offset, &value);
	if (err)
		goto out;

	if (value & PCI_PM_CTRL_PME_ENABLE) {
		value &= ~PCI_PM_CTRL_PME_ENABLE;
		err = pci_write_config_word(dev, offset, value);
	}

out:
	return ERR_PTR(err);
}
165
/* Power-management capability overlay: capabilities word is filtered by
 * pm_caps_read(), control/status writes go through pm_ctrl_write(), and
 * the bridge-extension/data registers are read-only. */
static const struct config_field caplist_pm[] = {
	{
	 .offset     = PCI_PM_PMC,
	 .size       = 2,
	 .u.w.read   = pm_caps_read,
	},
	{
	 .offset     = PCI_PM_CTRL,
	 .size       = 2,
	 .init       = pm_ctrl_init,
	 .u.w.read   = xen_pcibk_read_config_word,
	 .u.w.write  = pm_ctrl_write,
	},
	{
	 .offset     = PCI_PM_PPB_EXTENSIONS,
	 .size       = 1,
	 .u.b.read   = xen_pcibk_read_config_byte,
	},
	{
	 .offset     = PCI_PM_DATA_REGISTER,
	 .size       = 1,
	 .u.b.read   = xen_pcibk_read_config_byte,
	},
	{}
};
191
/* The two built-in capability overlays: power management and VPD. */
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
	.capability = PCI_CAP_ID_PM,
	.fields = caplist_pm,
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
	.capability = PCI_CAP_ID_VPD,
	.fields = caplist_vpd,
};

/* Register the built-in overlays.  Always returns 0. */
int xen_pcibk_config_capability_init(void)
{
	register_capability(&xen_pcibk_config_capability_vpd);
	register_capability(&xen_pcibk_config_capability_pm);

	return 0;
}
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
new file mode 100644
index 000000000000..da3cbdfcb5dc
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -0,0 +1,386 @@
1/*
2 * PCI Backend - Handles the virtual fields in the configuration space headers.
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 */
6
7#include <linux/kernel.h>
8#include <linux/pci.h>
9#include "pciback.h"
10#include "conf_space.h"
11
/* Shadow state for one BAR/ROM register.  'which' selects what a guest
 * read returns: the sizing response (len_val) after an all-ones probe
 * write, or the address value (val) otherwise. */
struct pci_bar_info {
	u32 val;	/* BAR contents exposed to the guest */
	u32 len_val;	/* sizing response (see read_dev_bar) */
	int which;	/* non-zero => next read returns len_val */
};

#define DRV_NAME	"xen-pciback"
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
21
22static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
23{
24 int i;
25 int ret;
26
27 ret = xen_pcibk_read_config_word(dev, offset, value, data);
28 if (!atomic_read(&dev->enable_cnt))
29 return ret;
30
31 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
32 if (dev->resource[i].flags & IORESOURCE_IO)
33 *value |= PCI_COMMAND_IO;
34 if (dev->resource[i].flags & IORESOURCE_MEM)
35 *value |= PCI_COMMAND_MEMORY;
36 }
37
38 return ret;
39}
40
/* Handle a guest write to the COMMAND register.  Enable/disable,
 * bus-mastering and MWI are performed through the PCI core APIs (so the
 * kernel's bookkeeping stays correct); the filtered value is then
 * written to the real register. */
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
	struct xen_pcibk_dev_data *dev_data;
	int err;

	dev_data = pci_get_drvdata(dev);
	/* Transition disabled -> enabled when the guest sets IO/MEM decode. */
	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
			       pci_name(dev));
		err = pci_enable_device(dev);
		if (err)
			return err;
		if (dev_data)
			dev_data->enable_intx = 1;
	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
			       pci_name(dev));
		pci_disable_device(dev);
		if (dev_data)
			dev_data->enable_intx = 0;
	}

	if (!dev->is_busmaster && is_master_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
			       pci_name(dev));
		pci_set_master(dev);
	}

	if (value & PCI_COMMAND_INVALIDATE) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG
			       DRV_NAME ": %s: enable memory-write-invalidate\n",
			       pci_name(dev));
		err = pci_set_mwi(dev);
		if (err) {
			/* MWI not supported: report but strip the bit so the
			 * raw write below doesn't set it behind the core. */
			printk(KERN_WARNING
			       DRV_NAME ": %s: cannot enable "
			       "memory-write-invalidate (%d)\n",
			       pci_name(dev), err);
			value &= ~PCI_COMMAND_INVALIDATE;
		}
	}

	return pci_write_config_word(dev, offset, value);
}
89
/* Handle a guest write to the expansion ROM BAR.  Writing the sizing
 * pattern (~PCI_ROM_ADDRESS_ENABLE) arms bar->which so the next read
 * returns the length; any other value only restores the original BAR
 * contents — arbitrary relocation by the guest is not honoured. */
static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
	struct pci_bar_info *bar = data;

	if (unlikely(!bar)) {
		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
		       pci_name(dev));
		return XEN_PCI_ERR_op_failed;
	}

	/* A write to obtain the length must happen as a 32-bit write.
	 * This does not (yet) support writing individual bytes
	 */
	if (value == ~PCI_ROM_ADDRESS_ENABLE)
		bar->which = 1;
	else {
		u32 tmpval;
		pci_read_config_dword(dev, offset, &tmpval);
		if (tmpval != bar->val && value == bar->val) {
			/* Allow restoration of bar value. */
			pci_write_config_dword(dev, offset, bar->val);
		}
		bar->which = 0;
	}

	/* Do we need to support enabling/disabling the rom address here? */

	return 0;
}
119
/* For the BARs, only allow writes which write ~0 or
 * the correct resource information
 * (Needed for when the driver probes the resource usage)
 *
 * Mirrors rom_write() but with the plain all-ones sizing pattern.
 */
static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
	struct pci_bar_info *bar = data;

	if (unlikely(!bar)) {
		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
		       pci_name(dev));
		return XEN_PCI_ERR_op_failed;
	}

	/* A write to obtain the length must happen as a 32-bit write.
	 * This does not (yet) support writing individual bytes
	 */
	if (value == ~0)
		bar->which = 1;
	else {
		u32 tmpval;
		pci_read_config_dword(dev, offset, &tmpval);
		if (tmpval != bar->val && value == bar->val) {
			/* Allow restoration of bar value. */
			pci_write_config_dword(dev, offset, bar->val);
		}
		bar->which = 0;
	}

	return 0;
}
151
152static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
153{
154 struct pci_bar_info *bar = data;
155
156 if (unlikely(!bar)) {
157 printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
158 pci_name(dev));
159 return XEN_PCI_ERR_op_failed;
160 }
161
162 *value = bar->which ? bar->len_val : bar->val;
163
164 return 0;
165}
166
/* Fill @bar_info from the kernel's view of the device's resources.
 * For the upper dword of a 64-bit memory BAR, report the high 32 bits
 * of the *previous* resource instead of a resource of its own.
 * NOTE(review): @len_mask is accepted but never used — callers pass ~0
 * or ~PCI_ROM_ADDRESS_ENABLE; confirm whether it should mask len_val. */
static inline void read_dev_bar(struct pci_dev *dev,
				struct pci_bar_info *bar_info, int offset,
				u32 len_mask)
{
	int pos;
	struct resource *res = dev->resource;

	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
		pos = PCI_ROM_RESOURCE;
	else {
		/* BARs are dword-sized and contiguous from BASE_ADDRESS_0. */
		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
			bar_info->val = res[pos - 1].start >> 32;
			bar_info->len_val = res[pos - 1].end >> 32;
			return;
		}
	}

	bar_info->val = res[pos].start |
			(res[pos].flags & PCI_REGION_FLAG_MASK);
	bar_info->len_val = res[pos].end - res[pos].start + 1;
}
192
193static void *bar_init(struct pci_dev *dev, int offset)
194{
195 struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
196
197 if (!bar)
198 return ERR_PTR(-ENOMEM);
199
200 read_dev_bar(dev, bar, offset, ~0);
201 bar->which = 0;
202
203 return bar;
204}
205
206static void *rom_init(struct pci_dev *dev, int offset)
207{
208 struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
209
210 if (!bar)
211 return ERR_PTR(-ENOMEM);
212
213 read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
214 bar->which = 0;
215
216 return bar;
217}
218
/* Field reset hook: leave the sizing state machine in "report address"
 * mode for the next guest. */
static void bar_reset(struct pci_dev *dev, int offset, void *data)
{
	struct pci_bar_info *bar = data;

	bar->which = 0;
}

/* Field release hook: free the pci_bar_info allocated by bar/rom_init. */
static void bar_release(struct pci_dev *dev, int offset, void *data)
{
	kfree(data);
}
230
/* Report the cached vendor ID instead of touching config space. */
static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
				 u16 *value, void *data)
{
	*value = dev->vendor;

	return 0;
}

/* Report the cached device ID instead of touching config space. */
static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
				 u16 *value, void *data)
{
	*value = dev->device;

	return 0;
}

/* Report the kernel's idea of the IRQ line (dev->irq), which may differ
 * from what the raw PCI_INTERRUPT_LINE register contains. */
static int interrupt_read(struct pci_dev *dev, int offset, u8 *value,
			  void *data)
{
	*value = (u8) dev->irq;

	return 0;
}
254
255static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
256{
257 u8 cur_value;
258 int err;
259
260 err = pci_read_config_byte(dev, offset, &cur_value);
261 if (err)
262 goto out;
263
264 if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
265 || value == PCI_BIST_START)
266 err = pci_write_config_byte(dev, offset, value);
267
268out:
269 return err;
270}
271
/* Virtual fields shared by all header types: identity registers are
 * served from cached values, COMMAND is mediated by command_read/write,
 * and the remaining registers are passed through with varying degrees
 * of write access. */
static const struct config_field header_common[] = {
	{
	 .offset    = PCI_VENDOR_ID,
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_vendor,
	},
	{
	 .offset    = PCI_DEVICE_ID,
	 .size      = 2,
	 .u.w.read  = xen_pcibk_read_device,
	},
	{
	 .offset    = PCI_COMMAND,
	 .size      = 2,
	 .u.w.read  = command_read,
	 .u.w.write = command_write,
	},
	{
	 .offset    = PCI_INTERRUPT_LINE,
	 .size      = 1,
	 .u.b.read  = interrupt_read,
	},
	{
	 .offset    = PCI_INTERRUPT_PIN,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	},
	{
	 /* Any side effects of letting driver domain control cache line? */
	 .offset    = PCI_CACHE_LINE_SIZE,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	 .u.b.write = xen_pcibk_write_config_byte,
	},
	{
	 .offset    = PCI_LATENCY_TIMER,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	},
	{
	 .offset    = PCI_BIST,
	 .size      = 1,
	 .u.b.read  = xen_pcibk_read_config_byte,
	 .u.b.write = bist_write,
	},
	{}
};
319
/* Template for a guest-visible BAR register backed by a pci_bar_info
 * shadow (allocated in bar_init, freed in bar_release). */
#define CFG_FIELD_BAR(reg_offset)			\
	{						\
	.offset     = reg_offset,			\
	.size       = 4,				\
	.init       = bar_init,				\
	.reset      = bar_reset,			\
	.release    = bar_release,			\
	.u.dw.read  = bar_read,				\
	.u.dw.write = bar_write,			\
	}

/* Same as CFG_FIELD_BAR but for the expansion ROM BAR, whose sizing
 * pattern and write policy differ (rom_init/rom_write). */
#define CFG_FIELD_ROM(reg_offset)			\
	{						\
	.offset     = reg_offset,			\
	.size       = 4,				\
	.init       = rom_init,				\
	.reset      = bar_reset,			\
	.release    = bar_release,			\
	.u.dw.read  = bar_read,				\
	.u.dw.write = rom_write,			\
	}
341
/* Type 0 (normal device) header: six BARs plus the expansion ROM. */
static const struct config_field header_0[] = {
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
	{}
};

/* Type 1 (bridge) header: two BARs plus the bridge ROM register. */
static const struct config_field header_1[] = {
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
	{}
};
359
360int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
361{
362 int err;
363
364 err = xen_pcibk_config_add_fields(dev, header_common);
365 if (err)
366 goto out;
367
368 switch (dev->hdr_type) {
369 case PCI_HEADER_TYPE_NORMAL:
370 err = xen_pcibk_config_add_fields(dev, header_0);
371 break;
372
373 case PCI_HEADER_TYPE_BRIDGE:
374 err = xen_pcibk_config_add_fields(dev, header_1);
375 break;
376
377 default:
378 err = -EINVAL;
379 printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n",
380 pci_name(dev), dev->hdr_type);
381 break;
382 }
383
384out:
385 return err;
386}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
new file mode 100644
index 000000000000..921a889e65eb
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -0,0 +1,140 @@
1/*
2 * PCI Backend - Handle special overlays for broken devices.
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
6 */
7
8#include <linux/kernel.h>
9#include <linux/pci.h>
10#include "pciback.h"
11#include "conf_space.h"
12#include "conf_space_quirks.h"
13
14LIST_HEAD(xen_pcibk_quirks);
15#define DRV_NAME "xen-pciback"
/* Match @dev against one pci_device_id entry, honouring PCI_ANY_ID
 * wildcards and the class/class_mask compare (same rules as the PCI
 * core's ID matching).  Returns @id on a match, NULL otherwise. */
static inline const struct pci_device_id *
match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
	    (id->subvendor == PCI_ANY_ID ||
	     id->subvendor == dev->subsystem_vendor) &&
	    (id->subdevice == PCI_ANY_ID ||
	     id->subdevice == dev->subsystem_device) &&
	    !((id->class ^ dev->class) & id->class_mask))
		return id;
	return NULL;
}
29
30static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
31{
32 struct xen_pcibk_config_quirk *tmp_quirk;
33
34 list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list)
35 if (match_one_device(&tmp_quirk->devid, dev) != NULL)
36 goto out;
37 tmp_quirk = NULL;
38 printk(KERN_DEBUG DRV_NAME
39 ":quirk didn't match any device xen_pciback knows about\n");
40out:
41 return tmp_quirk;
42}
43
/* Append a quirk entry to the global list.
 * NOTE(review): no lock is taken here — confirm callers serialize. */
static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
{
	list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
}
48
49int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg)
50{
51 int ret = 0;
52 struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
53 struct config_field_entry *cfg_entry;
54
55 list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
56 if (OFFSET(cfg_entry) == reg) {
57 ret = 1;
58 break;
59 }
60 }
61 return ret;
62}
63
64int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
65 *field)
66{
67 int err = 0;
68
69 switch (field->size) {
70 case 1:
71 field->u.b.read = xen_pcibk_read_config_byte;
72 field->u.b.write = xen_pcibk_write_config_byte;
73 break;
74 case 2:
75 field->u.w.read = xen_pcibk_read_config_word;
76 field->u.w.write = xen_pcibk_write_config_word;
77 break;
78 case 4:
79 field->u.dw.read = xen_pcibk_read_config_dword;
80 field->u.dw.write = xen_pcibk_write_config_dword;
81 break;
82 default:
83 err = -EINVAL;
84 goto out;
85 }
86
87 xen_pcibk_config_add_field(dev, field);
88
89out:
90 return err;
91}
92
/* Create and register a quirk entry keyed to this exact device
 * (vendor/device/subsystem IDs, class wildcarded).
 * GFP_ATOMIC: presumably callable with a spinlock held — TODO confirm. */
int xen_pcibk_config_quirks_init(struct pci_dev *dev)
{
	struct xen_pcibk_config_quirk *quirk;
	int ret = 0;

	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
	if (!quirk) {
		ret = -ENOMEM;
		goto out;
	}

	quirk->devid.vendor = dev->vendor;
	quirk->devid.device = dev->device;
	quirk->devid.subvendor = dev->subsystem_vendor;
	quirk->devid.subdevice = dev->subsystem_device;
	/* class/class_mask of 0 => class compare always matches */
	quirk->devid.class = 0;
	quirk->devid.class_mask = 0;
	quirk->devid.driver_data = 0UL;

	quirk->pdev = dev;

	register_quirk(quirk);
out:
	return ret;
}
118
/* Free a dynamically-allocated config field (quirk fields only). */
void xen_pcibk_config_field_free(struct config_field *field)
{
	kfree(field);
}
123
124int xen_pcibk_config_quirk_release(struct pci_dev *dev)
125{
126 struct xen_pcibk_config_quirk *quirk;
127 int ret = 0;
128
129 quirk = xen_pcibk_find_quirk(dev);
130 if (!quirk) {
131 ret = -ENXIO;
132 goto out;
133 }
134
135 list_del(&quirk->quirks_list);
136 kfree(quirk);
137
138out:
139 return ret;
140}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h
new file mode 100644
index 000000000000..cfcc517e4570
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_quirks.h
@@ -0,0 +1,33 @@
1/*
2 * PCI Backend - Data structures for special overlays for broken devices.
3 *
4 * Ryan Wilson <hap9@epoch.ncsc.mil>
5 * Chris Bookholt <hap10@epoch.ncsc.mil>
6 */
7
8#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
9#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
10
11#include <linux/pci.h>
12#include <linux/list.h>
13
/* One registered quirk: the device IDs it matches (devid) and the
 * pci_dev it was created for (pdev). */
struct xen_pcibk_config_quirk {
	struct list_head quirks_list;	/* link in xen_pcibk_quirks */
	struct pci_device_id devid;
	struct pci_dev *pdev;
};
19
20int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
21 *field);
22
23int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
24
25int xen_pcibk_config_quirks_init(struct pci_dev *dev);
26
27void xen_pcibk_config_field_free(struct config_field *field);
28
29int xen_pcibk_config_quirk_release(struct pci_dev *dev);
30
31int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg);
32
33#endif
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
new file mode 100644
index 000000000000..1d32a9a42c01
--- /dev/null
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -0,0 +1,194 @@
1/*
2 * PCI Backend - Provides restricted access to the real PCI bus topology
3 * to the frontend
4 *
5 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
6 */
7
8#include <linux/list.h>
9#include <linux/pci.h>
10#include <linux/spinlock.h>
11#include "pciback.h"
12
/* Per-backend-instance state for the passthrough backend: a flat list
 * of exported pci_dev_entry items. */
struct passthrough_dev_data {
	/* Access to dev_list must be protected by lock */
	struct list_head dev_list;
	spinlock_t lock;
};
18
/* Backend .get: look up the exported pci_dev matching the frontend's
 * domain/bus/devfn triple, or NULL.  No reference is taken here; the
 * entry keeps the device alive while it stays on dev_list. */
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
					       unsigned int domain,
					       unsigned int bus,
					       unsigned int devfn)
{
	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
	struct pci_dev_entry *dev_entry;
	struct pci_dev *dev = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev_data->lock, flags);

	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
		    && bus == (unsigned int)dev_entry->dev->bus->number
		    && devfn == dev_entry->dev->devfn) {
			dev = dev_entry->dev;
			break;
		}
	}

	spin_unlock_irqrestore(&dev_data->lock, flags);

	return dev;
}
44
/* Backend .add: record @dev on the export list and publish it to the
 * frontend via @publish_cb with its real domain/bus/devfn.
 * NOTE(review): on publish failure the entry stays on dev_list —
 * confirm the caller tears it down via the release path. */
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
				   struct pci_dev *dev,
				   int devid, publish_pci_dev_cb publish_cb)
{
	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
	struct pci_dev_entry *dev_entry;
	unsigned long flags;
	unsigned int domain, bus, devfn;
	int err;

	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
	if (!dev_entry)
		return -ENOMEM;
	dev_entry->dev = dev;

	spin_lock_irqsave(&dev_data->lock, flags);
	list_add_tail(&dev_entry->list, &dev_data->dev_list);
	spin_unlock_irqrestore(&dev_data->lock, flags);

	/* Publish this device. */
	domain = (unsigned int)pci_domain_nr(dev->bus);
	bus = (unsigned int)dev->bus->number;
	devfn = dev->devfn;
	err = publish_cb(pdev, domain, bus, devfn, devid);

	return err;
}
72
/* Backend .release: unlink @dev from the export list and hand it back
 * to the stub (pcistub_put_pci_dev) outside the spinlock. */
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
					struct pci_dev *dev)
{
	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
	struct pci_dev_entry *dev_entry, *t;
	struct pci_dev *found_dev = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev_data->lock, flags);

	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
		if (dev_entry->dev == dev) {
			list_del(&dev_entry->list);
			found_dev = dev_entry->dev;
			kfree(dev_entry);
		}
	}

	spin_unlock_irqrestore(&dev_data->lock, flags);

	if (found_dev)
		pcistub_put_pci_dev(found_dev);
}
96
/* Backend .init: allocate the per-instance passthrough state and attach
 * it to @pdev.  Returns -ENOMEM on allocation failure. */
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
	struct passthrough_dev_data *dev_data;

	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
	if (!dev_data)
		return -ENOMEM;

	spin_lock_init(&dev_data->lock);

	INIT_LIST_HEAD(&dev_data->dev_list);

	pdev->pci_dev_data = dev_data;

	return 0;
}
113
/* Backend .publish: report each exported device's bus as a PCI root,
 * skipping devices whose parent bridge is itself exported.
 * NOTE(review): uses plain spin_lock while the other accessors use
 * spin_lock_irqsave — confirm this path cannot race an IRQ-context
 * user of dev_data->lock. */
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
					 publish_pci_root_cb publish_root_cb)
{
	int err = 0;
	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
	struct pci_dev_entry *dev_entry, *e, *tmp;
	struct pci_dev *dev;
	int found;
	unsigned int domain, bus;

	spin_lock(&dev_data->lock);

	list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
		/* Only publish this device as a root if none of its
		 * parent bridges are exported
		 */
		found = 0;
		dev = dev_entry->dev->bus->self;
		for (; !found && dev != NULL; dev = dev->bus->self) {
			list_for_each_entry(e, &dev_data->dev_list, list) {
				if (dev == e->dev) {
					found = 1;
					break;
				}
			}
		}

		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
		bus = (unsigned int)dev_entry->dev->bus->number;

		if (!found) {
			/* The lock is dropped around the callback —
			 * presumably because it can sleep (TODO confirm);
			 * _safe iteration tolerates the gap. */
			spin_unlock(&dev_data->lock);
			err = publish_root_cb(pdev, domain, bus);
			if (err)
				break;
			spin_lock(&dev_data->lock);
		}
	}

	/* On error we broke out with the lock already released. */
	if (!err)
		spin_unlock(&dev_data->lock);

	return err;
}
158
/* Backend .free: return every exported device to the stub and free the
 * per-instance state.  No locking — presumably only called once no
 * other path can touch dev_list (TODO confirm). */
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
	struct pci_dev_entry *dev_entry, *t;

	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
		list_del(&dev_entry->list);
		pcistub_put_pci_dev(dev_entry->dev);
		kfree(dev_entry);
	}

	kfree(dev_data);
	pdev->pci_dev_data = NULL;
}
173
/* Backend .find: in passthrough mode the frontend sees the real
 * topology, so simply report the device's actual domain/bus/devfn.
 * Returns 1 (always found). */
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
					struct xen_pcibk_device *pdev,
					unsigned int *domain, unsigned int *bus,
					unsigned int *devfn)
{
	*domain = pci_domain_nr(pcidev->bus);
	*bus = pcidev->bus->number;
	*devfn = pcidev->devfn;
	return 1;
}
184
/* Ops table for the "passthrough" backend (real topology exposed to the
 * frontend), selected via the module's backend option. */
struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
	.name           = "passthrough",
	.init           = __xen_pcibk_init_devices,
	.free           = __xen_pcibk_release_devices,
	.find           = __xen_pcibk_get_pcifront_dev,
	.publish        = __xen_pcibk_publish_pci_roots,
	.release        = __xen_pcibk_release_pci_dev,
	.add            = __xen_pcibk_add_pci_dev,
	.get            = __xen_pcibk_get_pci_dev,
};
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
new file mode 100644
index 000000000000..aec214ac0a14
--- /dev/null
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -0,0 +1,1376 @@
1/*
2 * PCI Stub Driver - Grabs devices in backend to be exported later
3 *
4 * Ryan Wilson <hap9@epoch.ncsc.mil>
5 * Chris Bookholt <hap10@epoch.ncsc.mil>
6 */
7#include <linux/module.h>
8#include <linux/init.h>
9#include <linux/rwsem.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/kref.h>
13#include <linux/pci.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/atomic.h>
17#include <xen/events.h>
18#include <asm/xen/pci.h>
19#include <asm/xen/hypervisor.h>
20#include "pciback.h"
21#include "conf_space.h"
22#include "conf_space_quirks.h"
23
#define DRV_NAME	"xen-pciback"

/* "hide" module parameter: list of slots the stub should seize. */
static char *pci_devs_to_hide;
wait_queue_head_t xen_pcibk_aer_wait_queue;
/*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
* We want to avoid in middle of AER ops, xen_pcibk devices is being removed
*/
static DECLARE_RWSEM(pcistub_sem);
module_param_named(hide, pci_devs_to_hide, charp, 0444);

/* One slot the stub has been asked to seize (parsed from "hide"). */
struct pcistub_device_id {
	struct list_head slot_list;
	int domain;
	unsigned char bus;
	unsigned int devfn;
};
static LIST_HEAD(pcistub_device_ids);
static DEFINE_SPINLOCK(device_ids_lock);

/* A seized device: refcounted; pdev records which backend instance
 * currently owns it. */
struct pcistub_device {
	struct kref kref;
	struct list_head dev_list;
	spinlock_t lock;

	struct pci_dev *dev;
	struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
};

/* Access to pcistub_devices & seized_devices lists and the initialize_devices
 * flag must be locked with pcistub_devices_lock
 */
static DEFINE_SPINLOCK(pcistub_devices_lock);
static LIST_HEAD(pcistub_devices);

/* wait for device_initcall before initializing our devices
 * (see pcistub_init_devices_late)
 */
static int initialize_devices;
static LIST_HEAD(seized_devices);
63
/* Allocate a refcounted stub entry holding a reference on @dev
 * (pci_dev_get).  GFP_ATOMIC: presumably callable from atomic context —
 * TODO confirm.  Returns NULL on failure. */
static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
{
	struct pcistub_device *psdev;

	dev_dbg(&dev->dev, "pcistub_device_alloc\n");

	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
	if (!psdev)
		return NULL;

	psdev->dev = pci_dev_get(dev);
	if (!psdev->dev) {
		kfree(psdev);
		return NULL;
	}

	kref_init(&psdev->kref);
	spin_lock_init(&psdev->lock);

	return psdev;
}
85
/* Don't call this directly as it's called by pcistub_device_put */
static void pcistub_device_release(struct kref *kref)
{
	struct pcistub_device *psdev;

	psdev = container_of(kref, struct pcistub_device, kref);

	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");

	xen_unregister_device_domain_owner(psdev->dev);

	/* Clean-up the device */
	xen_pcibk_reset_device(psdev->dev);
	xen_pcibk_config_free_dyn_fields(psdev->dev);
	xen_pcibk_config_free_dev(psdev->dev);
	/* dev_data was allocated in pcistub_init_device and stored as
	 * drvdata; free it and clear the pointer before dropping our ref. */
	kfree(pci_get_drvdata(psdev->dev));
	pci_set_drvdata(psdev->dev, NULL);

	pci_dev_put(psdev->dev);

	kfree(psdev);
}
108
/* Take a reference on a stub entry. */
static inline void pcistub_device_get(struct pcistub_device *psdev)
{
	kref_get(&psdev->kref);
}

/* Drop a reference; the last put runs pcistub_device_release(). */
static inline void pcistub_device_put(struct pcistub_device *psdev)
{
	kref_put(&psdev->kref, pcistub_device_release);
}
118
119static struct pcistub_device *pcistub_device_find(int domain, int bus,
120 int slot, int func)
121{
122 struct pcistub_device *psdev = NULL;
123 unsigned long flags;
124
125 spin_lock_irqsave(&pcistub_devices_lock, flags);
126
127 list_for_each_entry(psdev, &pcistub_devices, dev_list) {
128 if (psdev->dev != NULL
129 && domain == pci_domain_nr(psdev->dev->bus)
130 && bus == psdev->dev->bus->number
131 && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
132 pcistub_device_get(psdev);
133 goto out;
134 }
135 }
136
137 /* didn't find it */
138 psdev = NULL;
139
140out:
141 spin_unlock_irqrestore(&pcistub_devices_lock, flags);
142 return psdev;
143}
144
/* Try to claim @psdev for backend instance @pdev.  On success the entry
 * keeps the extra reference taken here and the pci_dev is returned; if
 * the entry is already claimed, the reference is dropped again and NULL
 * is returned. */
static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
						  struct pcistub_device *psdev)
{
	struct pci_dev *pci_dev = NULL;
	unsigned long flags;

	pcistub_device_get(psdev);

	spin_lock_irqsave(&psdev->lock, flags);
	if (!psdev->pdev) {
		psdev->pdev = pdev;
		pci_dev = psdev->dev;
	}
	spin_unlock_irqrestore(&psdev->lock, flags);

	if (!pci_dev)
		pcistub_device_put(psdev);

	return pci_dev;
}
165
/* Claim the seized device at domain/bus/slot/func for @pdev.  Returns
 * the pci_dev on success, NULL if absent or already claimed (see
 * pcistub_device_get_pci_dev for the reference semantics). */
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
					    int domain, int bus,
					    int slot, int func)
{
	struct pcistub_device *psdev;
	struct pci_dev *found_dev = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (psdev->dev != NULL
		    && domain == pci_domain_nr(psdev->dev->bus)
		    && bus == psdev->dev->bus->number
		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
			break;
		}
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
	return found_dev;
}
189
190struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
191 struct pci_dev *dev)
192{
193 struct pcistub_device *psdev;
194 struct pci_dev *found_dev = NULL;
195 unsigned long flags;
196
197 spin_lock_irqsave(&pcistub_devices_lock, flags);
198
199 list_for_each_entry(psdev, &pcistub_devices, dev_list) {
200 if (psdev->dev == dev) {
201 found_dev = pcistub_device_get_pci_dev(pdev, psdev);
202 break;
203 }
204 }
205
206 spin_unlock_irqrestore(&pcistub_devices_lock, flags);
207 return found_dev;
208}
209
210void pcistub_put_pci_dev(struct pci_dev *dev)
211{
212 struct pcistub_device *psdev, *found_psdev = NULL;
213 unsigned long flags;
214
215 spin_lock_irqsave(&pcistub_devices_lock, flags);
216
217 list_for_each_entry(psdev, &pcistub_devices, dev_list) {
218 if (psdev->dev == dev) {
219 found_psdev = psdev;
220 break;
221 }
222 }
223
224 spin_unlock_irqrestore(&pcistub_devices_lock, flags);
225
226 /*hold this lock for avoiding breaking link between
227 * pcistub and xen_pcibk when AER is in processing
228 */
229 down_write(&pcistub_sem);
230 /* Cleanup our device
231 * (so it's ready for the next domain)
232 */
233 xen_pcibk_reset_device(found_psdev->dev);
234 xen_pcibk_config_free_dyn_fields(found_psdev->dev);
235 xen_pcibk_config_reset_dev(found_psdev->dev);
236
237 spin_lock_irqsave(&found_psdev->lock, flags);
238 found_psdev->pdev = NULL;
239 spin_unlock_irqrestore(&found_psdev->lock, flags);
240
241 pcistub_device_put(found_psdev);
242 up_write(&pcistub_sem);
243}
244
/* Match the specified device by domain, bus, slot, func and also if
 * any of the device's parent bridges match.
 * Returns 1 on a match, 0 otherwise. */
static int __devinit pcistub_match_one(struct pci_dev *dev,
				       struct pcistub_device_id *pdev_id)
{
	/* Walk up through parent bridges (dev->bus->self). */
	for (; dev != NULL; dev = dev->bus->self) {
		if (pci_domain_nr(dev->bus) == pdev_id->domain
		    && dev->bus->number == pdev_id->bus
		    && dev->devfn == pdev_id->devfn)
			return 1;

		/* Sometimes topmost bridge links to itself. */
		if (dev == dev->bus->self)
			break;
	}

	return 0;
}
264
/*
 * Return 1 if @dev (or one of its parent bridges -- see
 * pcistub_match_one()) is on the user-configured seize list,
 * 0 otherwise.
 */
static int __devinit pcistub_match(struct pci_dev *dev)
{
	struct pcistub_device_id *pdev_id;
	unsigned long flags;
	int found = 0;

	spin_lock_irqsave(&device_ids_lock, flags);
	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
		if (pcistub_match_one(dev, pdev_id)) {
			found = 1;
			break;
		}
	}
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return found;
}
282
/*
 * One-time setup of a seized device: allocate the per-device driver
 * data, initialize the config-space emulation, then briefly enable the
 * device so the PCI core assigns its real IRQ/resources, and finally
 * reset it.  Returns 0 or -errno; partial state is undone on failure.
 */
static int __devinit pcistub_init_device(struct pci_dev *dev)
{
	struct xen_pcibk_dev_data *dev_data;
	int err = 0;

	dev_dbg(&dev->dev, "initializing...\n");

	/* The PCI backend is not intended to be a module (or to work with
	 * removable PCI devices (yet). If it were, xen_pcibk_config_free()
	 * would need to be called somewhere to free the memory allocated
	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
	 */
	/* The extra bytes hold irq_name, a flexible array at the end of
	 * dev_data: DRV_NAME "[" + pci_name + "]" + NUL. */
	dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
			   + strlen(pci_name(dev)) + 1, GFP_ATOMIC);
	if (!dev_data) {
		err = -ENOMEM;
		goto out;
	}
	pci_set_drvdata(dev, dev_data);

	/*
	 * Setup name for fake IRQ handler. It will only be enabled
	 * once the device is turned on by the guest.
	 */
	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));

	dev_dbg(&dev->dev, "initializing config\n");

	init_waitqueue_head(&xen_pcibk_aer_wait_queue);
	err = xen_pcibk_config_init_dev(dev);
	if (err)
		goto out;

	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
	 * must do this here because pcibios_enable_device may specify
	 * the pci device's true irq (and possibly its other resources)
	 * if they differ from what's in the configuration space.
	 * This makes the assumption that the device's resources won't
	 * change after this point (otherwise this code may break!)
	 */
	dev_dbg(&dev->dev, "enabling device\n");
	err = pci_enable_device(dev);
	if (err)
		goto config_release;

	/* Now disable the device (this also ensures some private device
	 * data is setup before we export)
	 */
	dev_dbg(&dev->dev, "reset device\n");
	xen_pcibk_reset_device(dev);

	return 0;

config_release:
	xen_pcibk_config_free_dev(dev);

out:
	/* kfree(NULL) is safe, so this covers the early -ENOMEM path too. */
	pci_set_drvdata(dev, NULL);
	kfree(dev_data);
	return err;
}
344
/*
 * Because some initialization still happens on
 * devices during fs_initcall, we need to defer
 * full initialization of our devices until
 * device_initcall.
 */
static int __init pcistub_init_devices_late(void)
{
	struct pcistub_device *psdev;
	unsigned long flags;
	int err = 0;

	pr_debug(DRV_NAME ": pcistub_init_devices_late\n");

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	/* Drain seized_devices one entry at a time, dropping the lock
	 * around pcistub_init_device() (which may sleep/enable IRQs);
	 * successfully initialized devices move to pcistub_devices,
	 * failed ones are logged and freed. */
	while (!list_empty(&seized_devices)) {
		psdev = container_of(seized_devices.next,
				     struct pcistub_device, dev_list);
		list_del(&psdev->dev_list);

		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		err = pcistub_init_device(psdev->dev);
		if (err) {
			dev_err(&psdev->dev->dev,
				"error %d initializing device\n", err);
			kfree(psdev);
			psdev = NULL;
		}

		spin_lock_irqsave(&pcistub_devices_lock, flags);

		if (psdev)
			list_add_tail(&psdev->dev_list, &pcistub_devices);
	}

	/* From here on pcistub_seize() initializes devices immediately. */
	initialize_devices = 1;

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	/* Per-device failures are not propagated; always report success. */
	return 0;
}
388
/*
 * Take ownership of @dev: wrap it in a pcistub_device and either
 * initialize it immediately (once initialize_devices is set) or queue
 * it on seized_devices for pcistub_init_devices_late() to finish.
 * Returns 0 or -errno.
 */
static int __devinit pcistub_seize(struct pci_dev *dev)
{
	struct pcistub_device *psdev;
	unsigned long flags;
	int err = 0;

	psdev = pcistub_device_alloc(dev);
	if (!psdev)
		return -ENOMEM;

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	if (initialize_devices) {
		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		/* don't want irqs disabled when calling pcistub_init_device */
		err = pcistub_init_device(psdev->dev);

		spin_lock_irqsave(&pcistub_devices_lock, flags);

		if (!err)
			list_add(&psdev->dev_list, &pcistub_devices);
	} else {
		dev_dbg(&dev->dev, "deferring initialization\n");
		list_add(&psdev->dev_list, &seized_devices);
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	/* On failure drop the reference from pcistub_device_alloc(). */
	if (err)
		pcistub_device_put(psdev);

	return err;
}
423
424static int __devinit pcistub_probe(struct pci_dev *dev,
425 const struct pci_device_id *id)
426{
427 int err = 0;
428
429 dev_dbg(&dev->dev, "probing...\n");
430
431 if (pcistub_match(dev)) {
432
433 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
434 && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
435 dev_err(&dev->dev, "can't export pci devices that "
436 "don't have a normal (0) or bridge (1) "
437 "header type!\n");
438 err = -ENODEV;
439 goto out;
440 }
441
442 dev_info(&dev->dev, "seizing device\n");
443 err = pcistub_seize(dev);
444 } else
445 /* Didn't find the device */
446 err = -ENODEV;
447
448out:
449 return err;
450}
451
/*
 * PCI remove callback: drop @dev from the stub's bookkeeping.  If a
 * guest domain still has the device, loudly warn and force the backend
 * to release it first.
 */
static void pcistub_remove(struct pci_dev *dev)
{
	struct pcistub_device *psdev, *found_psdev = NULL;
	unsigned long flags;

	dev_dbg(&dev->dev, "removing\n");

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	xen_pcibk_config_quirk_release(dev);

	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (psdev->dev == dev) {
			found_psdev = psdev;
			break;
		}
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	if (found_psdev) {
		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
			found_psdev->pdev);

		/* found_psdev->pdev non-NULL means a backend (guest) still
		 * owns the device. */
		if (found_psdev->pdev) {
			printk(KERN_WARNING DRV_NAME ": ****** removing device "
			       "%s while still in-use! ******\n",
			       pci_name(found_psdev->dev));
			printk(KERN_WARNING DRV_NAME ": ****** driver domain may"
			       " still access this device's i/o resources!\n");
			printk(KERN_WARNING DRV_NAME ": ****** shutdown driver "
			       "domain before binding device\n");
			printk(KERN_WARNING DRV_NAME ": ****** to other drivers "
			       "or domains\n");

			xen_pcibk_release_pci_dev(found_psdev->pdev,
						found_psdev->dev);
		}

		spin_lock_irqsave(&pcistub_devices_lock, flags);
		list_del(&found_psdev->dev_list);
		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		/* the final put for releasing from the list */
		pcistub_device_put(found_psdev);
	}
}
499
/* Wildcard ID table: the stub is offered every PCI device; the actual
 * filtering against the user's seize list happens in pcistub_probe(). */
static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
	{
	 .vendor = PCI_ANY_ID,
	 .device = PCI_ANY_ID,
	 .subvendor = PCI_ANY_ID,
	 .subdevice = PCI_ANY_ID,
	 },
	{0,},
};
509
510#define PCI_NODENAME_MAX 40
511static void kill_domain_by_device(struct pcistub_device *psdev)
512{
513 struct xenbus_transaction xbt;
514 int err;
515 char nodename[PCI_NODENAME_MAX];
516
517 if (!psdev)
518 dev_err(&psdev->dev->dev,
519 "device is NULL when do AER recovery/kill_domain\n");
520 snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
521 psdev->pdev->xdev->otherend_id);
522 nodename[strlen(nodename)] = '\0';
523
524again:
525 err = xenbus_transaction_start(&xbt);
526 if (err) {
527 dev_err(&psdev->dev->dev,
528 "error %d when start xenbus transaction\n", err);
529 return;
530 }
531 /*PV AER handlers will set this flag*/
532 xenbus_printf(xbt, nodename, "aerState" , "aerfail");
533 err = xenbus_transaction_end(xbt, 0);
534 if (err) {
535 if (err == -EAGAIN)
536 goto again;
537 dev_err(&psdev->dev->dev,
538 "error %d when end xenbus transaction\n", err);
539 return;
540 }
541}
542
/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
 * backend need to have cooperation. In xen_pcibk, those steps will do similar
 * jobs: send service request and waiting for front_end response.
 *
 * @psdev:  stub device whose guest must service the AER step
 * @state:  channel state passed through in aer_op->err
 * @aer_cmd: which AER step (XEN_PCI_OP_aer_*)
 * @result: value returned if the frontend times out
 * Returns the frontend's answer (aer_op->err) as a pci_ers_result_t.
*/
static pci_ers_result_t common_process(struct pcistub_device *psdev,
				       pci_channel_state_t state, int aer_cmd,
				       pci_ers_result_t result)
{
	pci_ers_result_t res = result;
	struct xen_pcie_aer_op *aer_op;
	int ret;

	/*with PV AER drivers*/
	/* Fill the request block in the page shared with the frontend. */
	aer_op = &(psdev->pdev->sh_info->aer_op);
	aer_op->cmd = aer_cmd ;
	/*useful for error_detected callback*/
	aer_op->err = state;
	/*pcifront_end BDF*/
	ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
					 &aer_op->domain, &aer_op->bus, &aer_op->devfn);
	if (!ret) {
		dev_err(&psdev->dev->dev,
			DRV_NAME ": failed to get pcifront device\n");
		return PCI_ERS_RESULT_NONE;
	}
	wmb();	/* publish the request before raising the flags below */

	dev_dbg(&psdev->dev->dev,
			DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
	/*local flag to mark there's aer request, xen_pcibk callback will use
	 * this flag to judge whether we need to check pci-front give aer
	 * service ack signal
	 */
	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);

	/*It is possible that a pcifront conf_read_write ops request invokes
	 * the callback which cause the spurious execution of wake_up.
	 * Yet it is harmless and better than a spinlock here
	 */
	set_bit(_XEN_PCIB_active,
		(unsigned long *)&psdev->pdev->sh_info->flags);
	wmb();	/* flag must be visible before the event channel fires */
	notify_remote_via_irq(psdev->pdev->evtchn_irq);

	/* Wait up to 300s for the frontend to clear _XEN_PCIB_active. */
	ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
				 !(test_bit(_XEN_PCIB_active, (unsigned long *)
				 &psdev->pdev->sh_info->flags)), 300*HZ);

	if (!ret) {
		if (test_bit(_XEN_PCIB_active,
			(unsigned long *)&psdev->pdev->sh_info->flags)) {
			dev_err(&psdev->dev->dev,
				"pcifront aer process not responding!\n");
			clear_bit(_XEN_PCIB_active,
			  (unsigned long *)&psdev->pdev->sh_info->flags);
			aer_op->err = PCI_ERS_RESULT_NONE;
			return res;
		}
	}
	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);

	/* The frontend may have queued a normal config op while AER was
	 * in flight; kick the regular op path if so. */
	if (test_bit(_XEN_PCIF_active,
		(unsigned long *)&psdev->pdev->sh_info->flags)) {
		dev_dbg(&psdev->dev->dev,
			"schedule pci_conf service in xen_pcibk\n");
		xen_pcibk_test_and_schedule_op(psdev->pdev);
	}

	res = (pci_ers_result_t)aer_op->err;
	return res;
}
615
/*
* xen_pcibk_slot_reset: it will send the slot_reset request to pcifront in case
* of the device driver could provide this service, and then wait for pcifront
* ack.
* @dev: pointer to PCI devices
* return value is used by aer_core do_recovery policy
*/
static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
{
	struct pcistub_device *psdev;
	pci_ers_result_t result;

	result = PCI_ERS_RESULT_RECOVERED;
	dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n",
		dev->bus->number, dev->devfn);

	/* pcistub_sem serializes AER processing against
	 * pcistub_put_pci_dev() tearing the stub<->backend link down. */
	down_write(&pcistub_sem);
	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
				dev->bus->number,
				PCI_SLOT(dev->devfn),
				PCI_FUNC(dev->devfn));

	if (!psdev || !psdev->pdev) {
		dev_err(&dev->dev,
			DRV_NAME " device is not found/assigned\n");
		goto end;
	}

	if (!psdev->pdev->sh_info) {
		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
			" by HVM, kill it\n");
		kill_domain_by_device(psdev);
		goto release;
	}

	if (!test_bit(_XEN_PCIB_AERHANDLER,
		(unsigned long *)&psdev->pdev->sh_info->flags)) {
		dev_err(&dev->dev,
			"guest with no AER driver should have been killed\n");
		goto release;
	}
	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);

	if (result == PCI_ERS_RESULT_NONE ||
		result == PCI_ERS_RESULT_DISCONNECT) {
		dev_dbg(&dev->dev,
			"No AER slot_reset service or disconnected!\n");
		kill_domain_by_device(psdev);
	}
release:
	/* Drop the reference taken by pcistub_device_find(). */
	pcistub_device_put(psdev);
end:
	up_write(&pcistub_sem);
	return result;

}
672
673
/*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to pcifront
* in case of the device driver could provide this service, and then wait
* for pcifront ack
* @dev: pointer to PCI devices
* return value is used by aer_core do_recovery policy
*/

static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
{
	struct pcistub_device *psdev;
	pci_ers_result_t result;

	result = PCI_ERS_RESULT_RECOVERED;
	dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n",
		dev->bus->number, dev->devfn);

	/* Same structure as xen_pcibk_slot_reset(): find the stub device,
	 * validate the guest connection, relay the step via common_process(). */
	down_write(&pcistub_sem);
	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
				dev->bus->number,
				PCI_SLOT(dev->devfn),
				PCI_FUNC(dev->devfn));

	if (!psdev || !psdev->pdev) {
		dev_err(&dev->dev,
			DRV_NAME " device is not found/assigned\n");
		goto end;
	}

	if (!psdev->pdev->sh_info) {
		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
			" by HVM, kill it\n");
		kill_domain_by_device(psdev);
		goto release;
	}

	if (!test_bit(_XEN_PCIB_AERHANDLER,
		(unsigned long *)&psdev->pdev->sh_info->flags)) {
		dev_err(&dev->dev,
			"guest with no AER driver should have been killed\n");
		goto release;
	}
	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);

	if (result == PCI_ERS_RESULT_NONE ||
		result == PCI_ERS_RESULT_DISCONNECT) {
		dev_dbg(&dev->dev,
			"No AER mmio_enabled service or disconnected!\n");
		kill_domain_by_device(psdev);
	}
release:
	pcistub_device_put(psdev);
end:
	up_write(&pcistub_sem);
	return result;
}
729
/*xen_pcibk_error_detected: it will send the error_detected request to pcifront
* in case of the device driver could provide this service, and then wait
* for pcifront ack.
* @dev: pointer to PCI devices
* @error: the current PCI connection state
* return value is used by aer_core do_recovery policy
*/

static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
	pci_channel_state_t error)
{
	struct pcistub_device *psdev;
	pci_ers_result_t result;

	/* Unlike the other steps, the default here is CAN_RECOVER so the
	 * AER core proceeds to the next recovery stage. */
	result = PCI_ERS_RESULT_CAN_RECOVER;
	dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n",
		dev->bus->number, dev->devfn);

	down_write(&pcistub_sem);
	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
				dev->bus->number,
				PCI_SLOT(dev->devfn),
				PCI_FUNC(dev->devfn));

	if (!psdev || !psdev->pdev) {
		dev_err(&dev->dev,
			DRV_NAME " device is not found/assigned\n");
		goto end;
	}

	if (!psdev->pdev->sh_info) {
		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
			" by HVM, kill it\n");
		kill_domain_by_device(psdev);
		goto release;
	}

	/*Guest owns the device yet no aer handler regiested, kill guest*/
	if (!test_bit(_XEN_PCIB_AERHANDLER,
		(unsigned long *)&psdev->pdev->sh_info->flags)) {
		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
		kill_domain_by_device(psdev);
		goto release;
	}
	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);

	if (result == PCI_ERS_RESULT_NONE ||
		result == PCI_ERS_RESULT_DISCONNECT) {
		dev_dbg(&dev->dev,
			"No AER error_detected service or disconnected!\n");
		kill_domain_by_device(psdev);
	}
release:
	pcistub_device_put(psdev);
end:
	up_write(&pcistub_sem);
	return result;
}
788
/*xen_pcibk_error_resume: it will send the error_resume request to pcifront
* in case of the device driver could provide this service, and then wait
* for pcifront ack.
* @dev: pointer to PCI devices
*/

static void xen_pcibk_error_resume(struct pci_dev *dev)
{
	struct pcistub_device *psdev;

	dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n",
		dev->bus->number, dev->devfn);

	down_write(&pcistub_sem);
	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
				dev->bus->number,
				PCI_SLOT(dev->devfn),
				PCI_FUNC(dev->devfn));

	if (!psdev || !psdev->pdev) {
		dev_err(&dev->dev,
			DRV_NAME " device is not found/assigned\n");
		goto end;
	}

	if (!psdev->pdev->sh_info) {
		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
			" by HVM, kill it\n");
		kill_domain_by_device(psdev);
		goto release;
	}

	if (!test_bit(_XEN_PCIB_AERHANDLER,
		(unsigned long *)&psdev->pdev->sh_info->flags)) {
		dev_err(&dev->dev,
			"guest with no AER driver should have been killed\n");
		kill_domain_by_device(psdev);
		goto release;
	}
	/* No return value: resume is the last step, the result is unused. */
	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
		       PCI_ERS_RESULT_RECOVERED);
release:
	pcistub_device_put(psdev);
end:
	up_write(&pcistub_sem);
	return;
}
836
/*add xen_pcibk AER handling*/
/* AER callbacks the PCI core invokes for devices bound to this stub;
 * each relays the recovery step to the owning pcifront domain. */
static struct pci_error_handlers xen_pcibk_error_handler = {
	.error_detected = xen_pcibk_error_detected,
	.mmio_enabled = xen_pcibk_mmio_enabled,
	.slot_reset = xen_pcibk_slot_reset,
	.resume = xen_pcibk_error_resume,
};
844
/*
 * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
 * for a normal device. I don't want it to be loaded automatically.
 */

static struct pci_driver xen_pcibk_pci_driver = {
	/* The name should be xen_pciback, but until the tools are updated
	 * we will keep it as pciback. */
	.name = "pciback",
	.id_table = pcistub_ids,	/* wildcard; pcistub_probe() filters */
	.probe = pcistub_probe,
	.remove = pcistub_remove,
	.err_handler = &xen_pcibk_error_handler,
};
859
/*
 * Parse a PCI address of the form "dddd:bb:ss.f" or "bb:ss.f" (all hex)
 * into its components.  When the domain is omitted it defaults to 0.
 * Returns 0 on success, -EINVAL on malformed input.
 */
static inline int str_to_slot(const char *buf, int *domain, int *bus,
			      int *slot, int *func)
{
	int fields;

	fields = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
	if (fields == 4)
		return 0;
	if (fields < 0)
		return -EINVAL;

	/* No domain given; assume domain 0 and re-parse. */
	*domain = 0;
	fields = sscanf(buf, " %x:%x.%x", bus, slot, func);

	return (fields == 3) ? 0 : -EINVAL;
}
879
/*
 * Parse a quirk specification "dddd:bb:ss.f-reg:size:mask" (all hex)
 * as written to the sysfs 'quirks' attribute.  Returns 0 on success,
 * -EINVAL if the string does not match the full pattern.
 */
static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
			       *slot, int *func, int *reg, int *size, int *mask)
{
	int fields = sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x",
			    domain, bus, slot, func, reg, size, mask);

	return (fields == 7) ? 0 : -EINVAL;
}
892
/*
 * Record a domain:bus:slot.func to seize when probed.  Called from the
 * sysfs 'new_slot' attribute and from pci_devs_to_hide parsing in
 * pcistub_init().  Returns 0 or -ENOMEM.
 */
static int pcistub_device_id_add(int domain, int bus, int slot, int func)
{
	struct pcistub_device_id *pci_dev_id;
	unsigned long flags;

	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
	if (!pci_dev_id)
		return -ENOMEM;

	pci_dev_id->domain = domain;
	pci_dev_id->bus = bus;
	pci_dev_id->devfn = PCI_DEVFN(slot, func);

	pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n",
		 domain, bus, slot, func);

	spin_lock_irqsave(&device_ids_lock, flags);
	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return 0;
}
915
/*
 * Remove every seize-list entry matching domain:bus:slot.func.
 * Returns 0 if at least one entry was removed, -ENOENT otherwise.
 */
static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
{
	struct pcistub_device_id *pci_dev_id, *t;
	int devfn = PCI_DEVFN(slot, func);
	int err = -ENOENT;
	unsigned long flags;

	spin_lock_irqsave(&device_ids_lock, flags);
	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
				 slot_list) {
		if (pci_dev_id->domain == domain
		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
			/* Don't break; here because it's possible the same
			 * slot could be in the list more than once
			 */
			list_del(&pci_dev_id->slot_list);
			kfree(pci_dev_id);

			err = 0;

			pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from "
				 "seize list\n", domain, bus, slot, func);
		}
	}
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return err;
}
944
945static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
946 int size, int mask)
947{
948 int err = 0;
949 struct pcistub_device *psdev;
950 struct pci_dev *dev;
951 struct config_field *field;
952
953 psdev = pcistub_device_find(domain, bus, slot, func);
954 if (!psdev || !psdev->dev) {
955 err = -ENODEV;
956 goto out;
957 }
958 dev = psdev->dev;
959
960 field = kzalloc(sizeof(*field), GFP_ATOMIC);
961 if (!field) {
962 err = -ENOMEM;
963 goto out;
964 }
965
966 field->offset = reg;
967 field->size = size;
968 field->mask = mask;
969 field->init = NULL;
970 field->reset = NULL;
971 field->release = NULL;
972 field->clean = xen_pcibk_config_field_free;
973
974 err = xen_pcibk_config_quirks_add_field(dev, field);
975 if (err)
976 kfree(field);
977out:
978 return err;
979}
980
981static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
982 size_t count)
983{
984 int domain, bus, slot, func;
985 int err;
986
987 err = str_to_slot(buf, &domain, &bus, &slot, &func);
988 if (err)
989 goto out;
990
991 err = pcistub_device_id_add(domain, bus, slot, func);
992
993out:
994 if (!err)
995 err = count;
996 return err;
997}
998
999DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
1000
/*
 * sysfs 'remove_slot' store handler: parse a PCI address and remove
 * all matching entries from the seize list.  Returns count or -errno.
 */
static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
				   size_t count)
{
	int domain, bus, slot, func;
	int err;

	err = str_to_slot(buf, &domain, &bus, &slot, &func);
	if (err)
		goto out;

	err = pcistub_device_id_remove(domain, bus, slot, func);

out:
	if (!err)
		err = count;
	return err;
}

DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
1020
/*
 * sysfs 'slots' show handler: list every seize-list entry, one
 * "dddd:bb:ss.f" per line, truncated at PAGE_SIZE.
 */
static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
{
	struct pcistub_device_id *pci_dev_id;
	size_t count = 0;
	unsigned long flags;

	spin_lock_irqsave(&device_ids_lock, flags);
	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
		if (count >= PAGE_SIZE)
			break;

		count += scnprintf(buf + count, PAGE_SIZE - count,
				   "%04x:%02x:%02x.%01x\n",
				   pci_dev_id->domain, pci_dev_id->bus,
				   PCI_SLOT(pci_dev_id->devfn),
				   PCI_FUNC(pci_dev_id->devfn));
	}
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return count;
}

DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
1044
/*
 * sysfs 'irq_handlers' show handler: for each stub-owned device print
 * its name, whether the fake IRQ handler is installed ("on"/"off"),
 * whether it ACKs interrupts, and the 'handled' counter.
 */
static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
{
	struct pcistub_device *psdev;
	struct xen_pcibk_dev_data *dev_data;
	size_t count = 0;
	unsigned long flags;

	spin_lock_irqsave(&pcistub_devices_lock, flags);
	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (count >= PAGE_SIZE)
			break;
		if (!psdev->dev)
			continue;
		dev_data = pci_get_drvdata(psdev->dev);
		if (!dev_data)
			continue;
		count +=
		    scnprintf(buf + count, PAGE_SIZE - count,
			      "%s:%s:%sing:%ld\n",
			      pci_name(psdev->dev),
			      dev_data->isr_on ? "on" : "off",
			      dev_data->ack_intr ? "ack" : "not ack",
			      dev_data->handled);
	}
	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
	return count;
}

DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
1074
1075static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
1076 const char *buf,
1077 size_t count)
1078{
1079 struct pcistub_device *psdev;
1080 struct xen_pcibk_dev_data *dev_data;
1081 int domain, bus, slot, func;
1082 int err = -ENOENT;
1083
1084 err = str_to_slot(buf, &domain, &bus, &slot, &func);
1085 if (err)
1086 goto out;
1087
1088 psdev = pcistub_device_find(domain, bus, slot, func);
1089
1090 if (!psdev)
1091 goto out;
1092
1093 dev_data = pci_get_drvdata(psdev->dev);
1094 if (!dev_data)
1095 goto out;
1096
1097 dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
1098 dev_data->irq_name, dev_data->isr_on,
1099 !dev_data->isr_on);
1100
1101 dev_data->isr_on = !(dev_data->isr_on);
1102 if (dev_data->isr_on)
1103 dev_data->ack_intr = 1;
1104out:
1105 if (!err)
1106 err = count;
1107 return err;
1108}
1109DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
1110
/*
 * sysfs 'quirks' store handler: parse "dddd:bb:ss.f-reg:size:mask" and
 * register it as an extra writable config-space field for the device.
 * Returns count or -errno.
 */
static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
				 size_t count)
{
	int domain, bus, slot, func, reg, size, mask;
	int err;

	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
			   &mask);
	if (err)
		goto out;

	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);

out:
	if (!err)
		err = count;
	return err;
}
1129
/*
 * sysfs 'quirks' show handler: dump each registered quirk (device
 * address + matched IDs) followed by its config fields, truncated at
 * PAGE_SIZE.
 */
static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
{
	int count = 0;
	unsigned long flags;
	struct xen_pcibk_config_quirk *quirk;
	struct xen_pcibk_dev_data *dev_data;
	const struct config_field *field;
	const struct config_field_entry *cfg_entry;

	spin_lock_irqsave(&device_ids_lock, flags);
	list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) {
		if (count >= PAGE_SIZE)
			goto out;

		count += scnprintf(buf + count, PAGE_SIZE - count,
				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
				   quirk->pdev->bus->number,
				   PCI_SLOT(quirk->pdev->devfn),
				   PCI_FUNC(quirk->pdev->devfn),
				   quirk->devid.vendor, quirk->devid.device,
				   quirk->devid.subvendor,
				   quirk->devid.subdevice);

		dev_data = pci_get_drvdata(quirk->pdev);
		/* NOTE(review): dev_data is not NULL-checked before the walk
		 * below, unlike the other show handlers -- presumably quirks
		 * only exist for fully initialized devices; verify. */

		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
			field = cfg_entry->field;
			if (count >= PAGE_SIZE)
				goto out;

			count += scnprintf(buf + count, PAGE_SIZE - count,
					   "\t\t%08x:%01x:%08x\n",
					   cfg_entry->base_offset +
					   field->offset, field->size,
					   field->mask);
		}
	}

out:
	spin_unlock_irqrestore(&device_ids_lock, flags);

	return count;
}

DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
1175
/*
 * sysfs 'permissive' store handler: enable permissive config-space
 * access for the device at the given PCI address, warning loudly since
 * this weakens isolation.  Returns count or -errno.
 */
static ssize_t permissive_add(struct device_driver *drv, const char *buf,
			      size_t count)
{
	int domain, bus, slot, func;
	int err;
	struct pcistub_device *psdev;
	struct xen_pcibk_dev_data *dev_data;
	err = str_to_slot(buf, &domain, &bus, &slot, &func);
	if (err)
		goto out;
	psdev = pcistub_device_find(domain, bus, slot, func);
	if (!psdev) {
		err = -ENODEV;
		goto out;
	}
	if (!psdev->dev) {
		err = -ENODEV;
		goto release;
	}
	dev_data = pci_get_drvdata(psdev->dev);
	/* the driver data for a device should never be null at this point */
	if (!dev_data) {
		err = -ENXIO;
		goto release;
	}
	if (!dev_data->permissive) {
		dev_data->permissive = 1;
		/* Let user know that what they're doing could be unsafe */
		dev_warn(&psdev->dev->dev, "enabling permissive mode "
			 "configuration space accesses!\n");
		dev_warn(&psdev->dev->dev,
			 "permissive mode is potentially unsafe!\n");
	}
release:
	/* Drop the reference taken by pcistub_device_find(). */
	pcistub_device_put(psdev);
out:
	if (!err)
		err = count;
	return err;
}
1216
/*
 * sysfs 'permissive' show handler: list the names of all stub-owned
 * devices currently in permissive mode, one per line.
 */
static ssize_t permissive_show(struct device_driver *drv, char *buf)
{
	struct pcistub_device *psdev;
	struct xen_pcibk_dev_data *dev_data;
	size_t count = 0;
	unsigned long flags;
	spin_lock_irqsave(&pcistub_devices_lock, flags);
	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (count >= PAGE_SIZE)
			break;
		if (!psdev->dev)
			continue;
		dev_data = pci_get_drvdata(psdev->dev);
		if (!dev_data || !dev_data->permissive)
			continue;
		count +=
		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
			      pci_name(psdev->dev));
	}
	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
	return count;
}

DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
1241
/*
 * Tear down everything pcistub_init() created: remove all sysfs
 * attributes (safe even if creation failed part-way; removal of a
 * missing file is harmless) and unregister the PCI driver.
 */
static void pcistub_exit(void)
{
	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
	driver_remove_file(&xen_pcibk_pci_driver.driver,
			   &driver_attr_remove_slot);
	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots);
	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks);
	driver_remove_file(&xen_pcibk_pci_driver.driver,
			   &driver_attr_permissive);
	driver_remove_file(&xen_pcibk_pci_driver.driver,
			   &driver_attr_irq_handlers);
	driver_remove_file(&xen_pcibk_pci_driver.driver,
			   &driver_attr_irq_handler_state);
	pci_unregister_driver(&xen_pcibk_pci_driver);
}
1257
/*
 * Parse the pci_devs_to_hide parameter -- a sequence of "(dddd:bb:ss.f)"
 * or "(bb:ss.f)" tokens -- into the seize list, register the stub PCI
 * driver, and create its sysfs control files.  Any sysfs failure tears
 * everything down via pcistub_exit().  Returns 0 or -errno.
 */
static int __init pcistub_init(void)
{
	int pos = 0;
	int err = 0;
	int domain, bus, slot, func;
	int parsed;

	if (pci_devs_to_hide && *pci_devs_to_hide) {
		do {
			parsed = 0;

			/* %n records how many characters were consumed so we
			 * can advance to the next token. */
			err = sscanf(pci_devs_to_hide + pos,
				     " (%x:%x:%x.%x) %n",
				     &domain, &bus, &slot, &func, &parsed);
			if (err != 4) {
				domain = 0;
				err = sscanf(pci_devs_to_hide + pos,
					     " (%x:%x.%x) %n",
					     &bus, &slot, &func, &parsed);
				if (err != 3)
					goto parse_error;
			}

			err = pcistub_device_id_add(domain, bus, slot, func);
			if (err)
				goto out;

			/* if parsed<=0, we've reached the end of the string */
			pos += parsed;
		} while (parsed > 0 && pci_devs_to_hide[pos]);
	}

	/* If we're the first PCI Device Driver to register, we're the
	 * first one to get offered PCI devices as they become
	 * available (and thus we can be the first to grab them)
	 */
	err = pci_register_driver(&xen_pcibk_pci_driver);
	if (err < 0)
		goto out;

	err = driver_create_file(&xen_pcibk_pci_driver.driver,
				 &driver_attr_new_slot);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_remove_slot);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_slots);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_quirks);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_permissive);

	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_irq_handlers);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_irq_handler_state);
	if (err)
		pcistub_exit();

out:
	return err;

parse_error:
	printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n",
	       pci_devs_to_hide + pos);
	return -EINVAL;
}
1330
#ifndef MODULE
/*
 * fs_initcall happens before device_initcall
 * so xen_pcibk *should* get called first (b/c we
 * want to suck up any device before other drivers
 * get a chance by being the first pci device
 * driver to register)
 *
 * When built as a module, pcistub_init() is instead called from
 * xen_pcibk_init() below.
 */
fs_initcall(pcistub_init);
#endif
1341
/*
 * Module entry point: only runs in the initial (privileged) domain.
 * Initializes config-space emulation, (for modular builds) the stub
 * driver, finishes deferred device init, and registers with xenbus.
 */
static int __init xen_pcibk_init(void)
{
	int err;

	if (!xen_initial_domain())
		return -ENODEV;

	err = xen_pcibk_config_init();
	if (err)
		return err;

#ifdef MODULE
	/* Built-in kernels run pcistub_init() earlier via fs_initcall. */
	err = pcistub_init();
	if (err < 0)
		return err;
#endif

	pcistub_init_devices_late();
	err = xen_pcibk_xenbus_register();
	if (err)
		pcistub_exit();

	return err;
}
1366
/* Module exit: unhook from xenbus first, then tear down the stub driver. */
static void __exit xen_pcibk_cleanup(void)
{
	xen_pcibk_xenbus_unregister();
	pcistub_exit();
}

module_init(xen_pcibk_init);
module_exit(xen_pcibk_cleanup);

MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
new file mode 100644
index 000000000000..a0e131a81503
--- /dev/null
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -0,0 +1,183 @@
1/*
2 * PCI Backend Common Data Structures & Function Declarations
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 */
6#ifndef __XEN_PCIBACK_H__
7#define __XEN_PCIBACK_H__
8
9#include <linux/pci.h>
10#include <linux/interrupt.h>
11#include <xen/xenbus.h>
12#include <linux/list.h>
13#include <linux/spinlock.h>
14#include <linux/workqueue.h>
15#include <linux/atomic.h>
16#include <xen/interface/io/pciif.h>
17
/* List node tying one pci_dev into a backend's device list. */
struct pci_dev_entry {
        struct list_head list;
        struct pci_dev *dev;
};
22
/* Bit numbers and masks for xen_pcibk_device.flags. */
#define _PDEVF_op_active        (0) /* a frontend op is being processed */
#define PDEVF_op_active         (1<<(_PDEVF_op_active))
#define _PCIB_op_pending        (1) /* an AER op awaits an ack from pcifront */
#define PCIB_op_pending         (1<<(_PCIB_op_pending))
27
/* Per-frontend backend instance (one per connected xenbus device). */
struct xen_pcibk_device {
        void *pci_dev_data;             /* backend-specific (vpci/passthrough) state */
        spinlock_t dev_lock;            /* guards sh_info and evtchn_irq setup/teardown */
        struct xenbus_device *xdev;
        struct xenbus_watch be_watch;
        u8 be_watching;                 /* non-zero while be_watch is registered */
        int evtchn_irq;                 /* IRQ bound to the frontend's event channel */
        struct xen_pci_sharedinfo *sh_info;     /* shared page mapped from the frontend */
        unsigned long flags;            /* PDEVF_/PCIB_ bits above */
        struct work_struct op_work;     /* executes xen_pcibk_do_op */
};
39
/* Per-device state, stored via pci_set_drvdata() on the exported device. */
struct xen_pcibk_dev_data {
        struct list_head config_fields; /* virtual config-space overlay fields */
        unsigned int permissive:1;      /* allow writes normally filtered out */
        unsigned int warned_on_write:1;
        unsigned int enable_intx:1;
        unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
        unsigned int ack_intr:1; /* .. and ACK-ing */
        unsigned long handled;  /* interrupts claimed by the fake handler */
        unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
        char irq_name[0]; /* xen-pcibk[000:04:00.0] */
};
51
52/* Used by XenBus and xen_pcibk_ops.c */
53extern wait_queue_head_t xen_pcibk_aer_wait_queue;
54extern struct workqueue_struct *xen_pcibk_wq;
55/* Used by pcistub.c and conf_space_quirks.c */
56extern struct list_head xen_pcibk_quirks;
57
58/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
59struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
60 int domain, int bus,
61 int slot, int func);
62struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
63 struct pci_dev *dev);
64void pcistub_put_pci_dev(struct pci_dev *dev);
65
66/* Ensure a device is turned off or reset */
67void xen_pcibk_reset_device(struct pci_dev *pdev);
68
69/* Access a virtual configuration space for a PCI device */
70int xen_pcibk_config_init(void);
71int xen_pcibk_config_init_dev(struct pci_dev *dev);
72void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
73void xen_pcibk_config_reset_dev(struct pci_dev *dev);
74void xen_pcibk_config_free_dev(struct pci_dev *dev);
75int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
76 u32 *ret_val);
77int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
78 u32 value);
79
80/* Handle requests for specific devices from the frontend */
81typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
82 unsigned int domain, unsigned int bus,
83 unsigned int devfn, unsigned int devid);
84typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
85 unsigned int domain, unsigned int bus);
86
/* Backend registration for the two types of BDF representation:
 *  vpci - BDFs start at 00
 *  passthrough - BDFs are exactly like in the host.
 * All hooks are dispatched through the xen_pcibk_* static inlines below,
 * which tolerate a NULL backend or NULL hook.
 */
struct xen_pcibk_backend {
        char *name;
        int (*init)(struct xen_pcibk_device *pdev);
        void (*free)(struct xen_pcibk_device *pdev);
        int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
                    unsigned int *domain, unsigned int *bus,
                    unsigned int *devfn);
        int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
        void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
        int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
                   int devid, publish_pci_dev_cb publish_cb);
        struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
                               unsigned int domain, unsigned int bus,
                               unsigned int devfn);
};
106
107extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
108extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
109extern struct xen_pcibk_backend *xen_pcibk_backend;
110
/* Hand @dev to the active backend's ->add hook; returns -1 when no backend
 * (or no hook) is registered. */
static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
                                        struct pci_dev *dev,
                                        int devid,
                                        publish_pci_dev_cb publish_cb)
{
        if (xen_pcibk_backend && xen_pcibk_backend->add)
                return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
        return -1;
};
120static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
121 struct pci_dev *dev)
122{
123 if (xen_pcibk_backend && xen_pcibk_backend->free)
124 return xen_pcibk_backend->release(pdev, dev);
125};
126
/* Look up the pci_dev exported to the frontend at (domain, bus, devfn);
 * NULL when no backend is registered or the device is not found. */
static inline struct pci_dev *
xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
                      unsigned int bus, unsigned int devfn)
{
        if (xen_pcibk_backend && xen_pcibk_backend->get)
                return xen_pcibk_backend->get(pdev, domain, bus, devfn);
        return NULL;
};
/**
* Added for dom0 PCIe-AER handling. Translate a host pci_dev back to the
* guest-visible domain/bus/devfn in xen_pcibk before sending an AER request
* to pcifront, so that the guest can identify the device and cooperate with
* xen_pcibk to finish the AER recovery job if the device driver has the
* capability.  Returns the backend's ->find result, or -1 with no backend.
*/
static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
                                             struct xen_pcibk_device *pdev,
                                             unsigned int *domain,
                                             unsigned int *bus,
                                             unsigned int *devfn)
{
        if (xen_pcibk_backend && xen_pcibk_backend->find)
                return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
                                               devfn);
        return -1;
};
/* Initialize the active backend's per-pdev state; -1 with no backend. */
static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
        if (xen_pcibk_backend && xen_pcibk_backend->init)
                return xen_pcibk_backend->init(pdev);
        return -1;
};
/* Publish the backend's PCI root buses to xenstore via @cb; -1 with no
 * backend. */
static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
                                              publish_pci_root_cb cb)
{
        if (xen_pcibk_backend && xen_pcibk_backend->publish)
                return xen_pcibk_backend->publish(pdev, cb);
        return -1;
};
/* Free the active backend's per-pdev state (backend ->free hook). */
static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
        if (xen_pcibk_backend && xen_pcibk_backend->free)
                return xen_pcibk_backend->free(pdev);
};
170/* Handles events from front-end */
171irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
172void xen_pcibk_do_op(struct work_struct *data);
173
174int xen_pcibk_xenbus_register(void);
175void xen_pcibk_xenbus_unregister(void);
176
177extern int verbose_request;
178
179void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
180#endif
181
182/* Handles shared IRQs that can go to the device domain and the control domain. */
183void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
new file mode 100644
index 000000000000..8c95c3415b75
--- /dev/null
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -0,0 +1,384 @@
1/*
2 * PCI Backend Operations - respond to PCI requests from Frontend
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 */
6#include <linux/module.h>
7#include <linux/wait.h>
8#include <linux/bitops.h>
9#include <xen/events.h>
10#include <linux/sched.h>
11#include "pciback.h"
12
13#define DRV_NAME "xen-pciback"
14int verbose_request;
15module_param(verbose_request, int, 0644);
16
17static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
18
/* Ensure a device has the fake IRQ handler "turned on/off" and is
 * ready to be exported. This MUST be run after xen_pcibk_reset_device
 * which does the actual PCI device enable/disable.
 */
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
        struct xen_pcibk_dev_data *dev_data;
        int rc;
        int enable = 0;

        dev_data = pci_get_drvdata(dev);
        if (!dev_data)
                return;

        /* We don't deal with bridges */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
                return;

        if (reset) {
                dev_data->enable_intx = 0;
                dev_data->ack_intr = 0;
        }
        enable = dev_data->enable_intx;

        /* Asked to disable, but ISR isn't running: nothing to do. */
        if (!enable && !dev_data->isr_on)
                return;

        /* Squirrel away the IRQs in the dev_data. We need this
         * b/c when device transitions to MSI, the dev->irq is
         * overwritten with the MSI vector.
         */
        if (enable)
                dev_data->irq = dev->irq;

        /*
         * SR-IOV devices (which all use MSI-X and have no legacy
         * interrupts) have dev->irq == 0, so inhibit creating a fake
         * IRQ handler for them.
         */
        if (dev_data->irq == 0)
                goto out;

        dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
                dev_data->irq_name,
                dev_data->irq,
                pci_is_enabled(dev) ? "on" : "off",
                dev->msi_enabled ? "MSI" : "",
                dev->msix_enabled ? "MSI/X" : "",
                dev_data->isr_on ? "enable" : "disable",
                enable ? "enable" : "disable");

        if (enable) {
                /* IRQF_SHARED: the line may also be in use by dom0. */
                rc = request_irq(dev_data->irq,
                                 xen_pcibk_guest_interrupt, IRQF_SHARED,
                                 dev_data->irq_name, dev);
                if (rc) {
                        dev_err(&dev->dev, "%s: failed to install fake IRQ " \
                                "handler for IRQ %d! (rc:%d)\n",
                                dev_data->irq_name, dev_data->irq, rc);
                        goto out;
                }
        } else {
                free_irq(dev_data->irq, dev);
                dev_data->irq = 0;
        }
        dev_data->isr_on = enable;
        dev_data->ack_intr = enable;
out:
        dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
                dev_data->irq_name,
                dev_data->irq,
                pci_is_enabled(dev) ? "on" : "off",
                dev->msi_enabled ? "MSI" : "",
                dev->msix_enabled ? "MSI/X" : "",
                enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
                        (dev_data->isr_on ? "failed to disable" : "disabled"));
}
96
/* Ensure a device is "turned off" and ready to be exported.
 * (Also see xen_pcibk_config_reset to ensure virtual configuration space is
 * ready to be re-exported)
 */
void xen_pcibk_reset_device(struct pci_dev *dev)
{
        u16 cmd;

        /* Tear down the fake IRQ handler first. */
        xen_pcibk_control_isr(dev, 1 /* reset device */);

        /* Disable devices (but not bridges) */
        if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
                /* The guest could have been abruptly killed without
                 * disabling MSI/MSI-X interrupts.*/
                if (dev->msix_enabled)
                        pci_disable_msix(dev);
                if (dev->msi_enabled)
                        pci_disable_msi(dev);
#endif
                pci_disable_device(dev);

                pci_write_config_word(dev, PCI_COMMAND, 0);

                dev->is_busmaster = 0;
        } else {
                /* Bridges: only clear bus-mastering (memory-write-
                 * invalidate), leave the rest of PCI_COMMAND alone. */
                pci_read_config_word(dev, PCI_COMMAND, &cmd);
                if (cmd & (PCI_COMMAND_INVALIDATE)) {
                        cmd &= ~(PCI_COMMAND_INVALIDATE);
                        pci_write_config_word(dev, PCI_COMMAND, cmd);

                        dev->is_busmaster = 0;
                }
        }
}
132
133#ifdef CONFIG_PCI_MSI
/* XEN_PCI_OP_enable_msi handler: enable MSI on @dev and report the PIRQ
 * number (not the local IRQ) back to the guest in op->value. */
static
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
                         struct pci_dev *dev, struct xen_pci_op *op)
{
        struct xen_pcibk_dev_data *dev_data;
        int otherend = pdev->xdev->otherend_id;
        int status;

        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));

        status = pci_enable_msi(dev);

        if (status) {
                printk(KERN_ERR "error enable msi for guest %x status %x\n",
                        otherend, status);
                op->value = 0;
                return XEN_PCI_ERR_op_failed;
        }

        /* The value the guest needs is actually the IDT vector, not the
         * the local domain's IRQ number. */

        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
                        op->value);

        /* Legacy INTx is gone now; stop ACK-ing the fake handler. */
        dev_data = pci_get_drvdata(dev);
        if (dev_data)
                dev_data->ack_intr = 0;

        return 0;
}
168
/* XEN_PCI_OP_disable_msi handler: disable MSI and report the (restored)
 * legacy PIRQ back in op->value; re-arm INTx ACK-ing. */
static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
                          struct pci_dev *dev, struct xen_pci_op *op)
{
        struct xen_pcibk_dev_data *dev_data;

        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
                       pci_name(dev));
        pci_disable_msi(dev);

        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
                        op->value);
        dev_data = pci_get_drvdata(dev);
        if (dev_data)
                dev_data->ack_intr = 1;
        return 0;
}
189
/* XEN_PCI_OP_enable_msix handler: enable up to SH_INFO_MAX_VEC vectors as
 * requested by the guest (op->value = count, op->msix_entries = table) and
 * rewrite each vector as a PIRQ number for the frontend. */
static
int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
                          struct pci_dev *dev, struct xen_pci_op *op)
{
        struct xen_pcibk_dev_data *dev_data;
        int i, result;
        struct msix_entry *entries;

        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
                       pci_name(dev));
        /* op->value is guest-controlled; bound it before allocating. */
        if (op->value > SH_INFO_MAX_VEC)
                return -EINVAL;

        entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
        if (entries == NULL)
                return -ENOMEM;

        for (i = 0; i < op->value; i++) {
                entries[i].entry = op->msix_entries[i].entry;
                entries[i].vector = op->msix_entries[i].vector;
        }

        result = pci_enable_msix(dev, entries, op->value);

        if (result == 0) {
                for (i = 0; i < op->value; i++) {
                        op->msix_entries[i].entry = entries[i].entry;
                        /* Translate local IRQ to the guest-visible PIRQ. */
                        if (entries[i].vector)
                                op->msix_entries[i].vector =
                                        xen_pirq_from_irq(entries[i].vector);
                        if (unlikely(verbose_request))
                                printk(KERN_DEBUG DRV_NAME ": %s: " \
                                        "MSI-X[%d]: %d\n",
                                        pci_name(dev), i,
                                        op->msix_entries[i].vector);
                }
        } else {
                printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n",
                        pci_name(dev), result);
        }
        kfree(entries);

        op->value = result;
        dev_data = pci_get_drvdata(dev);
        if (dev_data)
                dev_data->ack_intr = 0;

        return result;
}
240
/* XEN_PCI_OP_disable_msix handler: disable MSI-X and report the legacy
 * PIRQ (0 for SR-IOV VFs, which have no legacy IRQ) back in op->value. */
static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
                           struct pci_dev *dev, struct xen_pci_op *op)
{
        struct xen_pcibk_dev_data *dev_data;
        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
                        pci_name(dev));
        pci_disable_msix(dev);

        /*
         * SR-IOV devices (which don't have any legacy IRQ) have
         * an undefined IRQ value of zero.
         */
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
        if (unlikely(verbose_request))
                printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
                        op->value);
        dev_data = pci_get_drvdata(dev);
        if (dev_data)
                dev_data->ack_intr = 1;
        return 0;
}
264#endif
/*
* Now the same evtchn is used for both pcifront conf_read_write requests
* as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service to avoid conflict with the aer_core
* do_recovery job which also uses the system default work_queue
*/
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
        /* Check that frontend is requesting an operation and that we are not
         * already processing a request */
        if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
            && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
                queue_work(xen_pcibk_wq, &pdev->op_work);
        }
        /*_XEN_PCIB_active should have been cleared by pcifront. And also make
        sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
        if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
            && test_bit(_PCIB_op_pending, &pdev->flags)) {
                wake_up(&xen_pcibk_aer_wait_queue);
        }
}
286
/* Performing the configuration space reads/writes must not be done in atomic
 * context because some of the pci_* functions can sleep (mostly due to ACPI
 * use of semaphores). This function is intended to be called from a work
 * queue in process context taking a struct xen_pcibk_device as a parameter */

void xen_pcibk_do_op(struct work_struct *data)
{
        struct xen_pcibk_device *pdev =
                container_of(data, struct xen_pcibk_device, op_work);
        struct pci_dev *dev;
        struct xen_pcibk_dev_data *dev_data = NULL;
        struct xen_pci_op *op = &pdev->sh_info->op;
        int test_intx = 0;

        dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);

        if (dev == NULL)
                op->err = XEN_PCI_ERR_dev_not_found;
        else {
                dev_data = pci_get_drvdata(dev);
                /* Snapshot INTx state so a transition can be detected below. */
                if (dev_data)
                        test_intx = dev_data->enable_intx;
                switch (op->cmd) {
                case XEN_PCI_OP_conf_read:
                        op->err = xen_pcibk_config_read(dev,
                                  op->offset, op->size, &op->value);
                        break;
                case XEN_PCI_OP_conf_write:
                        op->err = xen_pcibk_config_write(dev,
                                  op->offset, op->size, op->value);
                        break;
#ifdef CONFIG_PCI_MSI
                case XEN_PCI_OP_enable_msi:
                        op->err = xen_pcibk_enable_msi(pdev, dev, op);
                        break;
                case XEN_PCI_OP_disable_msi:
                        op->err = xen_pcibk_disable_msi(pdev, dev, op);
                        break;
                case XEN_PCI_OP_enable_msix:
                        op->err = xen_pcibk_enable_msix(pdev, dev, op);
                        break;
                case XEN_PCI_OP_disable_msix:
                        op->err = xen_pcibk_disable_msix(pdev, dev, op);
                        break;
#endif
                default:
                        op->err = XEN_PCI_ERR_not_implemented;
                        break;
                }
        }
        if (!op->err && dev && dev_data) {
                /* Transition detected */
                if ((dev_data->enable_intx != test_intx))
                        xen_pcibk_control_isr(dev, 0 /* no reset */);
        }
        /* Tell the driver domain that we're done. */
        wmb();
        clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
        notify_remote_via_irq(pdev->evtchn_irq);

        /* Mark that we're done. */
        smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
        clear_bit(_PDEVF_op_active, &pdev->flags);
        smp_mb__after_clear_bit(); /* /before/ final check for work */

        /* Check to see if the driver domain tried to start another request in
         * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
         */
        xen_pcibk_test_and_schedule_op(pdev);
}
357
358irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
359{
360 struct xen_pcibk_device *pdev = dev_id;
361
362 xen_pcibk_test_and_schedule_op(pdev);
363
364 return IRQ_HANDLED;
365}
/* Fake IRQ handler installed for devices whose legacy interrupt is routed
 * to a guest: claim the interrupt while the guest is expected to ACK it.
 * Every 1000 handled interrupts, re-check whether the line is still shared
 * and stop claiming it when the hypervisor says it is not (NOTE(review):
 * xen_test_irq_shared()'s return convention looks inverted w.r.t. its name
 * - confirm against drivers/xen/events.c). */
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
        struct pci_dev *dev = (struct pci_dev *)dev_id;
        struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);

        if (dev_data->isr_on && dev_data->ack_intr) {
                dev_data->handled++;
                if ((dev_data->handled % 1000) == 0) {
                        if (xen_test_irq_shared(irq)) {
                                printk(KERN_INFO "%s IRQ line is not shared "
                                        "with other domains. Turning ISR off\n",
                                         dev_data->irq_name);
                                dev_data->ack_intr = 0;
                        }
                }
                return IRQ_HANDLED;
        }
        return IRQ_NONE;
}
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
new file mode 100644
index 000000000000..4a42cfb0959d
--- /dev/null
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -0,0 +1,259 @@
1/*
2 * PCI Backend - Provides a Virtual PCI bus (with real devices)
3 * to the frontend
4 *
5 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
6 */
7
8#include <linux/list.h>
9#include <linux/slab.h>
10#include <linux/pci.h>
11#include <linux/spinlock.h>
12#include "pciback.h"
13
14#define PCI_SLOT_MAX 32
15#define DRV_NAME "xen-pciback"
16
/* State of a virtual PCI bus: one device list per virtual slot. */
struct vpci_dev_data {
        /* Access to dev_list must be protected by lock */
        struct list_head dev_list[PCI_SLOT_MAX];
        spinlock_t lock;
};
22
/* First entry of @head; only meaningful when the list is non-empty. */
static inline struct list_head *list_first(struct list_head *head)
{
        return head->next;
}
27
/* ->get hook: map a guest-visible (domain, bus, devfn) to the real pci_dev.
 * The virtual bus is always domain 0, bus 0; slot indexes dev_list[],
 * and functions are matched within that slot's list. */
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
                                               unsigned int domain,
                                               unsigned int bus,
                                               unsigned int devfn)
{
        struct pci_dev_entry *entry;
        struct pci_dev *dev = NULL;
        struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
        unsigned long flags;

        if (domain != 0 || bus != 0)
                return NULL;

        if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
                spin_lock_irqsave(&vpci_dev->lock, flags);

                list_for_each_entry(entry,
                                    &vpci_dev->dev_list[PCI_SLOT(devfn)],
                                    list) {
                        if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
                                dev = entry->dev;
                                break;
                        }
                }

                spin_unlock_irqrestore(&vpci_dev->lock, flags);
        }
        return dev;
}
57
58static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
59{
60 if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
61 && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
62 return 1;
63
64 return 0;
65}
66
/* ->add hook: place @dev on the virtual PCI bus and publish its virtual
 * BDF via @publish_cb.  Functions of one physical device are kept in the
 * same virtual slot; otherwise the first free slot is used.  Bridges
 * cannot be exported on the virtual bus. */
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
                                   struct pci_dev *dev, int devid,
                                   publish_pci_dev_cb publish_cb)
{
        int err = 0, slot, func = -1;
        struct pci_dev_entry *t, *dev_entry;
        struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
        unsigned long flags;

        if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
                err = -EFAULT;
                xenbus_dev_fatal(pdev->xdev, err,
                                 "Can't export bridges on the virtual PCI bus");
                goto out;
        }

        dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
        if (!dev_entry) {
                err = -ENOMEM;
                xenbus_dev_fatal(pdev->xdev, err,
                                 "Error adding entry to virtual PCI bus");
                goto out;
        }

        dev_entry->dev = dev;

        spin_lock_irqsave(&vpci_dev->lock, flags);

        /* Keep multi-function devices together on the virtual PCI bus */
        for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
                if (!list_empty(&vpci_dev->dev_list[slot])) {
                        t = list_entry(list_first(&vpci_dev->dev_list[slot]),
                                       struct pci_dev_entry, list);

                        if (match_slot(dev, t->dev)) {
                                pr_info(DRV_NAME ": vpci: %s: "
                                        "assign to virtual slot %d func %d\n",
                                        pci_name(dev), slot,
                                        PCI_FUNC(dev->devfn));
                                list_add_tail(&dev_entry->list,
                                              &vpci_dev->dev_list[slot]);
                                func = PCI_FUNC(dev->devfn);
                                goto unlock;
                        }
                }
        }

        /* Assign to a new slot on the virtual PCI bus */
        for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
                if (list_empty(&vpci_dev->dev_list[slot])) {
                        printk(KERN_INFO DRV_NAME
                               ": vpci: %s: assign to virtual slot %d\n",
                               pci_name(dev), slot);
                        list_add_tail(&dev_entry->list,
                                      &vpci_dev->dev_list[slot]);
                        func = PCI_FUNC(dev->devfn);
                        goto unlock;
                }
        }

        err = -ENOMEM;
        xenbus_dev_fatal(pdev->xdev, err,
                         "No more space on root virtual PCI bus");

unlock:
        spin_unlock_irqrestore(&vpci_dev->lock, flags);

        /* Publish this device. */
        if (!err)
                err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);

out:
        return err;
}
141
/* ->release hook: unlink @dev from the virtual bus and return it to the
 * stub.  pcistub_put_pci_dev() is called outside the spinlock since it
 * may sleep. */
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
                                        struct pci_dev *dev)
{
        int slot;
        struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
        struct pci_dev *found_dev = NULL;
        unsigned long flags;

        spin_lock_irqsave(&vpci_dev->lock, flags);

        for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
                struct pci_dev_entry *e, *tmp;
                list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
                                         list) {
                        if (e->dev == dev) {
                                list_del(&e->list);
                                found_dev = e->dev;
                                kfree(e);
                                goto out;
                        }
                }
        }

out:
        spin_unlock_irqrestore(&vpci_dev->lock, flags);

        if (found_dev)
                pcistub_put_pci_dev(found_dev);
}
171
172static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
173{
174 int slot;
175 struct vpci_dev_data *vpci_dev;
176
177 vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
178 if (!vpci_dev)
179 return -ENOMEM;
180
181 spin_lock_init(&vpci_dev->lock);
182
183 for (slot = 0; slot < PCI_SLOT_MAX; slot++)
184 INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
185
186 pdev->pci_dev_data = vpci_dev;
187
188 return 0;
189}
190
/* ->publish hook: announce the virtual bus roots to xenstore. */
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
                                         publish_pci_root_cb publish_cb)
{
        /* The Virtual PCI bus has only one root */
        return publish_cb(pdev, 0, 0);
}
197
/* ->free hook: return every device to the stub and free the bus state.
 * Runs unlocked - callers must guarantee no concurrent list access. */
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
        int slot;
        struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;

        for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
                struct pci_dev_entry *e, *tmp;
                list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
                                         list) {
                        list_del(&e->list);
                        pcistub_put_pci_dev(e->dev);
                        kfree(e);
                }
        }

        kfree(vpci_dev);
        pdev->pci_dev_data = NULL;
}
216
217static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
218 struct xen_pcibk_device *pdev,
219 unsigned int *domain, unsigned int *bus,
220 unsigned int *devfn)
221{
222 struct pci_dev_entry *entry;
223 struct pci_dev *dev = NULL;
224 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
225 unsigned long flags;
226 int found = 0, slot;
227
228 spin_lock_irqsave(&vpci_dev->lock, flags);
229 for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
230 list_for_each_entry(entry,
231 &vpci_dev->dev_list[slot],
232 list) {
233 dev = entry->dev;
234 if (dev && dev->bus->number == pcidev->bus->number
235 && pci_domain_nr(dev->bus) ==
236 pci_domain_nr(pcidev->bus)
237 && dev->devfn == pcidev->devfn) {
238 found = 1;
239 *domain = 0;
240 *bus = 0;
241 *devfn = PCI_DEVFN(slot,
242 PCI_FUNC(pcidev->devfn));
243 }
244 }
245 }
246 spin_unlock_irqrestore(&vpci_dev->lock, flags);
247 return found;
248}
249
/* vpci backend: exports devices on a single flat virtual bus (00:xx.x). */
struct xen_pcibk_backend xen_pcibk_vpci_backend = {
        .name           = "vpci",
        .init           = __xen_pcibk_init_devices,
        .free           = __xen_pcibk_release_devices,
        .find           = __xen_pcibk_get_pcifront_dev,
        .publish        = __xen_pcibk_publish_pci_roots,
        .release        = __xen_pcibk_release_pci_dev,
        .add            = __xen_pcibk_add_pci_dev,
        .get            = __xen_pcibk_get_pci_dev,
};
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
new file mode 100644
index 000000000000..206c4ce030bc
--- /dev/null
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -0,0 +1,749 @@
1/*
2 * PCI Backend Xenbus Setup - handles setup with frontend and xend
3 *
4 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
5 */
6#include <linux/module.h>
7#include <linux/init.h>
8#include <linux/list.h>
9#include <linux/vmalloc.h>
10#include <linux/workqueue.h>
11#include <xen/xenbus.h>
12#include <xen/events.h>
13#include <asm/xen/pci.h>
14#include <linux/workqueue.h>
15#include "pciback.h"
16
17#define DRV_NAME "xen-pciback"
18#define INVALID_EVTCHN_IRQ (-1)
19struct workqueue_struct *xen_pcibk_wq;
20
21static int __read_mostly passthrough;
22module_param(passthrough, bool, S_IRUGO);
23MODULE_PARM_DESC(passthrough,
24 "Option to specify how to export PCI topology to guest:\n"\
25 " 0 - (default) Hide the true PCI topology and makes the frontend\n"\
26 " there is a single PCI bus with only the exported devices on it.\n"\
27 " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
28 " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
29 " 1 - Passthrough provides a real view of the PCI topology to the\n"\
30 " frontend (for example, a device at 06:01.b will still appear at\n"\
31 " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
32 " exposed PCI devices to its driver domains. This may be required\n"\
33 " for drivers which depend on finding their hardward in certain\n"\
34 " bus/slot locations.");
35
/* Allocate and initialize a backend instance for xenbus device @xdev.
 * Returns NULL on allocation or backend-init failure. */
static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
{
        struct xen_pcibk_device *pdev;

        pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
        if (pdev == NULL)
                goto out;
        dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);

        pdev->xdev = xdev;
        dev_set_drvdata(&xdev->dev, pdev);

        spin_lock_init(&pdev->dev_lock);

        pdev->sh_info = NULL;
        pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
        pdev->be_watching = 0;

        INIT_WORK(&pdev->op_work, xen_pcibk_do_op);

        /* Let the active backend (vpci/passthrough) set up pci_dev_data. */
        if (xen_pcibk_init_devices(pdev)) {
                kfree(pdev);
                pdev = NULL;
        }
out:
        return pdev;
}
63
/* Detach from the frontend: unbind the event channel, drain in-flight ops,
 * then unmap the shared page.  dev_lock guards the irq/sh_info fields. */
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
        spin_lock(&pdev->dev_lock);

        /* Ensure the guest can't trigger our handler before removing devices */
        if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
                unbind_from_irqhandler(pdev->evtchn_irq, pdev);
                pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
        }
        spin_unlock(&pdev->dev_lock);

        /* If the driver domain started an op, make sure we complete it
         * before releasing the shared memory */

        /* Note, the workqueue does not use spinlocks at all.*/
        flush_workqueue(xen_pcibk_wq);

        spin_lock(&pdev->dev_lock);
        if (pdev->sh_info != NULL) {
                xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
                pdev->sh_info = NULL;
        }
        spin_unlock(&pdev->dev_lock);

}
89
/* Full teardown of a backend instance: drop the xenbus watch, disconnect
 * from the frontend, release all exported devices, then free. */
static void free_pdev(struct xen_pcibk_device *pdev)
{
        if (pdev->be_watching) {
                unregister_xenbus_watch(&pdev->be_watch);
                pdev->be_watching = 0;
        }

        xen_pcibk_disconnect(pdev);

        xen_pcibk_release_devices(pdev);

        dev_set_drvdata(&pdev->xdev->dev, NULL);
        pdev->xdev = NULL;

        kfree(pdev);
}
106
/* Map the frontend's shared page (@gnt_ref) and bind its event channel
 * (@remote_evtchn) to xen_pcibk_handle_event.  Returns 0 on success. */
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                               int remote_evtchn)
{
        int err = 0;
        void *vaddr;

        dev_dbg(&pdev->xdev->dev,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

        err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
                goto out;
        }

        spin_lock(&pdev->dev_lock);
        pdev->sh_info = vaddr;
        spin_unlock(&pdev->dev_lock);

        err = bind_interdomain_evtchn_to_irqhandler(
                pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
                0, DRV_NAME, pdev);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                 "Error binding event channel to IRQ");
                goto out;
        }

        /* On success bind_* returns the allocated IRQ number. */
        spin_lock(&pdev->dev_lock);
        pdev->evtchn_irq = err;
        spin_unlock(&pdev->dev_lock);
        err = 0;

        dev_dbg(&pdev->xdev->dev, "Attached!\n");
out:
        return err;
}
146
/* Complete the handshake with the frontend: once both ends report
 * XenbusStateInitialised, read the grant ref / event channel / magic from
 * the frontend, attach to them and switch to Connected. */
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
{
        int err = 0;
        int gnt_ref, remote_evtchn;
        char *magic = NULL;


        /* Make sure we only do this setup once */
        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
            XenbusStateInitialised)
                goto out;

        /* Wait for frontend to state that it has published the configuration */
        if (xenbus_read_driver_state(pdev->xdev->otherend) !=
            XenbusStateInitialised)
                goto out;

        dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");

        err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
                            "pci-op-ref", "%u", &gnt_ref,
                            "event-channel", "%u", &remote_evtchn,
                            "magic", NULL, &magic, NULL);
        if (err) {
                /* If configuration didn't get read correctly, wait longer */
                xenbus_dev_fatal(pdev->xdev, err,
                                 "Error reading configuration from frontend");
                goto out;
        }

        if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
                xenbus_dev_fatal(pdev->xdev, -EFAULT,
                                 "version mismatch (%s/%s) with pcifront - "
                                 "halting xen_pcibk",
                                 magic, XEN_PCI_MAGIC);
                goto out;
        }

        err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
        if (err)
                goto out;

        dev_dbg(&pdev->xdev->dev, "Connecting...\n");

        err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
        if (err)
                xenbus_dev_fatal(pdev->xdev, err,
                                 "Error switching to connected state!");

        dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
        /* xenbus_gather() allocated magic on success; free in all paths. */
        kfree(magic);

        return err;
}
203
/* publish_pci_dev_cb: write "vdev-<devid>" = "DDDD:BB:SS.F" under our
 * xenstore node so the frontend can discover the exported device. */
static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
                                     unsigned int domain, unsigned int bus,
                                     unsigned int devfn, unsigned int devid)
{
        int err;
        int len;
        char str[64];

        len = snprintf(str, sizeof(str), "vdev-%d", devid);
        if (unlikely(len >= (sizeof(str) - 1))) {
                /* Key name would be truncated - refuse to publish. */
                err = -ENOMEM;
                goto out;
        }

        err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
                            "%04x:%02x:%02x.%02x", domain, bus,
                            PCI_SLOT(devfn), PCI_FUNC(devfn));

out:
        return err;
}
225
/* Hand one PCI device over to the frontend: fetch it from the pcistub
 * pool, add it to this backend's device list (publishing its "vdev-N"
 * node via xen_pcibk_publish_pci_dev), and register the frontend
 * domain as the device's owner.
 *
 * Returns 0 on success or a negative errno; -EINVAL when pcistub does
 * not hand the device back (presumably not bound to the stub driver,
 * or already exported -- TODO confirm against pcistub).
 */
static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
				   int domain, int bus, int slot, int func,
				   int devid)
{
	struct pci_dev *dev;
	int err = 0;

	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
		domain, bus, slot, func);

	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
	if (!dev) {
		err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Couldn't locate PCI device "
				 "(%04x:%02x:%02x.%01x)! "
				 "perhaps already in-use?",
				 domain, bus, slot, func);
		goto out;
	}

	err = xen_pcibk_add_pci_dev(pdev, dev, devid,
				    xen_pcibk_publish_pci_dev);
	if (err)
		goto out;

	dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
	/* A device can have only one owning domain; if a (stale) owner is
	 * still registered, replace it rather than failing the export. */
	if (xen_register_device_domain_owner(dev,
					     pdev->xdev->otherend_id) != 0) {
		dev_err(&dev->dev, "device has been assigned to another " \
			"domain! Over-writting the ownership, but beware.\n");
		xen_unregister_device_domain_owner(dev);
		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
	}

	/* TODO: It'd be nice to export a bridge and have all of its children
	 * get exported with it. This may be best done in xend (which will
	 * have to calculate resource usage anyway) but we probably want to
	 * put something in here to ensure that if a bridge gets given to a
	 * driver domain, that all devices under that bridge are not given
	 * to other driver domains (as he who controls the bridge can disable
	 * it and stop the other devices from working).
	 */
out:
	return err;
}
272
273static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
274 int domain, int bus, int slot, int func)
275{
276 int err = 0;
277 struct pci_dev *dev;
278
279 dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
280 domain, bus, slot, func);
281
282 dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
283 if (!dev) {
284 err = -EINVAL;
285 dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
286 "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
287 domain, bus, slot, func);
288 goto out;
289 }
290
291 dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
292 xen_unregister_device_domain_owner(dev);
293
294 xen_pcibk_release_pci_dev(pdev, dev);
295
296out:
297 return err;
298}
299
/* Record in xenstore which PCI root (domain:bus) an exported device
 * sits on.  Roots are written as "root-N" = "DDDD:BB" with "root_num"
 * counting them; a root already present is not written twice. */
static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
				      unsigned int domain, unsigned int bus)
{
	unsigned int d, b;
	int i, root_num, len, err;
	char str[64];

	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");

	/* A missing "root_num" node just means none published yet. */
	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
			   "root_num", "%d", &root_num);
	if (err == 0 || err == -ENOENT)
		root_num = 0;
	else if (err < 0)
		goto out;

	/* Verify that we haven't already published this pci root */
	for (i = 0; i < root_num; i++) {
		len = snprintf(str, sizeof(str), "root-%d", i);
		if (unlikely(len >= (sizeof(str) - 1))) {
			err = -ENOMEM;
			goto out;
		}

		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
				   str, "%x:%x", &d, &b);
		if (err < 0)
			goto out;
		if (err != 2) {
			err = -EINVAL;
			goto out;
		}

		/* Already published: nothing more to do. */
		if (d == domain && b == bus) {
			err = 0;
			goto out;
		}
	}

	len = snprintf(str, sizeof(str), "root-%d", root_num);
	if (unlikely(len >= (sizeof(str) - 1))) {
		err = -ENOMEM;
		goto out;
	}

	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
		root_num, domain, bus);

	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
			    "%04x:%02x", domain, bus);
	if (err)
		goto out;

	/* Bump the count only after the new entry was written. */
	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
			    "root_num", "%d", (root_num + 1));

out:
	return err;
}
359
/* Handle a frontend XenbusStateReconfiguring request: walk the
 * per-device "state-N" xenstore entries and, for each, hot-add
 * (substate Initialising, address in "dev-N") or hot-remove
 * (substate Closing, address in "vdev-N") the PCI device, then
 * switch this backend to XenbusStateReconfigured.
 *
 * NOTE(review): always returns 0, even after an error was reported
 * via xenbus_dev_fatal() -- confirm callers rely on that before
 * changing it.
 */
static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
{
	int err = 0;
	int num_devs;
	int domain, bus, slot, func;
	int substate;
	int i, len;
	char state_str[64];
	char dev_str[64];


	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");

	/* Make sure we only reconfigure once */
	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateReconfiguring)
		goto out;

	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
			   &num_devs);
	if (err != 1) {
		if (err >= 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of devices");
		goto out;
	}

	for (i = 0; i < num_devs; i++) {
		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
		if (unlikely(len >= (sizeof(state_str) - 1))) {
			err = -ENOMEM;
			xenbus_dev_fatal(pdev->xdev, err,
					 "String overflow while reading "
					 "configuration");
			goto out;
		}
		/* An unreadable substate is treated as Unknown and the
		 * device is simply skipped by the switch below. */
		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
				   "%d", &substate);
		if (err != 1)
			substate = XenbusStateUnknown;

		switch (substate) {
		case XenbusStateInitialising:
			/* Frontend asks for this device to be added. */
			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);

			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
			if (unlikely(len >= (sizeof(dev_str) - 1))) {
				err = -ENOMEM;
				xenbus_dev_fatal(pdev->xdev, err,
						 "String overflow while "
						 "reading configuration");
				goto out;
			}
			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
					   dev_str, "%x:%x:%x.%x",
					   &domain, &bus, &slot, &func);
			if (err < 0) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error reading device "
						 "configuration");
				goto out;
			}
			if (err != 4) {
				err = -EINVAL;
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error parsing pci device "
						 "configuration");
				goto out;
			}

			err = xen_pcibk_export_device(pdev, domain, bus, slot,
						      func, i);
			if (err)
				goto out;

			/* Publish pci roots. */
			err = xen_pcibk_publish_pci_roots(pdev,
						xen_pcibk_publish_pci_root);
			if (err) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error while publish PCI root"
						 "buses for frontend");
				goto out;
			}

			/* Tell the frontend this slot is ready. */
			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
					    state_str, "%d",
					    XenbusStateInitialised);
			if (err) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error switching substate of "
						 "dev-%d\n", i);
				goto out;
			}
			break;

		case XenbusStateClosing:
			/* Frontend asks for this device to be removed. */
			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);

			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
			if (unlikely(len >= (sizeof(dev_str) - 1))) {
				err = -ENOMEM;
				xenbus_dev_fatal(pdev->xdev, err,
						 "String overflow while "
						 "reading configuration");
				goto out;
			}
			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
					   dev_str, "%x:%x:%x.%x",
					   &domain, &bus, &slot, &func);
			if (err < 0) {
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error reading device "
						 "configuration");
				goto out;
			}
			if (err != 4) {
				err = -EINVAL;
				xenbus_dev_fatal(pdev->xdev, err,
						 "Error parsing pci device "
						 "configuration");
				goto out;
			}

			err = xen_pcibk_remove_device(pdev, domain, bus, slot,
						      func);
			if (err)
				goto out;

			/* TODO: If at some point we implement support for pci
			 * root hot-remove on pcifront side, we'll need to
			 * remove unnecessary xenstore nodes of pci roots here.
			 */

			break;

		default:
			break;
		}
	}

	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
	if (err) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error switching to reconfigured state!");
		goto out;
	}

out:
	return 0;
}
512
/* xenbus callback: react to frontend (pcifront) state transitions by
 * driving this backend's own state machine. */
static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
				       enum xenbus_state fe_state)
{
	struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);

	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);

	switch (fe_state) {
	case XenbusStateInitialised:
		/* Frontend finished its setup; attach to its ring/event
		 * channel and (on success) move to Connected. */
		xen_pcibk_attach(pdev);
		break;

	case XenbusStateReconfiguring:
		/* Frontend requests device hot-add/remove. */
		xen_pcibk_reconfigure(pdev);
		break;

	case XenbusStateConnected:
		/* pcifront switched its state from reconfiguring to connected.
		 * Then switch to connected state.
		 */
		xenbus_switch_state(xdev, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		xen_pcibk_disconnect(pdev);
		xenbus_switch_state(xdev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_pcibk_disconnect(pdev);
		xenbus_switch_state(xdev, XenbusStateClosed);
		if (xenbus_dev_is_online(xdev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* Frontend went away entirely: drop our device. */
		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
		device_unregister(&xdev->dev);
		break;

	default:
		break;
	}
}
556
/* Read the device list the toolstack wrote under our node ("num_devs",
 * "dev-N"), export each PCI device, mark each "state-N" Initialised,
 * publish the PCI root buses, and switch the backend to Initialised.
 * Bails out early (returning 0) unless still in InitWait, so repeated
 * invocations are harmless. */
static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
{
	/* Get configuration from xend (if available now) */
	int domain, bus, slot, func;
	int err = 0;
	int i, num_devs;
	char dev_str[64];
	char state_str[64];

	/* It's possible we could get the call to setup twice, so make sure
	 * we're not already connected.
	 */
	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateInitWait)
		goto out;

	dev_dbg(&pdev->xdev->dev, "getting be setup\n");

	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
			   &num_devs);
	if (err != 1) {
		if (err >= 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of devices");
		goto out;
	}

	for (i = 0; i < num_devs; i++) {
		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
		if (unlikely(l >= (sizeof(dev_str) - 1))) {
			err = -ENOMEM;
			xenbus_dev_fatal(pdev->xdev, err,
					 "String overflow while reading "
					 "configuration");
			goto out;
		}

		/* "dev-N" holds the backing device's DDDD:BB:SS.F address. */
		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
		if (err < 0) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error reading device configuration");
			goto out;
		}
		if (err != 4) {
			err = -EINVAL;
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error parsing pci device "
					 "configuration");
			goto out;
		}

		err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
		if (err)
			goto out;

		/* Switch substate of this device. */
		l = snprintf(state_str, sizeof(state_str), "state-%d", i);
		if (unlikely(l >= (sizeof(state_str) - 1))) {
			err = -ENOMEM;
			xenbus_dev_fatal(pdev->xdev, err,
					 "String overflow while reading "
					 "configuration");
			goto out;
		}
		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
				    "%d", XenbusStateInitialised);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
					 "substate of dev-%d\n", i);
			goto out;
		}
	}

	err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
	if (err) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error while publish PCI root buses "
				 "for frontend");
		goto out;
	}

	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
	if (err)
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error switching to initialised state!");

out:
	if (!err)
		/* see if pcifront is already configured (if not, we'll wait) */
		xen_pcibk_attach(pdev);

	return err;
}
652
653static void xen_pcibk_be_watch(struct xenbus_watch *watch,
654 const char **vec, unsigned int len)
655{
656 struct xen_pcibk_device *pdev =
657 container_of(watch, struct xen_pcibk_device, be_watch);
658
659 switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
660 case XenbusStateInitWait:
661 xen_pcibk_setup_backend(pdev);
662 break;
663
664 default:
665 break;
666 }
667}
668
/* xenbus probe: allocate the per-backend state, enter InitWait, and
 * start watching our own node for the toolstack-written configuration.
 *
 * NOTE(review): on the error paths after alloc_pdev() the pdev is not
 * freed here; presumably xen_pcibk_xenbus_remove() reclaims it via
 * drvdata when the device goes away -- confirm.
 */
static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
				  const struct xenbus_device_id *id)
{
	int err = 0;
	struct xen_pcibk_device *pdev = alloc_pdev(dev);

	if (pdev == NULL) {
		err = -ENOMEM;
		xenbus_dev_fatal(dev, err,
				 "Error allocating xen_pcibk_device struct");
		goto out;
	}

	/* wait for xend to configure us */
	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto out;

	/* watch the backend node for backend configuration information */
	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
				xen_pcibk_be_watch);
	if (err)
		goto out;

	pdev->be_watching = 1;

	/* We need to force a call to our callback here in case
	 * xend already configured us!
	 */
	xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);

out:
	return err;
}
703
704static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
705{
706 struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
707
708 if (pdev != NULL)
709 free_pdev(pdev);
710
711 return 0;
712}
713
/* Xenbus device type this backend binds to: "pci" frontends. */
static const struct xenbus_device_id xenpci_ids[] = {
	{"pci"},
	{""},
};

static struct xenbus_driver xenbus_xen_pcibk_driver = {
	.name = DRV_NAME,
	.owner = THIS_MODULE,
	.ids = xenpci_ids,
	.probe = xen_pcibk_xenbus_probe,
	.remove = xen_pcibk_xenbus_remove,
	.otherend_changed = xen_pcibk_frontend_changed,
};

/* Active backend flavour (vpci or passthrough); set once in
 * xen_pcibk_xenbus_register() and never changed afterwards. */
struct xen_pcibk_backend *xen_pcibk_backend;
729
730int __init xen_pcibk_xenbus_register(void)
731{
732 xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
733 if (!xen_pcibk_wq) {
734 printk(KERN_ERR "%s: create"
735 "xen_pciback_workqueue failed\n", __func__);
736 return -EFAULT;
737 }
738 xen_pcibk_backend = &xen_pcibk_vpci_backend;
739 if (passthrough)
740 xen_pcibk_backend = &xen_pcibk_passthrough_backend;
741 pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
742 return xenbus_register_backend(&xenbus_xen_pcibk_driver);
743}
744
/* Module exit path: tear down what xen_pcibk_xenbus_register() set up.
 * NOTE(review): the workqueue is destroyed before the xenbus driver is
 * unregistered; verify no backend callback can still queue work on
 * xen_pcibk_wq at this point. */
void __exit xen_pcibk_xenbus_unregister(void)
{
	destroy_workqueue(xen_pcibk_wq);
	xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
}
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
new file mode 100644
index 000000000000..010937b5a7c9
--- /dev/null
+++ b/drivers/xen/xen-selfballoon.c
@@ -0,0 +1,485 @@
1/******************************************************************************
2 * Xen selfballoon driver (and optional frontswap self-shrinking driver)
3 *
4 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5 *
6 * This code complements the cleancache and frontswap patchsets to optimize
7 * support for Xen Transcendent Memory ("tmem"). The policy it implements
8 * is rudimentary and will likely improve over time, but it does work well
9 * enough today.
10 *
11 * Two functionalities are implemented here which both use "control theory"
12 * (feedback) to optimize memory utilization. In a virtualized environment
13 * such as Xen, RAM is often a scarce resource and we would like to ensure
14 * that each of a possibly large number of virtual machines is using RAM
15 * efficiently, i.e. using as little as possible when under light load
16 * and obtaining as much as possible when memory demands are high.
17 * Since RAM needs vary highly dynamically and sometimes dramatically,
18 * "hysteresis" is used, that is, memory target is determined not just
19 * on current data but also on past data stored in the system.
20 *
21 * "Selfballooning" creates memory pressure by managing the Xen balloon
22 * driver to decrease and increase available kernel memory, driven
23 * largely by the target value of "Committed_AS" (see /proc/meminfo).
24 * Since Committed_AS does not account for clean mapped pages (i.e. pages
25 * in RAM that are identical to pages on disk), selfballooning has the
 26 * effect of pushing less frequently used clean pagecache pages out of
27 * kernel RAM and, presumably using cleancache, into Xen tmem where
28 * Xen can more efficiently optimize RAM utilization for such pages.
29 *
30 * When kernel memory demand unexpectedly increases faster than Xen, via
31 * the selfballoon driver, is able to (or chooses to) provide usable RAM,
32 * the kernel may invoke swapping. In most cases, frontswap is able
33 * to absorb this swapping into Xen tmem. However, due to the fact
34 * that the kernel swap subsystem assumes swapping occurs to a disk,
35 * swapped pages may sit on the disk for a very long time; even if
36 * the kernel knows the page will never be used again. This is because
37 * the disk space costs very little and can be overwritten when
38 * necessary. When such stale pages are in frontswap, however, they
39 * are taking up valuable real estate. "Frontswap selfshrinking" works
40 * to resolve this: When frontswap activity is otherwise stable
41 * and the guest kernel is not under memory pressure, the "frontswap
42 * selfshrinking" accounts for this by providing pressure to remove some
43 * pages from frontswap and return them to kernel memory.
44 *
45 * For both "selfballooning" and "frontswap-selfshrinking", a worker
46 * thread is used and sysfs tunables are provided to adjust the frequency
47 * and rate of adjustments to achieve the goal, as well as to disable one
48 * or both functions independently.
49 *
50 * While some argue that this functionality can and should be implemented
51 * in userspace, it has been observed that bad things happen (e.g. OOMs).
52 *
53 * System configuration note: Selfballooning should not be enabled on
54 * systems without a sufficiently large swap device configured; for best
55 * results, it is recommended that total swap be increased by the size
56 * of the guest memory. Also, while technically not required to be
57 * configured, it is highly recommended that frontswap also be configured
58 * and enabled when selfballooning is running. So, selfballooning
59 * is disabled by default if frontswap is not configured and can only
60 * be enabled with the "selfballooning" kernel boot option; similarly
61 * selfballooning is enabled by default if frontswap is configured and
62 * can be disabled with the "noselfballooning" kernel boot option. Finally,
63 * when frontswap is configured, frontswap-selfshrinking can be disabled
64 * with the "noselfshrink" kernel boot option.
65 *
66 * Selfballooning is disallowed in domain0 and force-disabled.
67 *
68 */
69
70#include <linux/kernel.h>
71#include <linux/mm.h>
72#include <linux/mman.h>
73
74#include <xen/balloon.h>
75
76#include <xen/tmem.h>
77
78/* Enable/disable with sysfs. */
79static int xen_selfballooning_enabled __read_mostly;
80
81/*
82 * Controls rate at which memory target (this iteration) approaches
83 * ultimate goal when memory need is increasing (up-hysteresis) or
84 * decreasing (down-hysteresis). Higher values of hysteresis cause
85 * slower increases/decreases. The default values for the various
86 * parameters were deemed reasonable by experimentation, may be
87 * workload-dependent, and can all be adjusted via sysfs.
88 */
89static unsigned int selfballoon_downhysteresis __read_mostly = 8;
90static unsigned int selfballoon_uphysteresis __read_mostly = 1;
91
92/* In HZ, controls frequency of worker invocation. */
93static unsigned int selfballoon_interval __read_mostly = 5;
94
95static void selfballoon_process(struct work_struct *work);
96static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
97
98#ifdef CONFIG_FRONTSWAP
99#include <linux/frontswap.h>
100
101/* Enable/disable with sysfs. */
102static bool frontswap_selfshrinking __read_mostly;
103
104/* Enable/disable with kernel boot option. */
105static bool use_frontswap_selfshrink __initdata = true;
106
107/*
108 * The default values for the following parameters were deemed reasonable
109 * by experimentation, may be workload-dependent, and can all be
110 * adjusted via sysfs.
111 */
112
113/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
114static unsigned int frontswap_hysteresis __read_mostly = 20;
115
116/*
117 * Number of selfballoon worker invocations to wait before observing that
118 * frontswap selfshrinking should commence. Note that selfshrinking does
119 * not use a separate worker thread.
120 */
121static unsigned int frontswap_inertia __read_mostly = 3;
122
123/* Countdown to next invocation of frontswap_shrink() */
124static unsigned long frontswap_inertia_counter;
125
126/*
127 * Invoked by the selfballoon worker thread, uses current number of pages
128 * in frontswap (frontswap_curr_pages()), previous status, and control
129 * values (hysteresis and inertia) to determine if frontswap should be
130 * shrunk and what the new frontswap size should be. Note that
131 * frontswap_shrink is essentially a partial swapoff that immediately
132 * transfers pages from the "swap device" (frontswap) back into kernel
133 * RAM; despite the name, frontswap "shrinking" is very different from
134 * the "shrinker" interface used by the kernel MM subsystem to reclaim
135 * memory.
136 */
static void frontswap_selfshrink(void)
{
	/* Persistent across invocations: previous/current sample of the
	 * frontswap page count and the computed shrink target. */
	static unsigned long cur_frontswap_pages;
	static unsigned long last_frontswap_pages;
	static unsigned long tgt_frontswap_pages;

	/* While frontswap is empty or still growing, do nothing and
	 * re-arm the inertia countdown. */
	last_frontswap_pages = cur_frontswap_pages;
	cur_frontswap_pages = frontswap_curr_pages();
	if (!cur_frontswap_pages ||
			(cur_frontswap_pages > last_frontswap_pages)) {
		frontswap_inertia_counter = frontswap_inertia;
		return;
	}
	/* Require frontswap_inertia consecutive non-growing samples
	 * before actually shrinking. */
	if (frontswap_inertia_counter && --frontswap_inertia_counter)
		return;
	/* Shrink by 1/frontswap_hysteresis of the current size, or all
	 * the way to zero once few enough pages remain. */
	if (cur_frontswap_pages <= frontswap_hysteresis)
		tgt_frontswap_pages = 0;
	else
		tgt_frontswap_pages = cur_frontswap_pages -
			(cur_frontswap_pages / frontswap_hysteresis);
	frontswap_shrink(tgt_frontswap_pages);
}
159
/* "noselfshrink": opt out of frontswap selfshrinking at boot. */
static int __init xen_nofrontswap_selfshrink_setup(char *s)
{
	use_frontswap_selfshrink = false;
	return 1;
}

__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);

/* Disable with kernel boot option. */
static bool use_selfballooning __initdata = true;

/* "noselfballooning": opt out when frontswap is configured (where
 * selfballooning defaults to on). */
static int __init xen_noselfballooning_setup(char *s)
{
	use_selfballooning = false;
	return 1;
}

__setup("noselfballooning", xen_noselfballooning_setup);
#else /* !CONFIG_FRONTSWAP */
/* Enable with kernel boot option. */
static bool use_selfballooning __initdata = false;

/* "selfballooning": opt in when frontswap is not configured (where
 * selfballooning defaults to off). */
static int __init xen_selfballooning_setup(char *s)
{
	use_selfballooning = true;
	return 1;
}

__setup("selfballooning", xen_selfballooning_setup);
#endif /* CONFIG_FRONTSWAP */
190
191/*
192 * Use current balloon size, the goal (vm_committed_as), and hysteresis
193 * parameters to set a new target balloon size
194 */
static void selfballoon_process(struct work_struct *work)
{
	unsigned long cur_pages, goal_pages, tgt_pages;
	bool reset_timer = false;

	if (xen_selfballooning_enabled) {
		cur_pages = balloon_stats.current_pages;
		tgt_pages = cur_pages; /* default is no change */
		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
			balloon_stats.current_pages - totalram_pages;
#ifdef CONFIG_FRONTSWAP
		/* allow space for frontswap pages to be repatriated */
		if (frontswap_selfshrinking && frontswap_enabled)
			goal_pages += frontswap_curr_pages();
#endif
		/* Move only a hysteresis-determined fraction of the
		 * distance to the goal per pass, to damp oscillation. */
		if (cur_pages > goal_pages)
			tgt_pages = cur_pages -
				((cur_pages - goal_pages) /
				  selfballoon_downhysteresis);
		else if (cur_pages < goal_pages)
			tgt_pages = cur_pages +
				((goal_pages - cur_pages) /
				  selfballoon_uphysteresis);
		/* else if cur_pages == goal_pages, no change */
		balloon_set_new_target(tgt_pages);
		reset_timer = true;
	}
#ifdef CONFIG_FRONTSWAP
	if (frontswap_selfshrinking && frontswap_enabled) {
		frontswap_selfshrink();
		reset_timer = true;
	}
#endif
	/* Re-arm only while at least one mechanism is active; otherwise
	 * the worker goes quiescent until a sysfs toggle reschedules it. */
	if (reset_timer)
		schedule_delayed_work(&selfballoon_worker,
			selfballoon_interval * HZ);
}
232
233#ifdef CONFIG_SYSFS
234
235#include <linux/sysdev.h>
236#include <linux/capability.h>
237
/* Expand to a sysdev "show" method named show_<name> that sprintf-formats
 * the given value(s) into buf. */
#define SELFBALLOON_SHOW(name, format, args...)				\
	static ssize_t show_##name(struct sys_device *dev,	\
					  struct sysdev_attribute *attr, \
					  char *buf) \
	{ \
		return sprintf(buf, format, ##args); \
	}
245
SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled);

/* sysfs toggle for selfballooning; accepts only "0" or "1".  Turning
 * it on (re)schedules the worker, which stops re-arming itself while
 * both mechanisms are disabled. */
static ssize_t store_selfballooning(struct sys_device *dev,
			   struct sysdev_attribute *attr,
			   const char *buf,
			   size_t count)
{
	bool was_enabled = xen_selfballooning_enabled;
	unsigned long tmp;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = strict_strtoul(buf, 10, &tmp);
	if (err || ((tmp != 0) && (tmp != 1)))
		return -EINVAL;

	xen_selfballooning_enabled = !!tmp;
	if (!was_enabled && xen_selfballooning_enabled)
		schedule_delayed_work(&selfballoon_worker,
			selfballoon_interval * HZ);

	return count;
}

static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
		   show_selfballooning, store_selfballooning);
274
275SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval);
276
277static ssize_t store_selfballoon_interval(struct sys_device *dev,
278 struct sysdev_attribute *attr,
279 const char *buf,
280 size_t count)
281{
282 unsigned long val;
283 int err;
284
285 if (!capable(CAP_SYS_ADMIN))
286 return -EPERM;
287 err = strict_strtoul(buf, 10, &val);
288 if (err || val == 0)
289 return -EINVAL;
290 selfballoon_interval = val;
291 return count;
292}
293
294static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
295 show_selfballoon_interval, store_selfballoon_interval);
296
297SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis);
298
299static ssize_t store_selfballoon_downhys(struct sys_device *dev,
300 struct sysdev_attribute *attr,
301 const char *buf,
302 size_t count)
303{
304 unsigned long val;
305 int err;
306
307 if (!capable(CAP_SYS_ADMIN))
308 return -EPERM;
309 err = strict_strtoul(buf, 10, &val);
310 if (err || val == 0)
311 return -EINVAL;
312 selfballoon_downhysteresis = val;
313 return count;
314}
315
316static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
317 show_selfballoon_downhys, store_selfballoon_downhys);
318
319
320SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis);
321
322static ssize_t store_selfballoon_uphys(struct sys_device *dev,
323 struct sysdev_attribute *attr,
324 const char *buf,
325 size_t count)
326{
327 unsigned long val;
328 int err;
329
330 if (!capable(CAP_SYS_ADMIN))
331 return -EPERM;
332 err = strict_strtoul(buf, 10, &val);
333 if (err || val == 0)
334 return -EINVAL;
335 selfballoon_uphysteresis = val;
336 return count;
337}
338
339static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
340 show_selfballoon_uphys, store_selfballoon_uphys);
341
342#ifdef CONFIG_FRONTSWAP
343SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
344
345static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
346 struct sysdev_attribute *attr,
347 const char *buf,
348 size_t count)
349{
350 bool was_enabled = frontswap_selfshrinking;
351 unsigned long tmp;
352 int err;
353
354 if (!capable(CAP_SYS_ADMIN))
355 return -EPERM;
356 err = strict_strtoul(buf, 10, &tmp);
357 if (err || ((tmp != 0) && (tmp != 1)))
358 return -EINVAL;
359 frontswap_selfshrinking = !!tmp;
360 if (!was_enabled && !xen_selfballooning_enabled &&
361 frontswap_selfshrinking)
362 schedule_delayed_work(&selfballoon_worker,
363 selfballoon_interval * HZ);
364
365 return count;
366}
367
368static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
369 show_frontswap_selfshrinking, store_frontswap_selfshrinking);
370
371SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia);
372
373static ssize_t store_frontswap_inertia(struct sys_device *dev,
374 struct sysdev_attribute *attr,
375 const char *buf,
376 size_t count)
377{
378 unsigned long val;
379 int err;
380
381 if (!capable(CAP_SYS_ADMIN))
382 return -EPERM;
383 err = strict_strtoul(buf, 10, &val);
384 if (err || val == 0)
385 return -EINVAL;
386 frontswap_inertia = val;
387 frontswap_inertia_counter = val;
388 return count;
389}
390
391static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
392 show_frontswap_inertia, store_frontswap_inertia);
393
394SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis);
395
396static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
397 struct sysdev_attribute *attr,
398 const char *buf,
399 size_t count)
400{
401 unsigned long val;
402 int err;
403
404 if (!capable(CAP_SYS_ADMIN))
405 return -EPERM;
406 err = strict_strtoul(buf, 10, &val);
407 if (err || val == 0)
408 return -EINVAL;
409 frontswap_hysteresis = val;
410 return count;
411}
412
413static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
414 show_frontswap_hysteresis, store_frontswap_hysteresis);
415
416#endif /* CONFIG_FRONTSWAP */
417
/* All selfballoon tunables, exposed together under the "selfballoon"
 * sysfs group (frontswap entries only when CONFIG_FRONTSWAP). */
static struct attribute *selfballoon_attrs[] = {
	&attr_selfballooning.attr,
	&attr_selfballoon_interval.attr,
	&attr_selfballoon_downhysteresis.attr,
	&attr_selfballoon_uphysteresis.attr,
#ifdef CONFIG_FRONTSWAP
	&attr_frontswap_selfshrinking.attr,
	&attr_frontswap_hysteresis.attr,
	&attr_frontswap_inertia.attr,
#endif
	NULL
};

static struct attribute_group selfballoon_group = {
	.name = "selfballoon",
	.attrs = selfballoon_attrs
};
435#endif
436
437int register_xen_selfballooning(struct sys_device *sysdev)
438{
439 int error = -1;
440
441#ifdef CONFIG_SYSFS
442 error = sysfs_create_group(&sysdev->kobj, &selfballoon_group);
443#endif
444 return error;
445}
446EXPORT_SYMBOL(register_xen_selfballooning);
447
/* Boot-time init: enable selfballooning and/or frontswap selfshrinking
 * when running as a Xen guest (never dom0), gated on tmem being enabled
 * and on the "selfballooning"/"noselfballooning"/"noselfshrink" boot
 * options, then start the shared worker.  Returns -ENODEV when neither
 * mechanism ends up enabled. */
static int __init xen_selfballoon_init(void)
{
	bool enable = false;

	if (!xen_domain())
		return -ENODEV;

	/* Selfballooning is disallowed in domain0 (see file header). */
	if (xen_initial_domain()) {
		pr_info("xen/balloon: Xen selfballooning driver "
				"disabled for domain0.\n");
		return -ENODEV;
	}

	xen_selfballooning_enabled = tmem_enabled && use_selfballooning;
	if (xen_selfballooning_enabled) {
		pr_info("xen/balloon: Initializing Xen "
					"selfballooning driver.\n");
		enable = true;
	}
#ifdef CONFIG_FRONTSWAP
	frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink;
	if (frontswap_selfshrinking) {
		pr_info("xen/balloon: Initializing frontswap "
					"selfshrinking driver.\n");
		enable = true;
	}
#endif
	if (!enable)
		return -ENODEV;

	/* Kick off the periodic worker; it re-arms itself thereafter. */
	schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);

	return 0;
}

subsys_initcall(xen_selfballoon_init);
484
485MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 739769551e33..bd2f90c9ac8b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev)
378 kfree(to_xenbus_device(dev)); 378 kfree(to_xenbus_device(dev));
379} 379}
380 380
381static ssize_t xendev_show_nodename(struct device *dev, 381static ssize_t nodename_show(struct device *dev,
382 struct device_attribute *attr, char *buf) 382 struct device_attribute *attr, char *buf)
383{ 383{
384 return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); 384 return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
385} 385}
386static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
387 386
388static ssize_t xendev_show_devtype(struct device *dev, 387static ssize_t devtype_show(struct device *dev,
389 struct device_attribute *attr, char *buf) 388 struct device_attribute *attr, char *buf)
390{ 389{
391 return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); 390 return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
392} 391}
393static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
394 392
395static ssize_t xendev_show_modalias(struct device *dev, 393static ssize_t modalias_show(struct device *dev,
396 struct device_attribute *attr, char *buf) 394 struct device_attribute *attr, char *buf)
397{ 395{
398 return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); 396 return sprintf(buf, "%s:%s\n", dev->bus->name,
397 to_xenbus_device(dev)->devicetype);
399} 398}
400static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); 399
400struct device_attribute xenbus_dev_attrs[] = {
401 __ATTR_RO(nodename),
402 __ATTR_RO(devtype),
403 __ATTR_RO(modalias),
404 __ATTR_NULL
405};
406EXPORT_SYMBOL_GPL(xenbus_dev_attrs);
401 407
402int xenbus_probe_node(struct xen_bus_type *bus, 408int xenbus_probe_node(struct xen_bus_type *bus,
403 const char *type, 409 const char *type,
@@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
449 if (err) 455 if (err)
450 goto fail; 456 goto fail;
451 457
452 err = device_create_file(&xendev->dev, &dev_attr_nodename);
453 if (err)
454 goto fail_unregister;
455
456 err = device_create_file(&xendev->dev, &dev_attr_devtype);
457 if (err)
458 goto fail_remove_nodename;
459
460 err = device_create_file(&xendev->dev, &dev_attr_modalias);
461 if (err)
462 goto fail_remove_devtype;
463
464 return 0; 458 return 0;
465fail_remove_devtype:
466 device_remove_file(&xendev->dev, &dev_attr_devtype);
467fail_remove_nodename:
468 device_remove_file(&xendev->dev, &dev_attr_nodename);
469fail_unregister:
470 device_unregister(&xendev->dev);
471fail: 459fail:
472 kfree(xendev); 460 kfree(xendev);
473 return err; 461 return err;
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index 888b9900ca08..b814935378c7 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -48,6 +48,8 @@ struct xen_bus_type
48 struct bus_type bus; 48 struct bus_type bus;
49}; 49};
50 50
51extern struct device_attribute xenbus_dev_attrs[];
52
51extern int xenbus_match(struct device *_dev, struct device_driver *_drv); 53extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
52extern int xenbus_dev_probe(struct device *_dev); 54extern int xenbus_dev_probe(struct device *_dev);
53extern int xenbus_dev_remove(struct device *_dev); 55extern int xenbus_dev_remove(struct device *_dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 6cf467bf63ec..60adf919d78d 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev,
107 if (xdev == NULL) 107 if (xdev == NULL)
108 return -ENODEV; 108 return -ENODEV;
109 109
110 if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
111 return -ENOMEM;
112
110 /* stuff we want to pass to /sbin/hotplug */ 113 /* stuff we want to pass to /sbin/hotplug */
111 if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) 114 if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
112 return -ENOMEM; 115 return -ENOMEM;
@@ -183,10 +186,6 @@ static void frontend_changed(struct xenbus_watch *watch,
183 xenbus_otherend_changed(watch, vec, len, 0); 186 xenbus_otherend_changed(watch, vec, len, 0);
184} 187}
185 188
186static struct device_attribute xenbus_backend_dev_attrs[] = {
187 __ATTR_NULL
188};
189
190static struct xen_bus_type xenbus_backend = { 189static struct xen_bus_type xenbus_backend = {
191 .root = "backend", 190 .root = "backend",
192 .levels = 3, /* backend/type/<frontend>/<id> */ 191 .levels = 3, /* backend/type/<frontend>/<id> */
@@ -200,7 +199,7 @@ static struct xen_bus_type xenbus_backend = {
200 .probe = xenbus_dev_probe, 199 .probe = xenbus_dev_probe,
201 .remove = xenbus_dev_remove, 200 .remove = xenbus_dev_remove,
202 .shutdown = xenbus_dev_shutdown, 201 .shutdown = xenbus_dev_shutdown,
203 .dev_attrs = xenbus_backend_dev_attrs, 202 .dev_attrs = xenbus_dev_attrs,
204 }, 203 },
205}; 204};
206 205
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index b6a2690c9d49..ed2ba474a560 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch,
81 xenbus_otherend_changed(watch, vec, len, 1); 81 xenbus_otherend_changed(watch, vec, len, 1);
82} 82}
83 83
84static struct device_attribute xenbus_frontend_dev_attrs[] = {
85 __ATTR_NULL
86};
87
88static const struct dev_pm_ops xenbus_pm_ops = { 84static const struct dev_pm_ops xenbus_pm_ops = {
89 .suspend = xenbus_dev_suspend, 85 .suspend = xenbus_dev_suspend,
90 .resume = xenbus_dev_resume, 86 .resume = xenbus_dev_resume,
@@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = {
106 .probe = xenbus_dev_probe, 102 .probe = xenbus_dev_probe,
107 .remove = xenbus_dev_remove, 103 .remove = xenbus_dev_remove,
108 .shutdown = xenbus_dev_shutdown, 104 .shutdown = xenbus_dev_shutdown,
109 .dev_attrs = xenbus_frontend_dev_attrs, 105 .dev_attrs = xenbus_dev_attrs,
110 106
111 .pm = &xenbus_pm_ops, 107 .pm = &xenbus_pm_ops,
112 }, 108 },