Diffstat (limited to 'drivers/xen')
-rw-r--r--  drivers/xen/Kconfig | 50
-rw-r--r--  drivers/xen/Makefile | 24
-rw-r--r--  drivers/xen/acpi.c | 62
-rw-r--r--  drivers/xen/balloon.c | 84
-rw-r--r--  drivers/xen/biomerge.c | 2
-rw-r--r--  drivers/xen/cpu_hotplug.c | 3
-rw-r--r--  drivers/xen/dbgp.c | 50
-rw-r--r--  drivers/xen/events.c | 207
-rw-r--r--  drivers/xen/evtchn.c | 2
-rw-r--r--  drivers/xen/fallback.c | 80
-rw-r--r--  drivers/xen/gntalloc.c | 121
-rw-r--r--  drivers/xen/gntdev.c | 119
-rw-r--r--  drivers/xen/grant-table.c | 714
-rw-r--r--  drivers/xen/manage.c | 8
-rw-r--r--  drivers/xen/mcelog.c | 414
-rw-r--r--  drivers/xen/pci.c | 107
-rw-r--r--  drivers/xen/pcpu.c | 371
-rw-r--r--  drivers/xen/platform-pci.c | 14
-rw-r--r--  drivers/xen/privcmd.c | 575
-rw-r--r--  drivers/xen/privcmd.h | 3
-rw-r--r--  drivers/xen/swiotlb-xen.c | 189
-rw-r--r--  drivers/xen/sys-hypervisor.c | 23
-rw-r--r--  drivers/xen/tmem.c | 40
-rw-r--r--  drivers/xen/xen-acpi-pad.c | 182
-rw-r--r--  drivers/xen/xen-acpi-processor.c | 568
-rw-r--r--  drivers/xen/xen-balloon.c | 105
-rw-r--r--  drivers/xen/xen-pciback/conf_space.c | 10
-rw-r--r--  drivers/xen/xen-pciback/conf_space_header.c | 5
-rw-r--r--  drivers/xen/xen-pciback/conf_space_quirks.c | 3
-rw-r--r--  drivers/xen/xen-pciback/passthrough.c | 34
-rw-r--r--  drivers/xen/xen-pciback/pci_stub.c | 285
-rw-r--r--  drivers/xen/xen-pciback/pciback.h | 33
-rw-r--r--  drivers/xen/xen-pciback/pciback_ops.c | 3
-rw-r--r--  drivers/xen/xen-pciback/vpci.c | 49
-rw-r--r--  drivers/xen/xen-pciback/xenbus.c | 51
-rw-r--r--  drivers/xen/xen-selfballoon.c | 171
-rw-r--r--  drivers/xen/xenbus/Makefile | 2
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c | 215
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.c | 12
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.h | 5
-rw-r--r--  drivers/xen/xenbus/xenbus_dev_backend.c | 141
-rw-r--r--  drivers/xen/xenbus/xenbus_dev_frontend.c | 629
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c | 153
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.h | 9
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_backend.c | 11
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_frontend.c | 206
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c | 64
-rw-r--r--  drivers/xen/xenfs/Makefile | 2
-rw-r--r--  drivers/xen/xenfs/super.c | 9
-rw-r--r--  drivers/xen/xenfs/xenfs.h | 2
50 files changed, 757 insertions(+), 5464 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index cabfa97f467..5f7ff8e2fc1 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
3 3
4config XEN_BALLOON 4config XEN_BALLOON
5 bool "Xen memory balloon driver" 5 bool "Xen memory balloon driver"
6 depends on !ARM
7 default y 6 default y
8 help 7 help
9 The balloon driver allows the Xen domain to request more memory from 8 The balloon driver allows the Xen domain to request more memory from
@@ -72,7 +71,7 @@ config XEN_DEV_EVTCHN
72 tristate "Xen /dev/xen/evtchn device" 71 tristate "Xen /dev/xen/evtchn device"
73 default y 72 default y
74 help 73 help
75 The evtchn driver allows a userspace process to trigger event 74 The evtchn driver allows a userspace process to triger event
76 channels and to receive notification of an event channel 75 channels and to receive notification of an event channel
77 firing. 76 firing.
78 If in doubt, say yes. 77 If in doubt, say yes.
@@ -87,7 +86,6 @@ config XEN_BACKEND
87 86
88config XENFS 87config XENFS
89 tristate "Xen filesystem" 88 tristate "Xen filesystem"
90 select XEN_PRIVCMD
91 default y 89 default y
92 help 90 help
93 The xen filesystem provides a way for domains to share 91 The xen filesystem provides a way for domains to share
@@ -139,6 +137,16 @@ config XEN_GRANT_DEV_ALLOC
139 to other domains. This can be used to implement frontend drivers 137 to other domains. This can be used to implement frontend drivers
140 or as part of an inter-domain shared memory channel. 138 or as part of an inter-domain shared memory channel.
141 139
140config XEN_PLATFORM_PCI
141 tristate "xen platform pci device driver"
142 depends on XEN_PVHVM && PCI
143 default m
144 help
145 Driver for the Xen PCI Platform device: it is responsible for
146 initializing xenbus and grant_table when running in a Xen HVM
147 domain. As a consequence this driver is required to run any Xen PV
148 frontend on Xen HVM.
149
142config SWIOTLB_XEN 150config SWIOTLB_XEN
143 def_bool y 151 def_bool y
144 depends on PCI 152 depends on PCI
@@ -146,7 +154,6 @@ config SWIOTLB_XEN
146 154
147config XEN_TMEM 155config XEN_TMEM
148 bool 156 bool
149 depends on !ARM
150 default y if (CLEANCACHE || FRONTSWAP) 157 default y if (CLEANCACHE || FRONTSWAP)
151 help 158 help
152 Shim to interface in-kernel Transcendent Memory hooks 159 Shim to interface in-kernel Transcendent Memory hooks
@@ -174,39 +181,4 @@ config XEN_PCIDEV_BACKEND
174 xen-pciback.hide=(03:00.0)(04:00.0) 181 xen-pciback.hide=(03:00.0)(04:00.0)
175 182
176 If in doubt, say m. 183 If in doubt, say m.
177
178config XEN_PRIVCMD
179 tristate
180 depends on XEN
181 default m
182
183config XEN_ACPI_PROCESSOR
184 tristate "Xen ACPI processor"
185 depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
186 default m
187 help
188 This ACPI processor uploads Power Management information to the Xen
189 hypervisor.
190
191 To do that the driver parses the Power Management data and uploads
192 said information to the Xen hypervisor. Then the Xen hypervisor can
193 select the proper Cx and Pxx states. It also registers itslef as the
194 SMM so that other drivers (such as ACPI cpufreq scaling driver) will
195 not load.
196
197 To compile this driver as a module, choose M here: the module will be
198 called xen_acpi_processor If you do not know what to choose, select
199 M here. If the CPUFREQ drivers are built in, select Y here.
200
201config XEN_MCE_LOG
202 bool "Xen platform mcelog"
203 depends on XEN_DOM0 && X86_64 && X86_MCE
204 default n
205 help
206 Allow kernel fetching MCE error from Xen platform and
207 converting it into Linux mcelog format for mcelog tools
208
209config XEN_HAVE_PVMMU
210 bool
211
212endmenu 184endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index fb213cf81a7..72bbb27d7a6 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,21 +1,11 @@
1ifneq ($(CONFIG_ARM),y) 1obj-y += grant-table.o features.o events.o manage.o balloon.o
2obj-y += manage.o
3obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
4endif
5obj-$(CONFIG_X86) += fallback.o
6obj-y += grant-table.o features.o events.o balloon.o
7obj-y += xenbus/ 2obj-y += xenbus/
8 3
9nostackp := $(call cc-option, -fno-stack-protector) 4nostackp := $(call cc-option, -fno-stack-protector)
10CFLAGS_features.o := $(nostackp) 5CFLAGS_features.o := $(nostackp)
11 6
12dom0-$(CONFIG_PCI) += pci.o
13dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
14dom0-$(CONFIG_ACPI) += acpi.o $(xen-pad-y)
15xen-pad-$(CONFIG_X86) += xen-acpi-pad.o
16dom0-$(CONFIG_X86) += pcpu.o
17obj-$(CONFIG_XEN_DOM0) += $(dom0-y)
18obj-$(CONFIG_BLOCK) += biomerge.o 7obj-$(CONFIG_BLOCK) += biomerge.o
8obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
19obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 9obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
20obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o 10obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
21obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o 11obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
@@ -24,14 +14,14 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
24obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o 14obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
25obj-$(CONFIG_XENFS) += xenfs/ 15obj-$(CONFIG_XENFS) += xenfs/
26obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o 16obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
27obj-$(CONFIG_XEN_PVHVM) += platform-pci.o 17obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
28obj-$(CONFIG_XEN_TMEM) += tmem.o 18obj-$(CONFIG_XEN_TMEM) += tmem.o
29obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o 19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
30obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o 20obj-$(CONFIG_XEN_DOM0) += pci.o
31obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ 21obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
32obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o 22
33obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
34xen-evtchn-y := evtchn.o 23xen-evtchn-y := evtchn.o
35xen-gntdev-y := gntdev.o 24xen-gntdev-y := gntdev.o
36xen-gntalloc-y := gntalloc.o 25xen-gntalloc-y := gntalloc.o
37xen-privcmd-y := privcmd.o 26
27xen-platform-pci-y := platform-pci.o
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
deleted file mode 100644
index 119d42a2bf5..00000000000
--- a/drivers/xen/acpi.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/******************************************************************************
2 * acpi.c
3 * acpi file for domain 0 kernel
4 *
5 * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
6 * Copyright (c) 2011 Yu Ke ke.yu@intel.com
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#include <xen/acpi.h>
34#include <xen/interface/platform.h>
35#include <asm/xen/hypercall.h>
36#include <asm/xen/hypervisor.h>
37
38int xen_acpi_notify_hypervisor_state(u8 sleep_state,
39 u32 pm1a_cnt, u32 pm1b_cnt)
40{
41 struct xen_platform_op op = {
42 .cmd = XENPF_enter_acpi_sleep,
43 .interface_version = XENPF_INTERFACE_VERSION,
44 .u = {
45 .enter_acpi_sleep = {
46 .pm1a_cnt_val = (u16)pm1a_cnt,
47 .pm1b_cnt_val = (u16)pm1b_cnt,
48 .sleep_state = sleep_state,
49 },
50 },
51 };
52
53 if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
54 WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
55 "Email xen-devel@lists.xensource.com Thank you.\n", \
56 pm1a_cnt, pm1b_cnt);
57 return -1;
58 }
59
60 HYPERVISOR_dom0_op(&op);
61 return 1;
62}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index a56776dbe09..5dfd8f8ff07 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -39,7 +39,6 @@
39#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/sched.h> 40#include <linux/sched.h>
41#include <linux/errno.h> 41#include <linux/errno.h>
42#include <linux/module.h>
43#include <linux/mm.h> 42#include <linux/mm.h>
44#include <linux/bootmem.h> 43#include <linux/bootmem.h>
45#include <linux/pagemap.h> 44#include <linux/pagemap.h>
@@ -55,6 +54,7 @@
55#include <asm/pgalloc.h> 54#include <asm/pgalloc.h>
56#include <asm/pgtable.h> 55#include <asm/pgtable.h>
57#include <asm/tlb.h> 56#include <asm/tlb.h>
57#include <asm/e820.h>
58 58
59#include <asm/xen/hypervisor.h> 59#include <asm/xen/hypervisor.h>
60#include <asm/xen/hypercall.h> 60#include <asm/xen/hypercall.h>
@@ -87,14 +87,14 @@ struct balloon_stats balloon_stats;
87EXPORT_SYMBOL_GPL(balloon_stats); 87EXPORT_SYMBOL_GPL(balloon_stats);
88 88
89/* We increase/decrease in batches which fit in a page */ 89/* We increase/decrease in batches which fit in a page */
90static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; 90static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
91 91
92#ifdef CONFIG_HIGHMEM 92#ifdef CONFIG_HIGHMEM
93#define inc_totalhigh_pages() (totalhigh_pages++) 93#define inc_totalhigh_pages() (totalhigh_pages++)
94#define dec_totalhigh_pages() (totalhigh_pages--) 94#define dec_totalhigh_pages() (totalhigh_pages--)
95#else 95#else
96#define inc_totalhigh_pages() do {} while (0) 96#define inc_totalhigh_pages() do {} while(0)
97#define dec_totalhigh_pages() do {} while (0) 97#define dec_totalhigh_pages() do {} while(0)
98#endif 98#endif
99 99
100/* List of ballooned pages, threaded through the mem_map array. */ 100/* List of ballooned pages, threaded through the mem_map array. */
@@ -154,7 +154,8 @@ static struct page *balloon_retrieve(bool prefer_highmem)
154 if (PageHighMem(page)) { 154 if (PageHighMem(page)) {
155 balloon_stats.balloon_high--; 155 balloon_stats.balloon_high--;
156 inc_totalhigh_pages(); 156 inc_totalhigh_pages();
157 } else 157 }
158 else
158 balloon_stats.balloon_low--; 159 balloon_stats.balloon_low--;
159 160
160 totalram_pages++; 161 totalram_pages++;
@@ -359,7 +360,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
359 360
360 set_phys_to_machine(pfn, frame_list[i]); 361 set_phys_to_machine(pfn, frame_list[i]);
361 362
362#ifdef CONFIG_XEN_HAVE_PVMMU
363 /* Link back into the page tables if not highmem. */ 363 /* Link back into the page tables if not highmem. */
364 if (xen_pv_domain() && !PageHighMem(page)) { 364 if (xen_pv_domain() && !PageHighMem(page)) {
365 int ret; 365 int ret;
@@ -369,7 +369,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
369 0); 369 0);
370 BUG_ON(ret); 370 BUG_ON(ret);
371 } 371 }
372#endif
373 372
374 /* Relinquish the page back to the allocator. */ 373 /* Relinquish the page back to the allocator. */
375 ClearPageReserved(page); 374 ClearPageReserved(page);
@@ -418,14 +417,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
418 417
419 scrub_page(page); 418 scrub_page(page);
420 419
421#ifdef CONFIG_XEN_HAVE_PVMMU
422 if (xen_pv_domain() && !PageHighMem(page)) { 420 if (xen_pv_domain() && !PageHighMem(page)) {
423 ret = HYPERVISOR_update_va_mapping( 421 ret = HYPERVISOR_update_va_mapping(
424 (unsigned long)__va(pfn << PAGE_SHIFT), 422 (unsigned long)__va(pfn << PAGE_SHIFT),
425 __pte_ma(0), 0); 423 __pte_ma(0), 0);
426 BUG_ON(ret); 424 BUG_ON(ret);
427 } 425 }
428#endif 426
429 } 427 }
430 428
431 /* Ensure that ballooned highmem pages don't have kmaps. */ 429 /* Ensure that ballooned highmem pages don't have kmaps. */
@@ -503,24 +501,20 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
503 * alloc_xenballooned_pages - get pages that have been ballooned out 501 * alloc_xenballooned_pages - get pages that have been ballooned out
504 * @nr_pages: Number of pages to get 502 * @nr_pages: Number of pages to get
505 * @pages: pages returned 503 * @pages: pages returned
506 * @highmem: allow highmem pages
507 * @return 0 on success, error otherwise 504 * @return 0 on success, error otherwise
508 */ 505 */
509int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) 506int alloc_xenballooned_pages(int nr_pages, struct page** pages)
510{ 507{
511 int pgno = 0; 508 int pgno = 0;
512 struct page *page; 509 struct page* page;
513 mutex_lock(&balloon_mutex); 510 mutex_lock(&balloon_mutex);
514 while (pgno < nr_pages) { 511 while (pgno < nr_pages) {
515 page = balloon_retrieve(highmem); 512 page = balloon_retrieve(true);
516 if (page && (highmem || !PageHighMem(page))) { 513 if (page) {
517 pages[pgno++] = page; 514 pages[pgno++] = page;
518 } else { 515 } else {
519 enum bp_state st; 516 enum bp_state st;
520 if (page) 517 st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
521 balloon_append(page);
522 st = decrease_reservation(nr_pages - pgno,
523 highmem ? GFP_HIGHUSER : GFP_USER);
524 if (st != BP_DONE) 518 if (st != BP_DONE)
525 goto out_undo; 519 goto out_undo;
526 } 520 }
@@ -542,7 +536,7 @@ EXPORT_SYMBOL(alloc_xenballooned_pages);
542 * @nr_pages: Number of pages 536 * @nr_pages: Number of pages
543 * @pages: pages to return 537 * @pages: pages to return
544 */ 538 */
545void free_xenballooned_pages(int nr_pages, struct page **pages) 539void free_xenballooned_pages(int nr_pages, struct page** pages)
546{ 540{
547 int i; 541 int i;
548 542
@@ -561,40 +555,17 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
561} 555}
562EXPORT_SYMBOL(free_xenballooned_pages); 556EXPORT_SYMBOL(free_xenballooned_pages);
563 557
564static void __init balloon_add_region(unsigned long start_pfn, 558static int __init balloon_init(void)
565 unsigned long pages)
566{ 559{
567 unsigned long pfn, extra_pfn_end; 560 unsigned long pfn, extra_pfn_end;
568 struct page *page; 561 struct page *page;
569 562
570 /*
571 * If the amount of usable memory has been limited (e.g., with
572 * the 'mem' command line parameter), don't add pages beyond
573 * this limit.
574 */
575 extra_pfn_end = min(max_pfn, start_pfn + pages);
576
577 for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
578 page = pfn_to_page(pfn);
579 /* totalram_pages and totalhigh_pages do not
580 include the boot-time balloon extension, so
581 don't subtract from it. */
582 __balloon_append(page);
583 }
584}
585
586static int __init balloon_init(void)
587{
588 int i;
589
590 if (!xen_domain()) 563 if (!xen_domain())
591 return -ENODEV; 564 return -ENODEV;
592 565
593 pr_info("xen/balloon: Initialising balloon driver.\n"); 566 pr_info("xen/balloon: Initialising balloon driver.\n");
594 567
595 balloon_stats.current_pages = xen_pv_domain() 568 balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
596 ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
597 : max_pfn;
598 balloon_stats.target_pages = balloon_stats.current_pages; 569 balloon_stats.target_pages = balloon_stats.current_pages;
599 balloon_stats.balloon_low = 0; 570 balloon_stats.balloon_low = 0;
600 balloon_stats.balloon_high = 0; 571 balloon_stats.balloon_high = 0;
@@ -613,13 +584,24 @@ static int __init balloon_init(void)
613#endif 584#endif
614 585
615 /* 586 /*
616 * Initialize the balloon with pages from the extra memory 587 * Initialise the balloon with excess memory space. We need
617 * regions (see arch/x86/xen/setup.c). 588 * to make sure we don't add memory which doesn't exist or
589 * logically exist. The E820 map can be trimmed to be smaller
590 * than the amount of physical memory due to the mem= command
591 * line parameter. And if this is a 32-bit non-HIGHMEM kernel
592 * on a system with memory which requires highmem to access,
593 * don't try to use it.
618 */ 594 */
619 for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) 595 extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
620 if (xen_extra_mem[i].size) 596 (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
621 balloon_add_region(PFN_UP(xen_extra_mem[i].start), 597 for (pfn = PFN_UP(xen_extra_mem_start);
622 PFN_DOWN(xen_extra_mem[i].size)); 598 pfn < extra_pfn_end;
599 pfn++) {
600 page = pfn_to_page(pfn);
601 /* totalram_pages and totalhigh_pages do not include the boot-time
602 balloon extension, so don't subtract from it. */
603 __balloon_append(page);
604 }
623 605
624 return 0; 606 return 0;
625} 607}
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
index 0edb91c0de6..ba6eda4b514 100644
--- a/drivers/xen/biomerge.c
+++ b/drivers/xen/biomerge.c
@@ -1,6 +1,5 @@
1#include <linux/bio.h> 1#include <linux/bio.h>
2#include <linux/io.h> 2#include <linux/io.h>
3#include <linux/export.h>
4#include <xen/page.h> 3#include <xen/page.h>
5 4
6bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, 5bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
@@ -12,4 +11,3 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
12 return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && 11 return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
13 ((mfn1 == mfn2) || ((mfn1+1) == mfn2)); 12 ((mfn1 == mfn2) || ((mfn1+1) == mfn2));
14} 13}
15EXPORT_SYMBOL(xen_biovec_phys_mergeable);
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 4dcfced107f..14e2d995e95 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -30,8 +30,7 @@ static int vcpu_online(unsigned int cpu)
30 sprintf(dir, "cpu/%u", cpu); 30 sprintf(dir, "cpu/%u", cpu);
31 err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); 31 err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
32 if (err != 1) { 32 if (err != 1) {
33 if (!xen_initial_domain()) 33 printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
34 printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
35 return err; 34 return err;
36 } 35 }
37 36
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
deleted file mode 100644
index f3ccc80a455..00000000000
--- a/drivers/xen/dbgp.c
+++ /dev/null
@@ -1,50 +0,0 @@
1#include <linux/pci.h>
2#include <linux/usb.h>
3#include <linux/usb/ehci_def.h>
4#include <linux/usb/hcd.h>
5#include <asm/xen/hypercall.h>
6#include <xen/interface/physdev.h>
7#include <xen/xen.h>
8
9static int xen_dbgp_op(struct usb_hcd *hcd, int op)
10{
11#ifdef CONFIG_PCI
12 const struct device *ctrlr = hcd_to_bus(hcd)->controller;
13#endif
14 struct physdev_dbgp_op dbgp;
15
16 if (!xen_initial_domain())
17 return 0;
18
19 dbgp.op = op;
20
21#ifdef CONFIG_PCI
22 if (ctrlr->bus == &pci_bus_type) {
23 const struct pci_dev *pdev = to_pci_dev(ctrlr);
24
25 dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
26 dbgp.u.pci.bus = pdev->bus->number;
27 dbgp.u.pci.devfn = pdev->devfn;
28 dbgp.bus = PHYSDEVOP_DBGP_BUS_PCI;
29 } else
30#endif
31 dbgp.bus = PHYSDEVOP_DBGP_BUS_UNKNOWN;
32
33 return HYPERVISOR_physdev_op(PHYSDEVOP_dbgp_op, &dbgp);
34}
35
36int xen_dbgp_reset_prep(struct usb_hcd *hcd)
37{
38 return xen_dbgp_op(hcd, PHYSDEVOP_DBGP_RESET_PREPARE);
39}
40
41int xen_dbgp_external_startup(struct usb_hcd *hcd)
42{
43 return xen_dbgp_op(hcd, PHYSDEVOP_DBGP_RESET_DONE);
44}
45
46#ifndef CONFIG_EARLY_PRINTK_DBGP
47#include <linux/export.h>
48EXPORT_SYMBOL_GPL(xen_dbgp_reset_prep);
49EXPORT_SYMBOL_GPL(xen_dbgp_external_startup);
50#endif
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0be4df39e95..44490de5aec 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -31,16 +31,13 @@
31#include <linux/irqnr.h> 31#include <linux/irqnr.h>
32#include <linux/pci.h> 32#include <linux/pci.h>
33 33
34#ifdef CONFIG_X86
35#include <asm/desc.h> 34#include <asm/desc.h>
36#include <asm/ptrace.h> 35#include <asm/ptrace.h>
37#include <asm/irq.h> 36#include <asm/irq.h>
38#include <asm/idle.h> 37#include <asm/idle.h>
39#include <asm/io_apic.h> 38#include <asm/io_apic.h>
40#include <asm/xen/page.h>
41#include <asm/xen/pci.h>
42#endif
43#include <asm/sync_bitops.h> 39#include <asm/sync_bitops.h>
40#include <asm/xen/pci.h>
44#include <asm/xen/hypercall.h> 41#include <asm/xen/hypercall.h>
45#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
46 43
@@ -52,9 +49,6 @@
52#include <xen/interface/event_channel.h> 49#include <xen/interface/event_channel.h>
53#include <xen/interface/hvm/hvm_op.h> 50#include <xen/interface/hvm/hvm_op.h>
54#include <xen/interface/hvm/params.h> 51#include <xen/interface/hvm/params.h>
55#include <xen/interface/physdev.h>
56#include <xen/interface/sched.h>
57#include <asm/hw_irq.h>
58 52
59/* 53/*
60 * This lock protects updates to the following mapping and reference-count 54 * This lock protects updates to the following mapping and reference-count
@@ -91,9 +85,9 @@ enum xen_irq_type {
91 * IPI - IPI vector 85 * IPI - IPI vector
92 * EVTCHN - 86 * EVTCHN -
93 */ 87 */
94struct irq_info { 88struct irq_info
89{
95 struct list_head list; 90 struct list_head list;
96 int refcnt;
97 enum xen_irq_type type; /* type */ 91 enum xen_irq_type type; /* type */
98 unsigned irq; 92 unsigned irq;
99 unsigned short evtchn; /* event channel */ 93 unsigned short evtchn; /* event channel */
@@ -115,10 +109,6 @@ struct irq_info {
115#define PIRQ_SHAREABLE (1 << 1) 109#define PIRQ_SHAREABLE (1 << 1)
116 110
117static int *evtchn_to_irq; 111static int *evtchn_to_irq;
118#ifdef CONFIG_X86
119static unsigned long *pirq_eoi_map;
120#endif
121static bool (*pirq_needs_eoi)(unsigned irq);
122 112
123static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], 113static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
124 cpu_evtchn_mask); 114 cpu_evtchn_mask);
@@ -279,16 +269,10 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
279 return ret; 269 return ret;
280} 270}
281 271
282#ifdef CONFIG_X86 272static bool pirq_needs_eoi(unsigned irq)
283static bool pirq_check_eoi_map(unsigned irq)
284{
285 return test_bit(pirq_from_irq(irq), pirq_eoi_map);
286}
287#endif
288
289static bool pirq_needs_eoi_flag(unsigned irq)
290{ 273{
291 struct irq_info *info = info_for_irq(irq); 274 struct irq_info *info = info_for_irq(irq);
275
292 BUG_ON(info->type != IRQT_PIRQ); 276 BUG_ON(info->type != IRQT_PIRQ);
293 277
294 return info->u.pirq.flags & PIRQ_NEEDS_EOI; 278 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
@@ -298,9 +282,9 @@ static inline unsigned long active_evtchns(unsigned int cpu,
298 struct shared_info *sh, 282 struct shared_info *sh,
299 unsigned int idx) 283 unsigned int idx)
300{ 284{
301 return sh->evtchn_pending[idx] & 285 return (sh->evtchn_pending[idx] &
302 per_cpu(cpu_evtchn_mask, cpu)[idx] & 286 per_cpu(cpu_evtchn_mask, cpu)[idx] &
303 ~sh->evtchn_mask[idx]; 287 ~sh->evtchn_mask[idx]);
304} 288}
305 289
306static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) 290static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
@@ -382,22 +366,11 @@ static void unmask_evtchn(int port)
382{ 366{
383 struct shared_info *s = HYPERVISOR_shared_info; 367 struct shared_info *s = HYPERVISOR_shared_info;
384 unsigned int cpu = get_cpu(); 368 unsigned int cpu = get_cpu();
385 int do_hypercall = 0, evtchn_pending = 0;
386 369
387 BUG_ON(!irqs_disabled()); 370 BUG_ON(!irqs_disabled());
388 371
389 if (unlikely((cpu != cpu_from_evtchn(port)))) 372 /* Slow path (hypercall) if this is a non-local port. */
390 do_hypercall = 1; 373 if (unlikely(cpu != cpu_from_evtchn(port))) {
391 else
392 evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]);
393
394 if (unlikely(evtchn_pending && xen_hvm_domain()))
395 do_hypercall = 1;
396
397 /* Slow path (hypercall) if this is a non-local port or if this is
398 * an hvm domain and an event is pending (hvm domains don't have
399 * their own implementation of irq_enable). */
400 if (do_hypercall) {
401 struct evtchn_unmask unmask = { .port = port }; 374 struct evtchn_unmask unmask = { .port = port };
402 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); 375 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
403 } else { 376 } else {
@@ -410,7 +383,7 @@ static void unmask_evtchn(int port)
410 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose 383 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
411 * the interrupt edge' if the channel is masked. 384 * the interrupt edge' if the channel is masked.
412 */ 385 */
413 if (evtchn_pending && 386 if (sync_test_bit(port, &s->evtchn_pending[0]) &&
414 !sync_test_and_set_bit(port / BITS_PER_LONG, 387 !sync_test_and_set_bit(port / BITS_PER_LONG,
415 &vcpu_info->evtchn_pending_sel)) 388 &vcpu_info->evtchn_pending_sel))
416 vcpu_info->evtchn_upcall_pending = 1; 389 vcpu_info->evtchn_upcall_pending = 1;
@@ -434,7 +407,6 @@ static void xen_irq_init(unsigned irq)
434 panic("Unable to allocate metadata for IRQ%d\n", irq); 407 panic("Unable to allocate metadata for IRQ%d\n", irq);
435 408
436 info->type = IRQT_UNBOUND; 409 info->type = IRQT_UNBOUND;
437 info->refcnt = -1;
438 410
439 irq_set_handler_data(irq, info); 411 irq_set_handler_data(irq, info);
440 412
@@ -460,8 +432,7 @@ static int __must_check xen_allocate_irq_dynamic(void)
460 432
461 irq = irq_alloc_desc_from(first, -1); 433 irq = irq_alloc_desc_from(first, -1);
462 434
463 if (irq >= 0) 435 xen_irq_init(irq);
464 xen_irq_init(irq);
465 436
466 return irq; 437 return irq;
467} 438}
@@ -498,8 +469,6 @@ static void xen_free_irq(unsigned irq)
498 469
499 irq_set_handler_data(irq, NULL); 470 irq_set_handler_data(irq, NULL);
500 471
501 WARN_ON(info->refcnt > 0);
502
503 kfree(info); 472 kfree(info);
504 473
505 /* Legacy IRQ descriptors are managed by the arch. */ 474 /* Legacy IRQ descriptors are managed by the arch. */
@@ -631,7 +600,7 @@ static void disable_pirq(struct irq_data *data)
631 disable_dynirq(data); 600 disable_dynirq(data);
632} 601}
633 602
634int xen_irq_from_gsi(unsigned gsi) 603static int find_irq_by_gsi(unsigned gsi)
635{ 604{
636 struct irq_info *info; 605 struct irq_info *info;
637 606
@@ -645,7 +614,6 @@ int xen_irq_from_gsi(unsigned gsi)
645 614
646 return -1; 615 return -1;
647} 616}
648EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
649 617
650/* 618/*
651 * Do not make any assumptions regarding the relationship between the 619 * Do not make any assumptions regarding the relationship between the
@@ -665,11 +633,11 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
665 633
666 mutex_lock(&irq_mapping_update_lock); 634 mutex_lock(&irq_mapping_update_lock);
667 635
668 irq = xen_irq_from_gsi(gsi); 636 irq = find_irq_by_gsi(gsi);
669 if (irq != -1) { 637 if (irq != -1) {
670 printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", 638 printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
671 irq, gsi); 639 irq, gsi);
672 goto out; 640 goto out; /* XXX need refcount? */
673 } 641 }
674 642
675 irq = xen_allocate_irq_gsi(gsi); 643 irq = xen_allocate_irq_gsi(gsi);
@@ -745,7 +713,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
745 mutex_lock(&irq_mapping_update_lock); 713 mutex_lock(&irq_mapping_update_lock);
746 714
747 irq = xen_allocate_irq_dynamic(); 715 irq = xen_allocate_irq_dynamic();
748 if (irq < 0) 716 if (irq == -1)
749 goto out; 717 goto out;
750 718
751 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, 719 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -761,7 +729,7 @@ out:
761error_irq: 729error_irq:
762 mutex_unlock(&irq_mapping_update_lock); 730 mutex_unlock(&irq_mapping_update_lock);
763 xen_free_irq(irq); 731 xen_free_irq(irq);
764 return ret; 732 return -1;
765} 733}
766#endif 734#endif
767 735
@@ -811,7 +779,7 @@ int xen_irq_from_pirq(unsigned pirq)
811 mutex_lock(&irq_mapping_update_lock); 779 mutex_lock(&irq_mapping_update_lock);
812 780
813 list_for_each_entry(info, &xen_irq_list_head, list) { 781 list_for_each_entry(info, &xen_irq_list_head, list) {
814 if (info->type != IRQT_PIRQ) 782 if (info == NULL || info->type != IRQT_PIRQ)
815 continue; 783 continue;
816 irq = info->irq; 784 irq = info->irq;
817 if (info->u.pirq.pirq == pirq) 785 if (info->u.pirq.pirq == pirq)
@@ -847,11 +815,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
847 handle_edge_irq, "event"); 815 handle_edge_irq, "event");
848 816
849 xen_irq_info_evtchn_init(irq, evtchn); 817 xen_irq_info_evtchn_init(irq, evtchn);
850 } else {
851 struct irq_info *info = info_for_irq(irq);
852 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
853 } 818 }
854 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
855 819
856out: 820out:
857 mutex_unlock(&irq_mapping_update_lock); 821 mutex_unlock(&irq_mapping_update_lock);
@@ -886,9 +850,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
886 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 850 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
887 851
888 bind_evtchn_to_cpu(evtchn, cpu); 852 bind_evtchn_to_cpu(evtchn, cpu);
889 } else {
890 struct irq_info *info = info_for_irq(irq);
891 WARN_ON(info == NULL || info->type != IRQT_IPI);
892 } 853 }
893 854
894 out: 855 out:
@@ -911,32 +872,11 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
911 return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); 872 return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
912} 873}
913 874
914static int find_virq(unsigned int virq, unsigned int cpu)
915{
916 struct evtchn_status status;
917 int port, rc = -ENOENT;
918
919 memset(&status, 0, sizeof(status));
920 for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
921 status.dom = DOMID_SELF;
922 status.port = port;
923 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
924 if (rc < 0)
925 continue;
926 if (status.status != EVTCHNSTAT_virq)
927 continue;
928 if (status.u.virq == virq && status.vcpu == cpu) {
929 rc = port;
930 break;
931 }
932 }
933 return rc;
934}
935 875
936int bind_virq_to_irq(unsigned int virq, unsigned int cpu) 876int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
937{ 877{
938 struct evtchn_bind_virq bind_virq; 878 struct evtchn_bind_virq bind_virq;
939 int evtchn, irq, ret; 879 int evtchn, irq;
940 880
941 mutex_lock(&irq_mapping_update_lock); 881 mutex_lock(&irq_mapping_update_lock);
942 882
@@ -952,23 +892,14 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
952 892
953 bind_virq.virq = virq; 893 bind_virq.virq = virq;
954 bind_virq.vcpu = cpu; 894 bind_virq.vcpu = cpu;
955 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, 895 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
956 &bind_virq); 896 &bind_virq) != 0)
957 if (ret == 0) 897 BUG();
958 evtchn = bind_virq.port; 898 evtchn = bind_virq.port;
959 else {
960 if (ret == -EEXIST)
961 ret = find_virq(virq, cpu);
962 BUG_ON(ret < 0);
963 evtchn = ret;
964 }
965 899
966 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 900 xen_irq_info_virq_init(cpu, irq, evtchn, virq);
967 901
968 bind_evtchn_to_cpu(evtchn, cpu); 902 bind_evtchn_to_cpu(evtchn, cpu);
969 } else {
970 struct irq_info *info = info_for_irq(irq);
971 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
972 } 903 }
973 904
974out: 905out:
@@ -981,16 +912,9 @@ static void unbind_from_irq(unsigned int irq)
981{ 912{
982 struct evtchn_close close; 913 struct evtchn_close close;
983 int evtchn = evtchn_from_irq(irq); 914 int evtchn = evtchn_from_irq(irq);
984 struct irq_info *info = irq_get_handler_data(irq);
985 915
986 mutex_lock(&irq_mapping_update_lock); 916 mutex_lock(&irq_mapping_update_lock);
987 917
988 if (info->refcnt > 0) {
989 info->refcnt--;
990 if (info->refcnt != 0)
991 goto done;
992 }
993
994 if (VALID_EVTCHN(evtchn)) { 918 if (VALID_EVTCHN(evtchn)) {
995 close.port = evtchn; 919 close.port = evtchn;
996 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 920 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
@@ -1019,7 +943,6 @@ static void unbind_from_irq(unsigned int irq)
1019 943
1020 xen_free_irq(irq); 944 xen_free_irq(irq);
1021 945
1022 done:
1023 mutex_unlock(&irq_mapping_update_lock); 946 mutex_unlock(&irq_mapping_update_lock);
1024} 947}
1025 948
@@ -1115,69 +1038,6 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1115} 1038}
1116EXPORT_SYMBOL_GPL(unbind_from_irqhandler); 1039EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1117 1040
1118int evtchn_make_refcounted(unsigned int evtchn)
1119{
1120 int irq = evtchn_to_irq[evtchn];
1121 struct irq_info *info;
1122
1123 if (irq == -1)
1124 return -ENOENT;
1125
1126 info = irq_get_handler_data(irq);
1127
1128 if (!info)
1129 return -ENOENT;
1130
1131 WARN_ON(info->refcnt != -1);
1132
1133 info->refcnt = 1;
1134
1135 return 0;
1136}
1137EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1138
1139int evtchn_get(unsigned int evtchn)
1140{
1141 int irq;
1142 struct irq_info *info;
1143 int err = -ENOENT;
1144
1145 if (evtchn >= NR_EVENT_CHANNELS)
1146 return -EINVAL;
1147
1148 mutex_lock(&irq_mapping_update_lock);
1149
1150 irq = evtchn_to_irq[evtchn];
1151 if (irq == -1)
1152 goto done;
1153
1154 info = irq_get_handler_data(irq);
1155
1156 if (!info)
1157 goto done;
1158
1159 err = -EINVAL;
1160 if (info->refcnt <= 0)
1161 goto done;
1162
1163 info->refcnt++;
1164 err = 0;
1165 done:
1166 mutex_unlock(&irq_mapping_update_lock);
1167
1168 return err;
1169}
1170EXPORT_SYMBOL_GPL(evtchn_get);
1171
1172void evtchn_put(unsigned int evtchn)
1173{
1174 int irq = evtchn_to_irq[evtchn];
1175 if (WARN_ON(irq == -1))
1176 return;
1177 unbind_from_irq(irq);
1178}
1179EXPORT_SYMBOL_GPL(evtchn_put);
1180
1181void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) 1041void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1182{ 1042{
1183 int irq = per_cpu(ipi_to_irq, cpu)[vector]; 1043 int irq = per_cpu(ipi_to_irq, cpu)[vector];
@@ -1292,7 +1152,7 @@ static void __xen_evtchn_do_upcall(void)
1292 int cpu = get_cpu(); 1152 int cpu = get_cpu();
1293 struct shared_info *s = HYPERVISOR_shared_info; 1153 struct shared_info *s = HYPERVISOR_shared_info;
1294 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); 1154 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1295 unsigned count; 1155 unsigned count;
1296 1156
1297 do { 1157 do {
1298 unsigned long pending_words; 1158 unsigned long pending_words;
@@ -1395,10 +1255,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
1395{ 1255{
1396 struct pt_regs *old_regs = set_irq_regs(regs); 1256 struct pt_regs *old_regs = set_irq_regs(regs);
1397 1257
1398 irq_enter();
1399#ifdef CONFIG_X86
1400 exit_idle(); 1258 exit_idle();
1401#endif 1259 irq_enter();
1402 1260
1403 __xen_evtchn_do_upcall(); 1261 __xen_evtchn_do_upcall();
1404 1262
@@ -1812,7 +1670,6 @@ void __init xen_init_IRQ(void)
1812 1670
1813 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), 1671 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
1814 GFP_KERNEL); 1672 GFP_KERNEL);
1815 BUG_ON(!evtchn_to_irq);
1816 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1673 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1817 evtchn_to_irq[i] = -1; 1674 evtchn_to_irq[i] = -1;
1818 1675
@@ -1822,9 +1679,6 @@ void __init xen_init_IRQ(void)
1822 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1679 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1823 mask_evtchn(i); 1680 mask_evtchn(i);
1824 1681
1825 pirq_needs_eoi = pirq_needs_eoi_flag;
1826
1827#ifdef CONFIG_X86
1828 if (xen_hvm_domain()) { 1682 if (xen_hvm_domain()) {
1829 xen_callback_vector(); 1683 xen_callback_vector();
1830 native_init_IRQ(); 1684 native_init_IRQ();
@@ -1832,21 +1686,8 @@ void __init xen_init_IRQ(void)
1832 * __acpi_register_gsi can point at the right function */ 1686 * __acpi_register_gsi can point at the right function */
1833 pci_xen_hvm_init(); 1687 pci_xen_hvm_init();
1834 } else { 1688 } else {
1835 int rc;
1836 struct physdev_pirq_eoi_gmfn eoi_gmfn;
1837
1838 irq_ctx_init(smp_processor_id()); 1689 irq_ctx_init(smp_processor_id());
1839 if (xen_initial_domain()) 1690 if (xen_initial_domain())
1840 pci_xen_initial_domain(); 1691 pci_xen_initial_domain();
1841
1842 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
1843 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
1844 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
1845 if (rc != 0) {
1846 free_page((unsigned long) pirq_eoi_map);
1847 pirq_eoi_map = NULL;
1848 } else
1849 pirq_needs_eoi = pirq_check_eoi_map;
1850 } 1692 }
1851#endif
1852} 1693}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b1f60a0c0be..dbc13e94b61 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -268,7 +268,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
268 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, 268 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
269 u->name, (void *)(unsigned long)port); 269 u->name, (void *)(unsigned long)port);
270 if (rc >= 0) 270 if (rc >= 0)
271 rc = evtchn_make_refcounted(port); 271 rc = 0;
272 272
273 return rc; 273 return rc;
274} 274}
diff --git a/drivers/xen/fallback.c b/drivers/xen/fallback.c
deleted file mode 100644
index 0ef7c4d40f8..00000000000
--- a/drivers/xen/fallback.c
+++ /dev/null
@@ -1,80 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/string.h>
3#include <linux/bug.h>
4#include <linux/export.h>
5#include <asm/hypervisor.h>
6#include <asm/xen/hypercall.h>
7
8int xen_event_channel_op_compat(int cmd, void *arg)
9{
10 struct evtchn_op op;
11 int rc;
12
13 op.cmd = cmd;
14 memcpy(&op.u, arg, sizeof(op.u));
15 rc = _hypercall1(int, event_channel_op_compat, &op);
16
17 switch (cmd) {
18 case EVTCHNOP_close:
19 case EVTCHNOP_send:
20 case EVTCHNOP_bind_vcpu:
21 case EVTCHNOP_unmask:
22 /* no output */
23 break;
24
25#define COPY_BACK(eop) \
26 case EVTCHNOP_##eop: \
27 memcpy(arg, &op.u.eop, sizeof(op.u.eop)); \
28 break
29
30 COPY_BACK(bind_interdomain);
31 COPY_BACK(bind_virq);
32 COPY_BACK(bind_pirq);
33 COPY_BACK(status);
34 COPY_BACK(alloc_unbound);
35 COPY_BACK(bind_ipi);
36#undef COPY_BACK
37
38 default:
39 WARN_ON(rc != -ENOSYS);
40 break;
41 }
42
43 return rc;
44}
45EXPORT_SYMBOL_GPL(xen_event_channel_op_compat);
46
47int HYPERVISOR_physdev_op_compat(int cmd, void *arg)
48{
49 struct physdev_op op;
50 int rc;
51
52 op.cmd = cmd;
53 memcpy(&op.u, arg, sizeof(op.u));
54 rc = _hypercall1(int, physdev_op_compat, &op);
55
56 switch (cmd) {
57 case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
58 case PHYSDEVOP_set_iopl:
59 case PHYSDEVOP_set_iobitmap:
60 case PHYSDEVOP_apic_write:
61 /* no output */
62 break;
63
64#define COPY_BACK(pop, fld) \
65 case PHYSDEVOP_##pop: \
66 memcpy(arg, &op.u.fld, sizeof(op.u.fld)); \
67 break
68
69 COPY_BACK(irq_status_query, irq_status_query);
70 COPY_BACK(apic_read, apic_op);
71 COPY_BACK(ASSIGN_VECTOR, irq_op);
72#undef COPY_BACK
73
74 default:
75 WARN_ON(rc != -ENOSYS);
76 break;
77 }
78
79 return rc;
80}
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 4097987b330..e1c4c6e5b46 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -74,7 +74,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
74 "the gntalloc device"); 74 "the gntalloc device");
75 75
76static LIST_HEAD(gref_list); 76static LIST_HEAD(gref_list);
77static DEFINE_MUTEX(gref_mutex); 77static DEFINE_SPINLOCK(gref_lock);
78static int gref_size; 78static int gref_size;
79 79
80struct notify_info { 80struct notify_info {
@@ -99,12 +99,6 @@ struct gntalloc_file_private_data {
99 uint64_t index; 99 uint64_t index;
100}; 100};
101 101
102struct gntalloc_vma_private_data {
103 struct gntalloc_gref *gref;
104 int users;
105 int count;
106};
107
108static void __del_gref(struct gntalloc_gref *gref); 102static void __del_gref(struct gntalloc_gref *gref);
109 103
110static void do_cleanup(void) 104static void do_cleanup(void)
@@ -149,15 +143,15 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
149 } 143 }
150 144
151 /* Add to gref lists. */ 145 /* Add to gref lists. */
152 mutex_lock(&gref_mutex); 146 spin_lock(&gref_lock);
153 list_splice_tail(&queue_gref, &gref_list); 147 list_splice_tail(&queue_gref, &gref_list);
154 list_splice_tail(&queue_file, &priv->list); 148 list_splice_tail(&queue_file, &priv->list);
155 mutex_unlock(&gref_mutex); 149 spin_unlock(&gref_lock);
156 150
157 return 0; 151 return 0;
158 152
159undo: 153undo:
160 mutex_lock(&gref_mutex); 154 spin_lock(&gref_lock);
161 gref_size -= (op->count - i); 155 gref_size -= (op->count - i);
162 156
163 list_for_each_entry(gref, &queue_file, next_file) { 157 list_for_each_entry(gref, &queue_file, next_file) {
@@ -173,7 +167,7 @@ undo:
173 */ 167 */
174 if (unlikely(!list_empty(&queue_gref))) 168 if (unlikely(!list_empty(&queue_gref)))
175 list_splice_tail(&queue_gref, &gref_list); 169 list_splice_tail(&queue_gref, &gref_list);
176 mutex_unlock(&gref_mutex); 170 spin_unlock(&gref_lock);
177 return rc; 171 return rc;
178} 172}
179 173
@@ -184,10 +178,8 @@ static void __del_gref(struct gntalloc_gref *gref)
184 tmp[gref->notify.pgoff] = 0; 178 tmp[gref->notify.pgoff] = 0;
185 kunmap(gref->page); 179 kunmap(gref->page);
186 } 180 }
187 if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { 181 if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
188 notify_remote_via_evtchn(gref->notify.event); 182 notify_remote_via_evtchn(gref->notify.event);
189 evtchn_put(gref->notify.event);
190 }
191 183
192 gref->notify.flags = 0; 184 gref->notify.flags = 0;
193 185
@@ -197,8 +189,6 @@ static void __del_gref(struct gntalloc_gref *gref)
197 189
198 if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) 190 if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
199 return; 191 return;
200
201 gnttab_free_grant_reference(gref->gref_id);
202 } 192 }
203 193
204 gref_size--; 194 gref_size--;
@@ -261,7 +251,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
261 251
262 pr_debug("%s: priv %p\n", __func__, priv); 252 pr_debug("%s: priv %p\n", __func__, priv);
263 253
264 mutex_lock(&gref_mutex); 254 spin_lock(&gref_lock);
265 while (!list_empty(&priv->list)) { 255 while (!list_empty(&priv->list)) {
266 gref = list_entry(priv->list.next, 256 gref = list_entry(priv->list.next,
267 struct gntalloc_gref, next_file); 257 struct gntalloc_gref, next_file);
@@ -271,7 +261,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
271 __del_gref(gref); 261 __del_gref(gref);
272 } 262 }
273 kfree(priv); 263 kfree(priv);
274 mutex_unlock(&gref_mutex); 264 spin_unlock(&gref_lock);
275 265
276 return 0; 266 return 0;
277} 267}
@@ -296,21 +286,21 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
296 goto out; 286 goto out;
297 } 287 }
298 288
299 mutex_lock(&gref_mutex); 289 spin_lock(&gref_lock);
300 /* Clean up pages that were at zero (local) users but were still mapped 290 /* Clean up pages that were at zero (local) users but were still mapped
301 * by remote domains. Since those pages count towards the limit that we 291 * by remote domains. Since those pages count towards the limit that we
302 * are about to enforce, removing them here is a good idea. 292 * are about to enforce, removing them here is a good idea.
303 */ 293 */
304 do_cleanup(); 294 do_cleanup();
305 if (gref_size + op.count > limit) { 295 if (gref_size + op.count > limit) {
306 mutex_unlock(&gref_mutex); 296 spin_unlock(&gref_lock);
307 rc = -ENOSPC; 297 rc = -ENOSPC;
308 goto out_free; 298 goto out_free;
309 } 299 }
310 gref_size += op.count; 300 gref_size += op.count;
311 op.index = priv->index; 301 op.index = priv->index;
312 priv->index += op.count * PAGE_SIZE; 302 priv->index += op.count * PAGE_SIZE;
313 mutex_unlock(&gref_mutex); 303 spin_unlock(&gref_lock);
314 304
315 rc = add_grefs(&op, gref_ids, priv); 305 rc = add_grefs(&op, gref_ids, priv);
316 if (rc < 0) 306 if (rc < 0)
@@ -353,7 +343,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
353 goto dealloc_grant_out; 343 goto dealloc_grant_out;
354 } 344 }
355 345
356 mutex_lock(&gref_mutex); 346 spin_lock(&gref_lock);
357 gref = find_grefs(priv, op.index, op.count); 347 gref = find_grefs(priv, op.index, op.count);
358 if (gref) { 348 if (gref) {
359 /* Remove from the file list only, and decrease reference count. 349 /* Remove from the file list only, and decrease reference count.
@@ -373,7 +363,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
373 363
374 do_cleanup(); 364 do_cleanup();
375 365
376 mutex_unlock(&gref_mutex); 366 spin_unlock(&gref_lock);
377dealloc_grant_out: 367dealloc_grant_out:
378 return rc; 368 return rc;
379} 369}
@@ -393,7 +383,7 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
393 index = op.index & ~(PAGE_SIZE - 1); 383 index = op.index & ~(PAGE_SIZE - 1);
394 pgoff = op.index & (PAGE_SIZE - 1); 384 pgoff = op.index & (PAGE_SIZE - 1);
395 385
396 mutex_lock(&gref_mutex); 386 spin_lock(&gref_lock);
397 387
398 gref = find_grefs(priv, index, 1); 388 gref = find_grefs(priv, index, 1);
399 if (!gref) { 389 if (!gref) {
@@ -406,30 +396,12 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
406 goto unlock_out; 396 goto unlock_out;
407 } 397 }
408 398
409 /* We need to grab a reference to the event channel we are going to use
410 * to send the notify before releasing the reference we may already have
411 * (if someone has called this ioctl twice). This is required so that
412 * it is possible to change the clear_byte part of the notification
413 * without disturbing the event channel part, which may now be the last
414 * reference to that event channel.
415 */
416 if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
417 if (evtchn_get(op.event_channel_port)) {
418 rc = -EINVAL;
419 goto unlock_out;
420 }
421 }
422
423 if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
424 evtchn_put(gref->notify.event);
425
426 gref->notify.flags = op.action; 399 gref->notify.flags = op.action;
427 gref->notify.pgoff = pgoff; 400 gref->notify.pgoff = pgoff;
428 gref->notify.event = op.event_channel_port; 401 gref->notify.event = op.event_channel_port;
429 rc = 0; 402 rc = 0;
430
431 unlock_out: 403 unlock_out:
432 mutex_unlock(&gref_mutex); 404 spin_unlock(&gref_lock);
433 return rc; 405 return rc;
434} 406}
435 407
@@ -457,40 +429,26 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
457 429
458static void gntalloc_vma_open(struct vm_area_struct *vma) 430static void gntalloc_vma_open(struct vm_area_struct *vma)
459{ 431{
460 struct gntalloc_vma_private_data *priv = vma->vm_private_data; 432 struct gntalloc_gref *gref = vma->vm_private_data;
461 433 if (!gref)
462 if (!priv)
463 return; 434 return;
464 435
465 mutex_lock(&gref_mutex); 436 spin_lock(&gref_lock);
466 priv->users++; 437 gref->users++;
467 mutex_unlock(&gref_mutex); 438 spin_unlock(&gref_lock);
468} 439}
469 440
470static void gntalloc_vma_close(struct vm_area_struct *vma) 441static void gntalloc_vma_close(struct vm_area_struct *vma)
471{ 442{
472 struct gntalloc_vma_private_data *priv = vma->vm_private_data; 443 struct gntalloc_gref *gref = vma->vm_private_data;
473 struct gntalloc_gref *gref, *next; 444 if (!gref)
474 int i;
475
476 if (!priv)
477 return; 445 return;
478 446
479 mutex_lock(&gref_mutex); 447 spin_lock(&gref_lock);
480 priv->users--; 448 gref->users--;
481 if (priv->users == 0) { 449 if (gref->users == 0)
482 gref = priv->gref; 450 __del_gref(gref);
483 for (i = 0; i < priv->count; i++) { 451 spin_unlock(&gref_lock);
484 gref->users--;
485 next = list_entry(gref->next_gref.next,
486 struct gntalloc_gref, next_gref);
487 if (gref->users == 0)
488 __del_gref(gref);
489 gref = next;
490 }
491 kfree(priv);
492 }
493 mutex_unlock(&gref_mutex);
494} 452}
495 453
496static struct vm_operations_struct gntalloc_vmops = { 454static struct vm_operations_struct gntalloc_vmops = {
@@ -501,41 +459,30 @@ static struct vm_operations_struct gntalloc_vmops = {
501static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) 459static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
502{ 460{
503 struct gntalloc_file_private_data *priv = filp->private_data; 461 struct gntalloc_file_private_data *priv = filp->private_data;
504 struct gntalloc_vma_private_data *vm_priv;
505 struct gntalloc_gref *gref; 462 struct gntalloc_gref *gref;
506 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 463 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
507 int rv, i; 464 int rv, i;
508 465
466 pr_debug("%s: priv %p, page %lu+%d\n", __func__,
467 priv, vma->vm_pgoff, count);
468
509 if (!(vma->vm_flags & VM_SHARED)) { 469 if (!(vma->vm_flags & VM_SHARED)) {
510 printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); 470 printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
511 return -EINVAL; 471 return -EINVAL;
512 } 472 }
513 473
514 vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL); 474 spin_lock(&gref_lock);
515 if (!vm_priv)
516 return -ENOMEM;
517
518 mutex_lock(&gref_mutex);
519
520 pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__,
521 priv, vm_priv, vma->vm_pgoff, count);
522
523 gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); 475 gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
524 if (gref == NULL) { 476 if (gref == NULL) {
525 rv = -ENOENT; 477 rv = -ENOENT;
526 pr_debug("%s: Could not find grant reference", 478 pr_debug("%s: Could not find grant reference",
527 __func__); 479 __func__);
528 kfree(vm_priv);
529 goto out_unlock; 480 goto out_unlock;
530 } 481 }
531 482
532 vm_priv->gref = gref; 483 vma->vm_private_data = gref;
533 vm_priv->users = 1;
534 vm_priv->count = count;
535
536 vma->vm_private_data = vm_priv;
537 484
538 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 485 vma->vm_flags |= VM_RESERVED;
539 486
540 vma->vm_ops = &gntalloc_vmops; 487 vma->vm_ops = &gntalloc_vmops;
541 488
@@ -552,7 +499,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
552 rv = 0; 499 rv = 0;
553 500
554out_unlock: 501out_unlock:
555 mutex_unlock(&gref_mutex); 502 spin_unlock(&gref_lock);
556 return rv; 503 return rv;
557} 504}
558 505
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2e22df2f7a3..f914b26cf0c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,7 +83,6 @@ struct grant_map {
83 struct ioctl_gntdev_grant_ref *grants; 83 struct ioctl_gntdev_grant_ref *grants;
84 struct gnttab_map_grant_ref *map_ops; 84 struct gnttab_map_grant_ref *map_ops;
85 struct gnttab_unmap_grant_ref *unmap_ops; 85 struct gnttab_unmap_grant_ref *unmap_ops;
86 struct gnttab_map_grant_ref *kmap_ops;
87 struct page **pages; 86 struct page **pages;
88}; 87};
89 88
@@ -105,21 +104,6 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
105#endif 104#endif
106} 105}
107 106
108static void gntdev_free_map(struct grant_map *map)
109{
110 if (map == NULL)
111 return;
112
113 if (map->pages)
114 free_xenballooned_pages(map->count, map->pages);
115 kfree(map->pages);
116 kfree(map->grants);
117 kfree(map->map_ops);
118 kfree(map->unmap_ops);
119 kfree(map->kmap_ops);
120 kfree(map);
121}
122
123static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) 107static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
124{ 108{
125 struct grant_map *add; 109 struct grant_map *add;
@@ -129,25 +113,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
129 if (NULL == add) 113 if (NULL == add)
130 return NULL; 114 return NULL;
131 115
132 add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); 116 add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
133 add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); 117 add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
134 add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); 118 add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
135 add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); 119 add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
136 add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
137 if (NULL == add->grants || 120 if (NULL == add->grants ||
138 NULL == add->map_ops || 121 NULL == add->map_ops ||
139 NULL == add->unmap_ops || 122 NULL == add->unmap_ops ||
140 NULL == add->kmap_ops ||
141 NULL == add->pages) 123 NULL == add->pages)
142 goto err; 124 goto err;
143 125
144 if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */)) 126 if (alloc_xenballooned_pages(count, add->pages))
145 goto err; 127 goto err;
146 128
147 for (i = 0; i < count; i++) { 129 for (i = 0; i < count; i++) {
148 add->map_ops[i].handle = -1; 130 add->map_ops[i].handle = -1;
149 add->unmap_ops[i].handle = -1; 131 add->unmap_ops[i].handle = -1;
150 add->kmap_ops[i].handle = -1;
151 } 132 }
152 133
153 add->index = 0; 134 add->index = 0;
@@ -157,7 +138,11 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
157 return add; 138 return add;
158 139
159err: 140err:
160 gntdev_free_map(add); 141 kfree(add->pages);
142 kfree(add->grants);
143 kfree(add->map_ops);
144 kfree(add->unmap_ops);
145 kfree(add);
161 return NULL; 146 return NULL;
162} 147}
163 148
@@ -205,12 +190,19 @@ static void gntdev_put_map(struct grant_map *map)
205 190
206 if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { 191 if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
207 notify_remote_via_evtchn(map->notify.event); 192 notify_remote_via_evtchn(map->notify.event);
208 evtchn_put(map->notify.event);
209 } 193 }
210 194
211 if (map->pages && !use_ptemod) 195 if (map->pages) {
212 unmap_grant_pages(map, 0, map->count); 196 if (!use_ptemod)
213 gntdev_free_map(map); 197 unmap_grant_pages(map, 0, map->count);
198
199 free_xenballooned_pages(map->count, map->pages);
200 }
201 kfree(map->pages);
202 kfree(map->grants);
203 kfree(map->map_ops);
204 kfree(map->unmap_ops);
205 kfree(map);
214} 206}
215 207
216/* ------------------------------------------------------------------ */ 208/* ------------------------------------------------------------------ */
@@ -251,35 +243,10 @@ static int map_grant_pages(struct grant_map *map)
251 gnttab_set_unmap_op(&map->unmap_ops[i], addr, 243 gnttab_set_unmap_op(&map->unmap_ops[i], addr,
252 map->flags, -1 /* handle */); 244 map->flags, -1 /* handle */);
253 } 245 }
254 } else {
255 /*
256 * Setup the map_ops corresponding to the pte entries pointing
257 * to the kernel linear addresses of the struct pages.
258 * These ptes are completely different from the user ptes dealt
 259 * with by find_grant_ptes.
260 */
261 for (i = 0; i < map->count; i++) {
262 unsigned level;
263 unsigned long address = (unsigned long)
264 pfn_to_kaddr(page_to_pfn(map->pages[i]));
265 pte_t *ptep;
266 u64 pte_maddr = 0;
267 BUG_ON(PageHighMem(map->pages[i]));
268
269 ptep = lookup_address(address, &level);
270 pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
271 gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
272 map->flags |
273 GNTMAP_host_map |
274 GNTMAP_contains_pte,
275 map->grants[i].ref,
276 map->grants[i].domid);
277 }
278 } 246 }
279 247
280 pr_debug("map %d+%d\n", map->index, map->count); 248 pr_debug("map %d+%d\n", map->index, map->count);
281 err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, 249 err = gnttab_map_refs(map->map_ops, map->pages, map->count);
282 map->pages, map->count);
283 if (err) 250 if (err)
284 return err; 251 return err;
285 252
@@ -316,9 +283,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
316 } 283 }
317 } 284 }
318 285
319 err = gnttab_unmap_refs(map->unmap_ops + offset, 286 err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
320 use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
321 pages);
322 if (err) 287 if (err)
323 return err; 288 return err;
324 289
@@ -448,7 +413,7 @@ static void mn_release(struct mmu_notifier *mn,
448 spin_unlock(&priv->lock); 413 spin_unlock(&priv->lock);
449} 414}
450 415
451static struct mmu_notifier_ops gntdev_mmu_ops = { 416struct mmu_notifier_ops gntdev_mmu_ops = {
452 .release = mn_release, 417 .release = mn_release,
453 .invalidate_page = mn_invl_page, 418 .invalidate_page = mn_invl_page,
454 .invalidate_range_start = mn_invl_range_start, 419 .invalidate_range_start = mn_invl_range_start,
@@ -497,11 +462,13 @@ static int gntdev_release(struct inode *inode, struct file *flip)
497 462
498 pr_debug("priv %p\n", priv); 463 pr_debug("priv %p\n", priv);
499 464
465 spin_lock(&priv->lock);
500 while (!list_empty(&priv->maps)) { 466 while (!list_empty(&priv->maps)) {
501 map = list_entry(priv->maps.next, struct grant_map, next); 467 map = list_entry(priv->maps.next, struct grant_map, next);
502 list_del(&map->next); 468 list_del(&map->next);
503 gntdev_put_map(map); 469 gntdev_put_map(map);
504 } 470 }
471 spin_unlock(&priv->lock);
505 472
506 if (use_ptemod) 473 if (use_ptemod)
507 mmu_notifier_unregister(&priv->mn, priv->mm); 474 mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -565,11 +532,10 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
565 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 532 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
566 if (map) { 533 if (map) {
567 list_del(&map->next); 534 list_del(&map->next);
535 gntdev_put_map(map);
568 err = 0; 536 err = 0;
569 } 537 }
570 spin_unlock(&priv->lock); 538 spin_unlock(&priv->lock);
571 if (map)
572 gntdev_put_map(map);
573 return err; 539 return err;
574} 540}
575 541
@@ -605,8 +571,6 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
605 struct ioctl_gntdev_unmap_notify op; 571 struct ioctl_gntdev_unmap_notify op;
606 struct grant_map *map; 572 struct grant_map *map;
607 int rc; 573 int rc;
608 int out_flags;
609 unsigned int out_event;
610 574
611 if (copy_from_user(&op, u, sizeof(op))) 575 if (copy_from_user(&op, u, sizeof(op)))
612 return -EFAULT; 576 return -EFAULT;
@@ -614,21 +578,6 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
614 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) 578 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
615 return -EINVAL; 579 return -EINVAL;
616 580
617 /* We need to grab a reference to the event channel we are going to use
618 * to send the notify before releasing the reference we may already have
619 * (if someone has called this ioctl twice). This is required so that
620 * it is possible to change the clear_byte part of the notification
621 * without disturbing the event channel part, which may now be the last
622 * reference to that event channel.
623 */
624 if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
625 if (evtchn_get(op.event_channel_port))
626 return -EINVAL;
627 }
628
629 out_flags = op.action;
630 out_event = op.event_channel_port;
631
632 spin_lock(&priv->lock); 581 spin_lock(&priv->lock);
633 582
634 list_for_each_entry(map, &priv->maps, next) { 583 list_for_each_entry(map, &priv->maps, next) {
@@ -647,22 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
647 goto unlock_out; 596 goto unlock_out;
648 } 597 }
649 598
650 out_flags = map->notify.flags;
651 out_event = map->notify.event;
652
653 map->notify.flags = op.action; 599 map->notify.flags = op.action;
654 map->notify.addr = op.index - (map->index << PAGE_SHIFT); 600 map->notify.addr = op.index - (map->index << PAGE_SHIFT);
655 map->notify.event = op.event_channel_port; 601 map->notify.event = op.event_channel_port;
656
657 rc = 0; 602 rc = 0;
658
659 unlock_out: 603 unlock_out:
660 spin_unlock(&priv->lock); 604 spin_unlock(&priv->lock);
661
662 /* Drop the reference to the event channel we did not save in the map */
663 if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
664 evtchn_put(out_event);
665
666 return rc; 605 return rc;
667} 606}
668 607
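The comment removed in the hunk above spells out an ordering rule: when gntdev_ioctl_notify() is called a second time, the reference on the new event channel must be taken (evtchn_get()) before the reference recorded by the earlier call is dropped (evtchn_put()), otherwise changing only the clear-byte part of a notification could release the last reference to the very channel being kept. A generic sketch of that acquire-before-release pattern, using a hypothetical refcounted type purely for illustration:

    #include <linux/errno.h>
    #include <linux/kref.h>

    /* Hypothetical refcounted channel type, for illustration only. */
    struct chan { struct kref ref; };

    static int chan_get(struct chan *c) { kref_get(&c->ref); return 0; }
    static void chan_release(struct kref *k) { /* free the object here */ }
    static void chan_put(struct chan *c) { kref_put(&c->ref, chan_release); }

    /* Replace the reference held in *slot. Taking the new reference
     * first keeps the object alive even when newchan == *slot and the
     * reference being dropped was the last one -- the same ordering the
     * removed comment requires for evtchn_get()/evtchn_put().
     */
    static int replace_chan(struct chan **slot, struct chan *newchan)
    {
            struct chan *old = *slot;
            int err = chan_get(newchan);

            if (err)
                    return err;
            *slot = newchan;
            if (old)
                    chan_put(old);
            return 0;
    }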
@@ -722,10 +661,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
722 661
723 vma->vm_ops = &gntdev_vmops; 662 vma->vm_ops = &gntdev_vmops;
724 663
725 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 664 vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND;
726 665
727 if (use_ptemod) 666 if (use_ptemod)
728 vma->vm_flags |= VM_DONTCOPY; 667 vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP;
729 668
730 vma->vm_private_data = map; 669 vma->vm_private_data = map;
731 670
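Taken together, the gntdev.c changes all sit on the path a userspace client drives through /dev/xen/gntdev: map a batch of grant references with an ioctl, mmap() the region at the returned index, and optionally arrange an unmap notification. A rough sketch of that flow, assuming the ioctl name and structure layout from the gntdev UAPI header; the header path and the domid/grant-reference values are illustrative, not taken from this diff:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <xen/gntdev.h>   /* assumed install path of the UAPI header */

    int map_one_grant(void)
    {
            int fd = open("/dev/xen/gntdev", O_RDWR);
            struct ioctl_gntdev_map_grant_ref map = {
                    .count = 1,
                    .refs[0] = { .domid = 1, .ref = 8 },  /* made-up values */
            };
            void *p;

            if (fd < 0 || ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) < 0)
                    return -1;

            /* map.index is the mmap offset assigned to this mapping. */
            p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                     fd, map.index);
            return p == MAP_FAILED ? -1 : 0;
    }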
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7038de53652..4f44b347b24 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -38,28 +38,22 @@
38#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
39#include <linux/uaccess.h> 39#include <linux/uaccess.h>
40#include <linux/io.h> 40#include <linux/io.h>
41#include <linux/delay.h>
42#include <linux/hardirq.h>
43 41
44#include <xen/xen.h> 42#include <xen/xen.h>
45#include <xen/interface/xen.h> 43#include <xen/interface/xen.h>
46#include <xen/page.h> 44#include <xen/page.h>
47#include <xen/grant_table.h> 45#include <xen/grant_table.h>
48#include <xen/interface/memory.h> 46#include <xen/interface/memory.h>
49#include <xen/hvc-console.h>
50#include <asm/xen/hypercall.h> 47#include <asm/xen/hypercall.h>
51#include <asm/xen/interface.h>
52 48
53#include <asm/pgtable.h> 49#include <asm/pgtable.h>
54#include <asm/sync_bitops.h> 50#include <asm/sync_bitops.h>
55 51
52
56/* External tools reserve first few grant table entries. */ 53/* External tools reserve first few grant table entries. */
57#define NR_RESERVED_ENTRIES 8 54#define NR_RESERVED_ENTRIES 8
58#define GNTTAB_LIST_END 0xffffffff 55#define GNTTAB_LIST_END 0xffffffff
59#define GREFS_PER_GRANT_FRAME \ 56#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
60(grant_table_version == 1 ? \
61(PAGE_SIZE / sizeof(struct grant_entry_v1)) : \
62(PAGE_SIZE / sizeof(union grant_entry_v2)))
63 57
64static grant_ref_t **gnttab_list; 58static grant_ref_t **gnttab_list;
65static unsigned int nr_grant_frames; 59static unsigned int nr_grant_frames;
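The GREFS_PER_GRANT_FRAME definitions above are easier to compare with concrete numbers. Assuming 4 KiB pages and the usual entry layouts, a v1 grant entry (16-bit flags, 16-bit domid, 32-bit frame) is 8 bytes and the v2 union is 16 bytes, so the version-aware macro on the left yields 512 or 256 entries per grant frame, while the single-version macro on the right always yields 512:

    /* Worked example only; entry sizes assume the common x86 layouts. */
    enum {
            GREFS_PER_FRAME_V1 = 4096 / 8,   /* 512 refs per 4 KiB frame */
            GREFS_PER_FRAME_V2 = 4096 / 16,  /* 256 refs per 4 KiB frame */
    };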
@@ -70,97 +64,13 @@ static DEFINE_SPINLOCK(gnttab_list_lock);
70unsigned long xen_hvm_resume_frames; 64unsigned long xen_hvm_resume_frames;
71EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); 65EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
72 66
73static union { 67static struct grant_entry *shared;
74 struct grant_entry_v1 *v1;
75 union grant_entry_v2 *v2;
76 void *addr;
77} gnttab_shared;
78
79/*This is a structure of function pointers for grant table*/
80struct gnttab_ops {
81 /*
82 * Mapping a list of frames for storing grant entries. Frames parameter
83 * is used to store grant table address when grant table being setup,
84 * nr_gframes is the number of frames to map grant table. Returning
85 * GNTST_okay means success and negative value means failure.
86 */
87 int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
88 /*
89 * Release a list of frames which are mapped in map_frames for grant
90 * entry status.
91 */
92 void (*unmap_frames)(void);
93 /*
94 * Introducing a valid entry into the grant table, granting the frame of
 95 * this grant entry to a domain for accessing or transferring. Ref
96 * parameter is reference of this introduced grant entry, domid is id of
97 * granted domain, frame is the page frame to be granted, and flags is
98 * status of the grant entry to be updated.
99 */
100 void (*update_entry)(grant_ref_t ref, domid_t domid,
101 unsigned long frame, unsigned flags);
102 /*
103 * Stop granting a grant entry to domain for accessing. Ref parameter is
104 * reference of a grant entry whose grant access will be stopped,
105 * readonly is not in use in this function. If the grant entry is
106 * currently mapped for reading or writing, just return failure(==0)
107 * directly and don't tear down the grant access. Otherwise, stop grant
108 * access for this entry and return success(==1).
109 */
110 int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
111 /*
112 * Stop granting a grant entry to domain for transfer. Ref parameter is
113 * reference of a grant entry whose grant transfer will be stopped. If
 114 * transfer has not started, just reclaim the grant entry and return
115 * failure(==0). Otherwise, wait for the transfer to complete and then
116 * return the frame.
117 */
118 unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
119 /*
120 * Query the status of a grant entry. Ref parameter is reference of
121 * queried grant entry, return value is the status of queried entry.
 122 * Detailed status (writing/reading) can be obtained from the return value
123 * by bit operations.
124 */
125 int (*query_foreign_access)(grant_ref_t ref);
126 /*
127 * Grant a domain to access a range of bytes within the page referred by
128 * an available grant entry. Ref parameter is reference of a grant entry
129 * which will be sub-page accessed, domid is id of grantee domain, frame
130 * is frame address of subpage grant, flags is grant type and flag
131 * information, page_off is offset of the range of bytes, and length is
132 * length of bytes to be accessed.
133 */
134 void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
135 unsigned long frame, int flags,
136 unsigned page_off, unsigned length);
137 /*
138 * Redirect an available grant entry on domain A to another grant
139 * reference of domain B, then allow domain C to use grant reference
140 * of domain B transitively. Ref parameter is an available grant entry
141 * reference on domain A, domid is id of domain C which accesses grant
142 * entry transitively, flags is grant type and flag information,
143 * trans_domid is id of domain B whose grant entry is finally accessed
144 * transitively, trans_gref is grant entry transitive reference of
145 * domain B.
146 */
147 void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
148 domid_t trans_domid, grant_ref_t trans_gref);
149};
150
151static struct gnttab_ops *gnttab_interface;
152
153/*This reflects status of grant entries, so act as a global value*/
154static grant_status_t *grstatus;
155
156static int grant_table_version;
157 68
158static struct gnttab_free_callback *gnttab_free_callback_list; 69static struct gnttab_free_callback *gnttab_free_callback_list;
159 70
160static int gnttab_expand(unsigned int req_entries); 71static int gnttab_expand(unsigned int req_entries);
161 72
162#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 73#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
163#define SPP (PAGE_SIZE / sizeof(grant_status_t))
164 74
165static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) 75static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
166{ 76{
@@ -232,33 +142,23 @@ static void put_free_entry(grant_ref_t ref)
232 spin_unlock_irqrestore(&gnttab_list_lock, flags); 142 spin_unlock_irqrestore(&gnttab_list_lock, flags);
233} 143}
234 144
235/* 145static void update_grant_entry(grant_ref_t ref, domid_t domid,
236 * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2. 146 unsigned long frame, unsigned flags)
237 * Introducing a valid entry into the grant table:
238 * 1. Write ent->domid.
239 * 2. Write ent->frame:
240 * GTF_permit_access: Frame to which access is permitted.
241 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
242 * frame, or zero if none.
243 * 3. Write memory barrier (WMB).
244 * 4. Write ent->flags, inc. valid type.
245 */
246static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
247 unsigned long frame, unsigned flags)
248{ 147{
249 gnttab_shared.v1[ref].domid = domid; 148 /*
250 gnttab_shared.v1[ref].frame = frame; 149 * Introducing a valid entry into the grant table:
251 wmb(); 150 * 1. Write ent->domid.
252 gnttab_shared.v1[ref].flags = flags; 151 * 2. Write ent->frame:
253} 152 * GTF_permit_access: Frame to which access is permitted.
254 153 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
255static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid, 154 * frame, or zero if none.
256 unsigned long frame, unsigned flags) 155 * 3. Write memory barrier (WMB).
257{ 156 * 4. Write ent->flags, inc. valid type.
258 gnttab_shared.v2[ref].hdr.domid = domid; 157 */
259 gnttab_shared.v2[ref].full_page.frame = frame; 158 shared[ref].frame = frame;
159 shared[ref].domid = domid;
260 wmb(); 160 wmb();
261 gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags; 161 shared[ref].flags = flags;
262} 162}
263 163
264/* 164/*
@@ -267,7 +167,7 @@ static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
267void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 167void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
268 unsigned long frame, int readonly) 168 unsigned long frame, int readonly)
269{ 169{
270 gnttab_interface->update_entry(ref, domid, frame, 170 update_grant_entry(ref, domid, frame,
271 GTF_permit_access | (readonly ? GTF_readonly : 0)); 171 GTF_permit_access | (readonly ? GTF_readonly : 0));
272} 172}
273EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); 173EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
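Whichever side of this diff is built, the exported helpers keep the same shape: gnttab_grant_foreign_access() hands a page frame to another domain and returns a grant reference, and gnttab_end_foreign_access() revokes it, freeing the page or (as the code further down shows) deferring/leaking it if the peer still has it mapped. A hedged sketch of the usual grant/ungrant cycle in a frontend; the backend domain id and the virt_to_mfn() conversion are assumptions for illustration:

    #include <linux/gfp.h>
    #include <xen/grant_table.h>
    #include <asm/xen/page.h>

    static int share_page_with_backend(domid_t backend_domid)
    {
            unsigned long vaddr = get_zeroed_page(GFP_KERNEL);
            int ref;

            if (!vaddr)
                    return -ENOMEM;

            ref = gnttab_grant_foreign_access(backend_domid,
                                              virt_to_mfn((void *)vaddr),
                                              0 /* read-write */);
            if (ref < 0) {
                    free_page(vaddr);
                    return ref;
            }

            /* ... advertise 'ref' to the backend, e.g. via xenstore ... */

            gnttab_end_foreign_access(ref, 0 /* read-write */, vaddr);
            return 0;
    }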
@@ -287,274 +187,33 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
287} 187}
288EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); 188EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
289 189
290static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, 190int gnttab_query_foreign_access(grant_ref_t ref)
291 unsigned long frame, int flags,
292 unsigned page_off, unsigned length)
293{
294 gnttab_shared.v2[ref].sub_page.frame = frame;
295 gnttab_shared.v2[ref].sub_page.page_off = page_off;
296 gnttab_shared.v2[ref].sub_page.length = length;
297 gnttab_shared.v2[ref].hdr.domid = domid;
298 wmb();
299 gnttab_shared.v2[ref].hdr.flags =
300 GTF_permit_access | GTF_sub_page | flags;
301}
302
303int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
304 unsigned long frame, int flags,
305 unsigned page_off,
306 unsigned length)
307{
308 if (flags & (GTF_accept_transfer | GTF_reading |
309 GTF_writing | GTF_transitive))
310 return -EPERM;
311
312 if (gnttab_interface->update_subpage_entry == NULL)
313 return -ENOSYS;
314
315 gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
316 page_off, length);
317
318 return 0;
319}
320EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
321
322int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
323 int flags, unsigned page_off,
324 unsigned length)
325{
326 int ref, rc;
327
328 ref = get_free_entries(1);
329 if (unlikely(ref < 0))
330 return -ENOSPC;
331
332 rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
333 page_off, length);
334 if (rc < 0) {
335 put_free_entry(ref);
336 return rc;
337 }
338
339 return ref;
340}
341EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
342
343bool gnttab_subpage_grants_available(void)
344{
345 return gnttab_interface->update_subpage_entry != NULL;
346}
347EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
348
349static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
350 int flags, domid_t trans_domid,
351 grant_ref_t trans_gref)
352{
353 gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
354 gnttab_shared.v2[ref].transitive.gref = trans_gref;
355 gnttab_shared.v2[ref].hdr.domid = domid;
356 wmb();
357 gnttab_shared.v2[ref].hdr.flags =
358 GTF_permit_access | GTF_transitive | flags;
359}
360
361int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
362 int flags, domid_t trans_domid,
363 grant_ref_t trans_gref)
364{
365 if (flags & (GTF_accept_transfer | GTF_reading |
366 GTF_writing | GTF_sub_page))
367 return -EPERM;
368
369 if (gnttab_interface->update_trans_entry == NULL)
370 return -ENOSYS;
371
372 gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
373 trans_gref);
374
375 return 0;
376}
377EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
378
379int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
380 domid_t trans_domid,
381 grant_ref_t trans_gref)
382{
383 int ref, rc;
384
385 ref = get_free_entries(1);
386 if (unlikely(ref < 0))
387 return -ENOSPC;
388
389 rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
390 trans_domid, trans_gref);
391 if (rc < 0) {
392 put_free_entry(ref);
393 return rc;
394 }
395
396 return ref;
397}
398EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
399
400bool gnttab_trans_grants_available(void)
401{ 191{
402 return gnttab_interface->update_trans_entry != NULL; 192 u16 nflags;
403}
404EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
405 193
406static int gnttab_query_foreign_access_v1(grant_ref_t ref) 194 nflags = shared[ref].flags;
407{
408 return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
409}
410 195
411static int gnttab_query_foreign_access_v2(grant_ref_t ref) 196 return (nflags & (GTF_reading|GTF_writing));
412{
413 return grstatus[ref] & (GTF_reading|GTF_writing);
414}
415
416int gnttab_query_foreign_access(grant_ref_t ref)
417{
418 return gnttab_interface->query_foreign_access(ref);
419} 197}
420EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); 198EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
421 199
422static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) 200int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
423{ 201{
424 u16 flags, nflags; 202 u16 flags, nflags;
425 u16 *pflags;
426 203
427 pflags = &gnttab_shared.v1[ref].flags; 204 nflags = shared[ref].flags;
428 nflags = *pflags;
429 do { 205 do {
430 flags = nflags; 206 flags = nflags;
431 if (flags & (GTF_reading|GTF_writing)) 207 if (flags & (GTF_reading|GTF_writing)) {
208 printk(KERN_ALERT "WARNING: g.e. still in use!\n");
432 return 0; 209 return 0;
433 } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); 210 }
434 211 } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
435 return 1;
436}
437
438static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
439{
440 gnttab_shared.v2[ref].hdr.flags = 0;
441 mb();
442 if (grstatus[ref] & (GTF_reading|GTF_writing)) {
443 return 0;
444 } else {
445 /* The read of grstatus needs to have acquire
446 semantics. On x86, reads already have
447 that, and we just need to protect against
448 compiler reorderings. On other
449 architectures we may need a full
450 barrier. */
451#ifdef CONFIG_X86
452 barrier();
453#else
454 mb();
455#endif
456 }
457 212
458 return 1; 213 return 1;
459} 214}
460
461static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
462{
463 return gnttab_interface->end_foreign_access_ref(ref, readonly);
464}
465
466int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
467{
468 if (_gnttab_end_foreign_access_ref(ref, readonly))
469 return 1;
470 pr_warn("WARNING: g.e. %#x still in use!\n", ref);
471 return 0;
472}
473EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); 215EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
474 216
475struct deferred_entry {
476 struct list_head list;
477 grant_ref_t ref;
478 bool ro;
479 uint16_t warn_delay;
480 struct page *page;
481};
482static LIST_HEAD(deferred_list);
483static void gnttab_handle_deferred(unsigned long);
484static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
485
486static void gnttab_handle_deferred(unsigned long unused)
487{
488 unsigned int nr = 10;
489 struct deferred_entry *first = NULL;
490 unsigned long flags;
491
492 spin_lock_irqsave(&gnttab_list_lock, flags);
493 while (nr--) {
494 struct deferred_entry *entry
495 = list_first_entry(&deferred_list,
496 struct deferred_entry, list);
497
498 if (entry == first)
499 break;
500 list_del(&entry->list);
501 spin_unlock_irqrestore(&gnttab_list_lock, flags);
502 if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
503 put_free_entry(entry->ref);
504 if (entry->page) {
505 pr_debug("freeing g.e. %#x (pfn %#lx)\n",
506 entry->ref, page_to_pfn(entry->page));
507 __free_page(entry->page);
508 } else
509 pr_info("freeing g.e. %#x\n", entry->ref);
510 kfree(entry);
511 entry = NULL;
512 } else {
513 if (!--entry->warn_delay)
514 pr_info("g.e. %#x still pending\n",
515 entry->ref);
516 if (!first)
517 first = entry;
518 }
519 spin_lock_irqsave(&gnttab_list_lock, flags);
520 if (entry)
521 list_add_tail(&entry->list, &deferred_list);
522 else if (list_empty(&deferred_list))
523 break;
524 }
525 if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
526 deferred_timer.expires = jiffies + HZ;
527 add_timer(&deferred_timer);
528 }
529 spin_unlock_irqrestore(&gnttab_list_lock, flags);
530}
531
532static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
533 struct page *page)
534{
535 struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
536 const char *what = KERN_WARNING "leaking";
537
538 if (entry) {
539 unsigned long flags;
540
541 entry->ref = ref;
542 entry->ro = readonly;
543 entry->page = page;
544 entry->warn_delay = 60;
545 spin_lock_irqsave(&gnttab_list_lock, flags);
546 list_add_tail(&entry->list, &deferred_list);
547 if (!timer_pending(&deferred_timer)) {
548 deferred_timer.expires = jiffies + HZ;
549 add_timer(&deferred_timer);
550 }
551 spin_unlock_irqrestore(&gnttab_list_lock, flags);
552 what = KERN_DEBUG "deferring";
553 }
554 printk("%s g.e. %#x (pfn %#lx)\n",
555 what, ref, page ? page_to_pfn(page) : -1);
556}
557
558void gnttab_end_foreign_access(grant_ref_t ref, int readonly, 217void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
559 unsigned long page) 218 unsigned long page)
560{ 219{
@@ -562,9 +221,12 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
562 put_free_entry(ref); 221 put_free_entry(ref);
563 if (page != 0) 222 if (page != 0)
564 free_page(page); 223 free_page(page);
565 } else 224 } else {
566 gnttab_add_deferred(ref, readonly, 225 /* XXX This needs to be fixed so that the ref and page are
567 page ? virt_to_page(page) : NULL); 226 placed on a list to be freed up later. */
227 printk(KERN_WARNING
228 "WARNING: leaking g.e. and page still in use!\n");
229 }
568} 230}
569EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); 231EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
570 232
@@ -584,76 +246,37 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
584void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 246void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
585 unsigned long pfn) 247 unsigned long pfn)
586{ 248{
587 gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer); 249 update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
588} 250}
589EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); 251EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
590 252
591static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) 253unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
592{ 254{
593 unsigned long frame; 255 unsigned long frame;
594 u16 flags; 256 u16 flags;
595 u16 *pflags;
596
597 pflags = &gnttab_shared.v1[ref].flags;
598 257
599 /* 258 /*
600 * If a transfer is not even yet started, try to reclaim the grant 259 * If a transfer is not even yet started, try to reclaim the grant
601 * reference and return failure (== 0). 260 * reference and return failure (== 0).
602 */ 261 */
603 while (!((flags = *pflags) & GTF_transfer_committed)) { 262 while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
604 if (sync_cmpxchg(pflags, flags, 0) == flags) 263 if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
605 return 0; 264 return 0;
606 cpu_relax(); 265 cpu_relax();
607 } 266 }
608 267
609 /* If a transfer is in progress then wait until it is completed. */ 268 /* If a transfer is in progress then wait until it is completed. */
610 while (!(flags & GTF_transfer_completed)) { 269 while (!(flags & GTF_transfer_completed)) {
611 flags = *pflags; 270 flags = shared[ref].flags;
612 cpu_relax(); 271 cpu_relax();
613 } 272 }
614 273
615 rmb(); /* Read the frame number /after/ reading completion status. */ 274 rmb(); /* Read the frame number /after/ reading completion status. */
616 frame = gnttab_shared.v1[ref].frame; 275 frame = shared[ref].frame;
617 BUG_ON(frame == 0); 276 BUG_ON(frame == 0);
618 277
619 return frame; 278 return frame;
620} 279}
621
622static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
623{
624 unsigned long frame;
625 u16 flags;
626 u16 *pflags;
627
628 pflags = &gnttab_shared.v2[ref].hdr.flags;
629
630 /*
631 * If a transfer is not even yet started, try to reclaim the grant
632 * reference and return failure (== 0).
633 */
634 while (!((flags = *pflags) & GTF_transfer_committed)) {
635 if (sync_cmpxchg(pflags, flags, 0) == flags)
636 return 0;
637 cpu_relax();
638 }
639
640 /* If a transfer is in progress then wait until it is completed. */
641 while (!(flags & GTF_transfer_completed)) {
642 flags = *pflags;
643 cpu_relax();
644 }
645
646 rmb(); /* Read the frame number /after/ reading completion status. */
647 frame = gnttab_shared.v2[ref].full_page.frame;
648 BUG_ON(frame == 0);
649
650 return frame;
651}
652
653unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
654{
655 return gnttab_interface->end_foreign_transfer_ref(ref);
656}
657EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); 280EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
658 281
659unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) 282unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
@@ -824,58 +447,10 @@ unsigned int gnttab_max_grant_frames(void)
824} 447}
825EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); 448EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
826 449
827/* Handling of paged out grant targets (GNTST_eagain) */
828#define MAX_DELAY 256
829static inline void
830gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
831 const char *func)
832{
833 unsigned delay = 1;
834
835 do {
836 BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
837 if (*status == GNTST_eagain)
838 msleep(delay++);
839 } while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
840
841 if (delay >= MAX_DELAY) {
842 printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
843 *status = GNTST_bad_page;
844 }
845}
846
847void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
848{
849 struct gnttab_map_grant_ref *op;
850
851 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
852 BUG();
853 for (op = batch; op < batch + count; op++)
854 if (op->status == GNTST_eagain)
855 gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
856 &op->status, __func__);
857}
858EXPORT_SYMBOL_GPL(gnttab_batch_map);
859
860void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
861{
862 struct gnttab_copy *op;
863
864 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
865 BUG();
866 for (op = batch; op < batch + count; op++)
867 if (op->status == GNTST_eagain)
868 gnttab_retry_eagain_gop(GNTTABOP_copy, op,
869 &op->status, __func__);
870}
871EXPORT_SYMBOL_GPL(gnttab_batch_copy);
872
873int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, 450int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
874 struct gnttab_map_grant_ref *kmap_ops,
875 struct page **pages, unsigned int count) 451 struct page **pages, unsigned int count)
876{ 452{
877 int i, ret; 453 int i, ret;
878 bool lazy = false;
879 pte_t *pte; 454 pte_t *pte;
880 unsigned long mfn; 455 unsigned long mfn;
881 456
@@ -883,20 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
883 if (ret) 458 if (ret)
884 return ret; 459 return ret;
885 460
886 /* Retry eagain maps */
887 for (i = 0; i < count; i++)
888 if (map_ops[i].status == GNTST_eagain)
889 gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
890 &map_ops[i].status, __func__);
891
892 if (xen_feature(XENFEAT_auto_translated_physmap)) 461 if (xen_feature(XENFEAT_auto_translated_physmap))
893 return ret; 462 return ret;
894 463
895 if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
896 arch_enter_lazy_mmu_mode();
897 lazy = true;
898 }
899
900 for (i = 0; i < count; i++) { 464 for (i = 0; i < count; i++) {
901 /* Do not add to override if the map failed. */ 465 /* Do not add to override if the map failed. */
902 if (map_ops[i].status) 466 if (map_ops[i].status)
@@ -907,27 +471,37 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
907 (map_ops[i].host_addr & ~PAGE_MASK)); 471 (map_ops[i].host_addr & ~PAGE_MASK));
908 mfn = pte_mfn(*pte); 472 mfn = pte_mfn(*pte);
909 } else { 473 } else {
910 mfn = PFN_DOWN(map_ops[i].dev_bus_addr); 474 /* If you really wanted to do this:
475 * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
476 *
477 * The reason we do not implement it is b/c on the
478 * unmap path (gnttab_unmap_refs) we have no means of
479 * checking whether the page is !GNTMAP_contains_pte.
480 *
481 * That is without some extra data-structure to carry
482 * the struct page, bool clear_pte, and list_head next
 483 * tuples and deal with allocation/deallocation, etc.
484 *
485 * The users of this API set the GNTMAP_contains_pte
 486 * flag so let's just return not supported until it
 487 * becomes necessary to implement.
488 */
489 return -EOPNOTSUPP;
911 } 490 }
912 ret = m2p_add_override(mfn, pages[i], kmap_ops ? 491 ret = m2p_add_override(mfn, pages[i],
913 &kmap_ops[i] : NULL); 492 map_ops[i].flags & GNTMAP_contains_pte);
914 if (ret) 493 if (ret)
915 return ret; 494 return ret;
916 } 495 }
917 496
918 if (lazy)
919 arch_leave_lazy_mmu_mode();
920
921 return ret; 497 return ret;
922} 498}
923EXPORT_SYMBOL_GPL(gnttab_map_refs); 499EXPORT_SYMBOL_GPL(gnttab_map_refs);
924 500
925int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, 501int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
926 struct gnttab_map_grant_ref *kmap_ops, 502 struct page **pages, unsigned int count)
927 struct page **pages, unsigned int count)
928{ 503{
929 int i, ret; 504 int i, ret;
930 bool lazy = false;
931 505
932 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); 506 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
933 if (ret) 507 if (ret)
@@ -936,100 +510,20 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
936 if (xen_feature(XENFEAT_auto_translated_physmap)) 510 if (xen_feature(XENFEAT_auto_translated_physmap))
937 return ret; 511 return ret;
938 512
939 if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
940 arch_enter_lazy_mmu_mode();
941 lazy = true;
942 }
943
944 for (i = 0; i < count; i++) { 513 for (i = 0; i < count; i++) {
945 ret = m2p_remove_override(pages[i], kmap_ops ? 514 ret = m2p_remove_override(pages[i], true /* clear the PTE */);
946 &kmap_ops[i] : NULL);
947 if (ret) 515 if (ret)
948 return ret; 516 return ret;
949 } 517 }
950 518
951 if (lazy)
952 arch_leave_lazy_mmu_mode();
953
954 return ret; 519 return ret;
955} 520}
956EXPORT_SYMBOL_GPL(gnttab_unmap_refs); 521EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
957 522
958static unsigned nr_status_frames(unsigned nr_grant_frames)
959{
960 return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
961}
962
963static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
964{
965 int rc;
966
967 rc = arch_gnttab_map_shared(frames, nr_gframes,
968 gnttab_max_grant_frames(),
969 &gnttab_shared.addr);
970 BUG_ON(rc);
971
972 return 0;
973}
974
975static void gnttab_unmap_frames_v1(void)
976{
977 arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
978}
979
980static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
981{
982 uint64_t *sframes;
983 unsigned int nr_sframes;
984 struct gnttab_get_status_frames getframes;
985 int rc;
986
987 nr_sframes = nr_status_frames(nr_gframes);
988
989 /* No need for kzalloc as it is initialized in following hypercall
990 * GNTTABOP_get_status_frames.
991 */
992 sframes = kmalloc(nr_sframes * sizeof(uint64_t), GFP_ATOMIC);
993 if (!sframes)
994 return -ENOMEM;
995
996 getframes.dom = DOMID_SELF;
997 getframes.nr_frames = nr_sframes;
998 set_xen_guest_handle(getframes.frame_list, sframes);
999
1000 rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
1001 &getframes, 1);
1002 if (rc == -ENOSYS) {
1003 kfree(sframes);
1004 return -ENOSYS;
1005 }
1006
1007 BUG_ON(rc || getframes.status);
1008
1009 rc = arch_gnttab_map_status(sframes, nr_sframes,
1010 nr_status_frames(gnttab_max_grant_frames()),
1011 &grstatus);
1012 BUG_ON(rc);
1013 kfree(sframes);
1014
1015 rc = arch_gnttab_map_shared(frames, nr_gframes,
1016 gnttab_max_grant_frames(),
1017 &gnttab_shared.addr);
1018 BUG_ON(rc);
1019
1020 return 0;
1021}
1022
1023static void gnttab_unmap_frames_v2(void)
1024{
1025 arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
1026 arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
1027}
1028
1029static int gnttab_map(unsigned int start_idx, unsigned int end_idx) 523static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1030{ 524{
1031 struct gnttab_setup_table setup; 525 struct gnttab_setup_table setup;
1032 xen_pfn_t *frames; 526 unsigned long *frames;
1033 unsigned int nr_gframes = end_idx + 1; 527 unsigned int nr_gframes = end_idx + 1;
1034 int rc; 528 int rc;
1035 529
@@ -1057,9 +551,6 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1057 return rc; 551 return rc;
1058 } 552 }
1059 553
1060 /* No need for kzalloc as it is initialized in following hypercall
1061 * GNTTABOP_setup_table.
1062 */
1063 frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); 554 frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
1064 if (!frames) 555 if (!frames)
1065 return -ENOMEM; 556 return -ENOMEM;
@@ -1076,68 +567,19 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1076 567
1077 BUG_ON(rc || setup.status); 568 BUG_ON(rc || setup.status);
1078 569
1079 rc = gnttab_interface->map_frames(frames, nr_gframes); 570 rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
571 &shared);
572 BUG_ON(rc);
1080 573
1081 kfree(frames); 574 kfree(frames);
1082 575
1083 return rc; 576 return 0;
1084}
1085
1086static struct gnttab_ops gnttab_v1_ops = {
1087 .map_frames = gnttab_map_frames_v1,
1088 .unmap_frames = gnttab_unmap_frames_v1,
1089 .update_entry = gnttab_update_entry_v1,
1090 .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1,
1091 .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1,
1092 .query_foreign_access = gnttab_query_foreign_access_v1,
1093};
1094
1095static struct gnttab_ops gnttab_v2_ops = {
1096 .map_frames = gnttab_map_frames_v2,
1097 .unmap_frames = gnttab_unmap_frames_v2,
1098 .update_entry = gnttab_update_entry_v2,
1099 .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
1100 .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
1101 .query_foreign_access = gnttab_query_foreign_access_v2,
1102 .update_subpage_entry = gnttab_update_subpage_entry_v2,
1103 .update_trans_entry = gnttab_update_trans_entry_v2,
1104};
1105
1106static void gnttab_request_version(void)
1107{
1108 int rc;
1109 struct gnttab_set_version gsv;
1110
1111 if (xen_hvm_domain())
1112 gsv.version = 1;
1113 else
1114 gsv.version = 2;
1115 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1116 if (rc == 0 && gsv.version == 2) {
1117 grant_table_version = 2;
1118 gnttab_interface = &gnttab_v2_ops;
1119 } else if (grant_table_version == 2) {
1120 /*
1121 * If we've already used version 2 features,
1122 * but then suddenly discover that they're not
1123 * available (e.g. migrating to an older
1124 * version of Xen), almost unbounded badness
1125 * can happen.
1126 */
1127 panic("we need grant tables version 2, but only version 1 is available");
1128 } else {
1129 grant_table_version = 1;
1130 gnttab_interface = &gnttab_v1_ops;
1131 }
1132 printk(KERN_INFO "Grant tables using version %d layout.\n",
1133 grant_table_version);
1134} 577}
1135 578
1136int gnttab_resume(void) 579int gnttab_resume(void)
1137{ 580{
1138 unsigned int max_nr_gframes; 581 unsigned int max_nr_gframes;
1139 582
1140 gnttab_request_version();
1141 max_nr_gframes = gnttab_max_grant_frames(); 583 max_nr_gframes = gnttab_max_grant_frames();
1142 if (max_nr_gframes < nr_grant_frames) 584 if (max_nr_gframes < nr_grant_frames)
1143 return -ENOSYS; 585 return -ENOSYS;
@@ -1145,10 +587,9 @@ int gnttab_resume(void)
1145 if (xen_pv_domain()) 587 if (xen_pv_domain())
1146 return gnttab_map(0, nr_grant_frames - 1); 588 return gnttab_map(0, nr_grant_frames - 1);
1147 589
1148 if (gnttab_shared.addr == NULL) { 590 if (!shared) {
1149 gnttab_shared.addr = ioremap(xen_hvm_resume_frames, 591 shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
1150 PAGE_SIZE * max_nr_gframes); 592 if (shared == NULL) {
1151 if (gnttab_shared.addr == NULL) {
1152 printk(KERN_WARNING 593 printk(KERN_WARNING
1153 "Failed to ioremap gnttab share frames!"); 594 "Failed to ioremap gnttab share frames!");
1154 return -ENOMEM; 595 return -ENOMEM;
@@ -1162,7 +603,7 @@ int gnttab_resume(void)
1162 603
1163int gnttab_suspend(void) 604int gnttab_suspend(void)
1164{ 605{
1165 gnttab_interface->unmap_frames(); 606 arch_gnttab_unmap_shared(shared, nr_grant_frames);
1166 return 0; 607 return 0;
1167} 608}
1168 609
@@ -1189,7 +630,6 @@ int gnttab_init(void)
1189 int i; 630 int i;
1190 unsigned int max_nr_glist_frames, nr_glist_frames; 631 unsigned int max_nr_glist_frames, nr_glist_frames;
1191 unsigned int nr_init_grefs; 632 unsigned int nr_init_grefs;
1192 int ret;
1193 633
1194 nr_grant_frames = 1; 634 nr_grant_frames = 1;
1195 boot_max_nr_grant_frames = __max_nr_grant_frames(); 635 boot_max_nr_grant_frames = __max_nr_grant_frames();
@@ -1208,16 +648,12 @@ int gnttab_init(void)
1208 nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; 648 nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
1209 for (i = 0; i < nr_glist_frames; i++) { 649 for (i = 0; i < nr_glist_frames; i++) {
1210 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); 650 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1211 if (gnttab_list[i] == NULL) { 651 if (gnttab_list[i] == NULL)
1212 ret = -ENOMEM;
1213 goto ini_nomem; 652 goto ini_nomem;
1214 }
1215 } 653 }
1216 654
1217 if (gnttab_resume() < 0) { 655 if (gnttab_resume() < 0)
1218 ret = -ENODEV; 656 return -ENODEV;
1219 goto ini_nomem;
1220 }
1221 657
1222 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 658 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
1223 659
@@ -1235,11 +671,11 @@ int gnttab_init(void)
1235 for (i--; i >= 0; i--) 671 for (i--; i >= 0; i--)
1236 free_page((unsigned long)gnttab_list[i]); 672 free_page((unsigned long)gnttab_list[i]);
1237 kfree(gnttab_list); 673 kfree(gnttab_list);
1238 return ret; 674 return -ENOMEM;
1239} 675}
1240EXPORT_SYMBOL_GPL(gnttab_init); 676EXPORT_SYMBOL_GPL(gnttab_init);
1241 677
1242static int __gnttab_init(void) 678static int __devinit __gnttab_init(void)
1243{ 679{
1244 /* Delay grant-table initialization in the PV on HVM case */ 680 /* Delay grant-table initialization in the PV on HVM case */
1245 if (xen_hvm_domain()) 681 if (xen_hvm_domain())
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 412b96cc530..0b5366b5be2 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -9,7 +9,6 @@
9#include <linux/stop_machine.h> 9#include <linux/stop_machine.h>
10#include <linux/freezer.h> 10#include <linux/freezer.h>
11#include <linux/syscore_ops.h> 11#include <linux/syscore_ops.h>
12#include <linux/export.h>
13 12
14#include <xen/xen.h> 13#include <xen/xen.h>
15#include <xen/xenbus.h> 14#include <xen/xenbus.h>
@@ -129,10 +128,9 @@ static void do_suspend(void)
129 printk(KERN_DEBUG "suspending xenstore...\n"); 128 printk(KERN_DEBUG "suspending xenstore...\n");
130 xs_suspend(); 129 xs_suspend();
131 130
132 err = dpm_suspend_end(PMSG_FREEZE); 131 err = dpm_suspend_noirq(PMSG_FREEZE);
133 if (err) { 132 if (err) {
134 printk(KERN_ERR "dpm_suspend_end failed: %d\n", err); 133 printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
135 si.cancelled = 0;
136 goto out_resume; 134 goto out_resume;
137 } 135 }
138 136
@@ -150,7 +148,7 @@ static void do_suspend(void)
150 148
151 err = stop_machine(xen_suspend, &si, cpumask_of(0)); 149 err = stop_machine(xen_suspend, &si, cpumask_of(0));
152 150
153 dpm_resume_start(si.cancelled ? PMSG_THAW : PMSG_RESTORE); 151 dpm_resume_noirq(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
154 152
155 if (err) { 153 if (err) {
156 printk(KERN_ERR "failed to start xen_suspend: %d\n", err); 154 printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
deleted file mode 100644
index 8feee08bcb4..00000000000
--- a/drivers/xen/mcelog.c
+++ /dev/null
@@ -1,414 +0,0 @@
1/******************************************************************************
2 * mcelog.c
 3 * Driver for receiving and transferring machine check error information
4 *
5 * Copyright (c) 2012 Intel Corporation
6 * Author: Liu, Jinsong <jinsong.liu@intel.com>
7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
8 * Author: Ke, Liping <liping.ke@intel.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include <linux/init.h>
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/slab.h>
39#include <linux/fs.h>
40#include <linux/device.h>
41#include <linux/miscdevice.h>
42#include <linux/uaccess.h>
43#include <linux/capability.h>
44#include <linux/poll.h>
45#include <linux/sched.h>
46
47#include <xen/interface/xen.h>
48#include <xen/events.h>
49#include <xen/interface/vcpu.h>
50#include <xen/xen.h>
51#include <asm/xen/hypercall.h>
52#include <asm/xen/hypervisor.h>
53
54#define XEN_MCELOG "xen_mcelog: "
55
56static struct mc_info g_mi;
57static struct mcinfo_logical_cpu *g_physinfo;
58static uint32_t ncpus;
59
60static DEFINE_MUTEX(mcelog_lock);
61
62static struct xen_mce_log xen_mcelog = {
63 .signature = XEN_MCE_LOG_SIGNATURE,
64 .len = XEN_MCE_LOG_LEN,
65 .recordlen = sizeof(struct xen_mce),
66};
67
68static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
69static int xen_mce_chrdev_open_count; /* #times opened */
70static int xen_mce_chrdev_open_exclu; /* already open exclusive? */
71
72static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
73
74static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
75{
76 spin_lock(&xen_mce_chrdev_state_lock);
77
78 if (xen_mce_chrdev_open_exclu ||
79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
80 spin_unlock(&xen_mce_chrdev_state_lock);
81
82 return -EBUSY;
83 }
84
85 if (file->f_flags & O_EXCL)
86 xen_mce_chrdev_open_exclu = 1;
87 xen_mce_chrdev_open_count++;
88
89 spin_unlock(&xen_mce_chrdev_state_lock);
90
91 return nonseekable_open(inode, file);
92}
93
94static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
95{
96 spin_lock(&xen_mce_chrdev_state_lock);
97
98 xen_mce_chrdev_open_count--;
99 xen_mce_chrdev_open_exclu = 0;
100
101 spin_unlock(&xen_mce_chrdev_state_lock);
102
103 return 0;
104}
105
106static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
107 size_t usize, loff_t *off)
108{
109 char __user *buf = ubuf;
110 unsigned num;
111 int i, err;
112
113 mutex_lock(&mcelog_lock);
114
115 num = xen_mcelog.next;
116
117 /* Only supports full reads right now */
118 err = -EINVAL;
119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
120 goto out;
121
122 err = 0;
123 for (i = 0; i < num; i++) {
124 struct xen_mce *m = &xen_mcelog.entry[i];
125
126 err |= copy_to_user(buf, m, sizeof(*m));
127 buf += sizeof(*m);
128 }
129
130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
131 xen_mcelog.next = 0;
132
133 if (err)
134 err = -EFAULT;
135
136out:
137 mutex_unlock(&mcelog_lock);
138
139 return err ? err : buf - ubuf;
140}
141
142static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait)
143{
144 poll_wait(file, &xen_mce_chrdev_wait, wait);
145
146 if (xen_mcelog.next)
147 return POLLIN | POLLRDNORM;
148
149 return 0;
150}
151
152static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
153 unsigned long arg)
154{
155 int __user *p = (int __user *)arg;
156
157 if (!capable(CAP_SYS_ADMIN))
158 return -EPERM;
159
160 switch (cmd) {
161 case MCE_GET_RECORD_LEN:
162 return put_user(sizeof(struct xen_mce), p);
163 case MCE_GET_LOG_LEN:
164 return put_user(XEN_MCE_LOG_LEN, p);
165 case MCE_GETCLEAR_FLAGS: {
166 unsigned flags;
167
168 do {
169 flags = xen_mcelog.flags;
170 } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);
171
172 return put_user(flags, p);
173 }
174 default:
175 return -ENOTTY;
176 }
177}
178
179static const struct file_operations xen_mce_chrdev_ops = {
180 .open = xen_mce_chrdev_open,
181 .release = xen_mce_chrdev_release,
182 .read = xen_mce_chrdev_read,
183 .poll = xen_mce_chrdev_poll,
184 .unlocked_ioctl = xen_mce_chrdev_ioctl,
185 .llseek = no_llseek,
186};
187
188static struct miscdevice xen_mce_chrdev_device = {
189 MISC_MCELOG_MINOR,
190 "mcelog",
191 &xen_mce_chrdev_ops,
192};
193
194/*
195 * Caller should hold the mcelog_lock
196 */
197static void xen_mce_log(struct xen_mce *mce)
198{
199 unsigned entry;
200
201 entry = xen_mcelog.next;
202
203 /*
204 * When the buffer fills up discard new entries.
205 * Assume that the earlier errors are the more
206 * interesting ones:
207 */
208 if (entry >= XEN_MCE_LOG_LEN) {
209 set_bit(XEN_MCE_OVERFLOW,
210 (unsigned long *)&xen_mcelog.flags);
211 return;
212 }
213
214 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));
215
216 xen_mcelog.next++;
217}
218
219static int convert_log(struct mc_info *mi)
220{
221 struct mcinfo_common *mic;
222 struct mcinfo_global *mc_global;
223 struct mcinfo_bank *mc_bank;
224 struct xen_mce m;
225 uint32_t i;
226
227 mic = NULL;
228 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
229 if (unlikely(!mic)) {
230 pr_warning(XEN_MCELOG "Failed to find global error info\n");
231 return -ENODEV;
232 }
233
234 memset(&m, 0, sizeof(struct xen_mce));
235
236 mc_global = (struct mcinfo_global *)mic;
237 m.mcgstatus = mc_global->mc_gstatus;
238 m.apicid = mc_global->mc_apicid;
239
240 for (i = 0; i < ncpus; i++)
241 if (g_physinfo[i].mc_apicid == m.apicid)
242 break;
243 if (unlikely(i == ncpus)) {
244 pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n",
245 m.apicid);
246 return -ENODEV;
247 }
248
249 m.socketid = g_physinfo[i].mc_chipid;
250 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
251 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
252 m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value;
253
254 mic = NULL;
255 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
256 if (unlikely(!mic)) {
257 pr_warning(XEN_MCELOG "Fail to find bank error info\n");
258 return -ENODEV;
259 }
260
261 do {
262 if ((!mic) || (mic->size == 0) ||
263 (mic->type != MC_TYPE_GLOBAL &&
264 mic->type != MC_TYPE_BANK &&
265 mic->type != MC_TYPE_EXTENDED &&
266 mic->type != MC_TYPE_RECOVERY))
267 break;
268
269 if (mic->type == MC_TYPE_BANK) {
270 mc_bank = (struct mcinfo_bank *)mic;
271 m.misc = mc_bank->mc_misc;
272 m.status = mc_bank->mc_status;
273 m.addr = mc_bank->mc_addr;
274 m.tsc = mc_bank->mc_tsc;
275 m.bank = mc_bank->mc_bank;
276 m.finished = 1;
277 /*log this record*/
278 xen_mce_log(&m);
279 }
280 mic = x86_mcinfo_next(mic);
281 } while (1);
282
283 return 0;
284}
285
286static int mc_queue_handle(uint32_t flags)
287{
288 struct xen_mc mc_op;
289 int ret = 0;
290
291 mc_op.cmd = XEN_MC_fetch;
292 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
293 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
294 do {
295 mc_op.u.mc_fetch.flags = flags;
296 ret = HYPERVISOR_mca(&mc_op);
297 if (ret) {
298 pr_err(XEN_MCELOG "Failed to fetch %s error log\n",
299 (flags == XEN_MC_URGENT) ?
300 "urgnet" : "nonurgent");
301 break;
302 }
303
304 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
305 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
306 break;
307 else {
308 ret = convert_log(&g_mi);
309 if (ret)
310 pr_warning(XEN_MCELOG
311 "Failed to convert this error log, "
312 "continue acking it anyway\n");
313
314 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
315 ret = HYPERVISOR_mca(&mc_op);
316 if (ret) {
317 pr_err(XEN_MCELOG
318 "Failed to ack previous error log\n");
319 break;
320 }
321 }
322 } while (1);
323
324 return ret;
325}
326
327/* virq handler for machine check error info*/
328static void xen_mce_work_fn(struct work_struct *work)
329{
330 int err;
331
332 mutex_lock(&mcelog_lock);
333
334 /* urgent mc_info */
335 err = mc_queue_handle(XEN_MC_URGENT);
336 if (err)
337 pr_err(XEN_MCELOG
338 "Failed to handle urgent mc_info queue, "
339 "continue handling nonurgent mc_info queue anyway.\n");
340
341 /* nonurgent mc_info */
342 err = mc_queue_handle(XEN_MC_NONURGENT);
343 if (err)
344 pr_err(XEN_MCELOG
345 "Failed to handle nonurgent mc_info queue.\n");
346
347 /* wake processes polling /dev/mcelog */
348 wake_up_interruptible(&xen_mce_chrdev_wait);
349
350 mutex_unlock(&mcelog_lock);
351}
352static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
353
354static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
355{
356 schedule_work(&xen_mce_work);
357 return IRQ_HANDLED;
358}
359
360static int bind_virq_for_mce(void)
361{
362 int ret;
363 struct xen_mc mc_op;
364
365 memset(&mc_op, 0, sizeof(struct xen_mc));
366
367 /* Fetch physical CPU Numbers */
368 mc_op.cmd = XEN_MC_physcpuinfo;
369 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
370 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
371 ret = HYPERVISOR_mca(&mc_op);
372 if (ret) {
373 pr_err(XEN_MCELOG "Failed to get CPU numbers\n");
374 return ret;
375 }
376
377 /* Fetch each CPU Physical Info for later reference*/
378 ncpus = mc_op.u.mc_physcpuinfo.ncpus;
379 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
380 GFP_KERNEL);
381 if (!g_physinfo)
382 return -ENOMEM;
383 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
384 ret = HYPERVISOR_mca(&mc_op);
385 if (ret) {
386 pr_err(XEN_MCELOG "Failed to get CPU info\n");
387 kfree(g_physinfo);
388 return ret;
389 }
390
391 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
392 xen_mce_interrupt, 0, "mce", NULL);
393 if (ret < 0) {
394 pr_err(XEN_MCELOG "Failed to bind virq\n");
395 kfree(g_physinfo);
396 return ret;
397 }
398
399 return 0;
400}
401
402static int __init xen_late_init_mcelog(void)
403{
404 /* Only DOM0 is responsible for MCE logging */
405 if (xen_initial_domain()) {
406 /* register character device /dev/mcelog for xen mcelog */
407 if (misc_register(&xen_mce_chrdev_device))
408 return -ENODEV;
409 return bind_virq_for_mce();
410 }
411
412 return -ENODEV;
413}
414device_initcall(xen_late_init_mcelog);
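The deleted mcelog.c only implements whole-buffer reads: xen_mce_chrdev_read() rejects any read smaller than XEN_MCE_LOG_LEN * sizeof(struct xen_mce) and returns however many records happen to be queued, and the MCE_GET_RECORD_LEN / MCE_GET_LOG_LEN ioctls exist so a consumer can size that buffer without knowing the record layout. A rough userspace sketch of that protocol; the <asm/mce.h> header path is an assumption, and record parsing is omitted:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <asm/mce.h>   /* MCE_GET_RECORD_LEN, MCE_GET_LOG_LEN */

    int dump_mcelog(void)
    {
            int fd = open("/dev/mcelog", O_RDONLY);
            int reclen = 0, loglen = 0;
            char *buf;
            ssize_t n;

            if (fd < 0 ||
                ioctl(fd, MCE_GET_RECORD_LEN, &reclen) < 0 ||
                ioctl(fd, MCE_GET_LOG_LEN, &loglen) < 0)
                    return -1;

            buf = malloc((size_t)reclen * loglen);
            if (!buf)
                    return -1;

            /* The driver insists on a full-buffer read. */
            n = read(fd, buf, (size_t)reclen * loglen);
            if (n > 0)
                    printf("%ld bytes (%ld records) of MCE data\n",
                           (long)n, (long)(n / reclen));
            free(buf);
            close(fd);
            return n < 0 ? -1 : 0;
    }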
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 18fff88254e..cef4bafc07d 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,7 +18,6 @@
18 */ 18 */
19 19
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/acpi.h>
22#include <xen/xen.h> 21#include <xen/xen.h>
23#include <xen/interface/physdev.h> 22#include <xen/interface/physdev.h>
24#include <xen/interface/xen.h> 23#include <xen/interface/xen.h>
@@ -27,85 +26,26 @@
27#include <asm/xen/hypercall.h> 26#include <asm/xen/hypercall.h>
28#include "../pci/pci.h" 27#include "../pci/pci.h"
29 28
30static bool __read_mostly pci_seg_supported = true;
31
32static int xen_add_device(struct device *dev) 29static int xen_add_device(struct device *dev)
33{ 30{
34 int r; 31 int r;
35 struct pci_dev *pci_dev = to_pci_dev(dev); 32 struct pci_dev *pci_dev = to_pci_dev(dev);
36#ifdef CONFIG_PCI_IOV
37 struct pci_dev *physfn = pci_dev->physfn;
38#endif
39
40 if (pci_seg_supported) {
41 struct physdev_pci_device_add add = {
42 .seg = pci_domain_nr(pci_dev->bus),
43 .bus = pci_dev->bus->number,
44 .devfn = pci_dev->devfn
45 };
46#ifdef CONFIG_ACPI
47 acpi_handle handle;
48#endif
49
50#ifdef CONFIG_PCI_IOV
51 if (pci_dev->is_virtfn) {
52 add.flags = XEN_PCI_DEV_VIRTFN;
53 add.physfn.bus = physfn->bus->number;
54 add.physfn.devfn = physfn->devfn;
55 } else
56#endif
57 if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
58 add.flags = XEN_PCI_DEV_EXTFN;
59
60#ifdef CONFIG_ACPI
61 handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
62 if (!handle && pci_dev->bus->bridge)
63 handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
64#ifdef CONFIG_PCI_IOV
65 if (!handle && pci_dev->is_virtfn)
66 handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
67#endif
68 if (handle) {
69 acpi_status status;
70
71 do {
72 unsigned long long pxm;
73
74 status = acpi_evaluate_integer(handle, "_PXM",
75 NULL, &pxm);
76 if (ACPI_SUCCESS(status)) {
77 add.optarr[0] = pxm;
78 add.flags |= XEN_PCI_DEV_PXM;
79 break;
80 }
81 status = acpi_get_parent(handle, &handle);
82 } while (ACPI_SUCCESS(status));
83 }
84#endif /* CONFIG_ACPI */
85
86 r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
87 if (r != -ENOSYS)
88 return r;
89 pci_seg_supported = false;
90 }
91 33
92 if (pci_domain_nr(pci_dev->bus))
93 r = -ENOSYS;
94#ifdef CONFIG_PCI_IOV 34#ifdef CONFIG_PCI_IOV
95 else if (pci_dev->is_virtfn) { 35 if (pci_dev->is_virtfn) {
96 struct physdev_manage_pci_ext manage_pci_ext = { 36 struct physdev_manage_pci_ext manage_pci_ext = {
97 .bus = pci_dev->bus->number, 37 .bus = pci_dev->bus->number,
98 .devfn = pci_dev->devfn, 38 .devfn = pci_dev->devfn,
99 .is_virtfn = 1, 39 .is_virtfn = 1,
100 .physfn.bus = physfn->bus->number, 40 .physfn.bus = pci_dev->physfn->bus->number,
101 .physfn.devfn = physfn->devfn, 41 .physfn.devfn = pci_dev->physfn->devfn,
102 }; 42 };
103 43
104 r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, 44 r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
105 &manage_pci_ext); 45 &manage_pci_ext);
106 } 46 } else
107#endif 47#endif
108 else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { 48 if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
109 struct physdev_manage_pci_ext manage_pci_ext = { 49 struct physdev_manage_pci_ext manage_pci_ext = {
110 .bus = pci_dev->bus->number, 50 .bus = pci_dev->bus->number,
111 .devfn = pci_dev->devfn, 51 .devfn = pci_dev->devfn,
@@ -116,7 +56,7 @@ static int xen_add_device(struct device *dev)
116 &manage_pci_ext); 56 &manage_pci_ext);
117 } else { 57 } else {
118 struct physdev_manage_pci manage_pci = { 58 struct physdev_manage_pci manage_pci = {
119 .bus = pci_dev->bus->number, 59 .bus = pci_dev->bus->number,
120 .devfn = pci_dev->devfn, 60 .devfn = pci_dev->devfn,
121 }; 61 };
122 62
@@ -131,27 +71,13 @@ static int xen_remove_device(struct device *dev)
131{ 71{
132 int r; 72 int r;
133 struct pci_dev *pci_dev = to_pci_dev(dev); 73 struct pci_dev *pci_dev = to_pci_dev(dev);
74 struct physdev_manage_pci manage_pci;
134 75
135 if (pci_seg_supported) { 76 manage_pci.bus = pci_dev->bus->number;
136 struct physdev_pci_device device = { 77 manage_pci.devfn = pci_dev->devfn;
137 .seg = pci_domain_nr(pci_dev->bus),
138 .bus = pci_dev->bus->number,
139 .devfn = pci_dev->devfn
140 };
141 78
142 r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove, 79 r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
143 &device); 80 &manage_pci);
144 } else if (pci_domain_nr(pci_dev->bus))
145 r = -ENOSYS;
146 else {
147 struct physdev_manage_pci manage_pci = {
148 .bus = pci_dev->bus->number,
149 .devfn = pci_dev->devfn
150 };
151
152 r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
153 &manage_pci);
154 }
155 81
156 return r; 82 return r;
157} 83}
@@ -170,16 +96,13 @@ static int xen_pci_notifier(struct notifier_block *nb,
170 r = xen_remove_device(dev); 96 r = xen_remove_device(dev);
171 break; 97 break;
172 default: 98 default:
173 return NOTIFY_DONE; 99 break;
174 } 100 }
175 if (r) 101
176 dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n", 102 return r;
177 action == BUS_NOTIFY_ADD_DEVICE ? "add" :
178 (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
179 return NOTIFY_OK;
180} 103}
181 104
182static struct notifier_block device_nb = { 105struct notifier_block device_nb = {
183 .notifier_call = xen_pci_notifier, 106 .notifier_call = xen_pci_notifier,
184}; 107};
185 108
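
The xen_pci_notifier above only becomes active once it is attached to the PCI bus type; that registration call lies outside the hunk shown. A generic sketch of how such a bus notifier is wired up (names here are illustrative, and the body only logs instead of issuing physdev hypercalls):

#include <linux/module.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pci.h>

static int demo_pci_notifier(struct notifier_block *nb,
                             unsigned long action, void *data)
{
        struct device *dev = data;

        switch (action) {
        case BUS_NOTIFY_ADD_DEVICE:
                dev_info(dev, "PCI device added\n");
                break;
        case BUS_NOTIFY_DEL_DEVICE:
                dev_info(dev, "PCI device removed\n");
                break;
        default:
                return NOTIFY_DONE;
        }
        return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
        .notifier_call = demo_pci_notifier,
};

static int __init demo_init(void)
{
        /* Hook add/remove events on the PCI bus type. */
        return bus_register_notifier(&pci_bus_type, &demo_nb);
}

static void __exit demo_exit(void)
{
        bus_unregister_notifier(&pci_bus_type, &demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
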
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
deleted file mode 100644
index 067fcfa1723..00000000000
--- a/drivers/xen/pcpu.c
+++ /dev/null
@@ -1,371 +0,0 @@
1/******************************************************************************
2 * pcpu.c
 3 * Manage physical CPUs in dom0: get pcpu info and provide a sysfs interface
4 *
5 * Copyright (c) 2012 Intel Corporation
6 * Author: Liu, Jinsong <jinsong.liu@intel.com>
7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/interrupt.h>
35#include <linux/spinlock.h>
36#include <linux/cpu.h>
37#include <linux/stat.h>
38#include <linux/capability.h>
39
40#include <xen/xen.h>
41#include <xen/xenbus.h>
42#include <xen/events.h>
43#include <xen/interface/platform.h>
44#include <asm/xen/hypervisor.h>
45#include <asm/xen/hypercall.h>
46
47#define XEN_PCPU "xen_cpu: "
48
49/*
 50 * @cpu_id: Xen physical cpu logical number
51 * @flags: Xen physical cpu status flag
52 * - XEN_PCPU_FLAGS_ONLINE: cpu is online
53 * - XEN_PCPU_FLAGS_INVALID: cpu is not present
54 */
55struct pcpu {
56 struct list_head list;
57 struct device dev;
58 uint32_t cpu_id;
59 uint32_t flags;
60};
61
62static struct bus_type xen_pcpu_subsys = {
63 .name = "xen_cpu",
64 .dev_name = "xen_cpu",
65};
66
67static DEFINE_MUTEX(xen_pcpu_lock);
68
69static LIST_HEAD(xen_pcpus);
70
71static int xen_pcpu_down(uint32_t cpu_id)
72{
73 struct xen_platform_op op = {
74 .cmd = XENPF_cpu_offline,
75 .interface_version = XENPF_INTERFACE_VERSION,
76 .u.cpu_ol.cpuid = cpu_id,
77 };
78
79 return HYPERVISOR_dom0_op(&op);
80}
81
82static int xen_pcpu_up(uint32_t cpu_id)
83{
84 struct xen_platform_op op = {
85 .cmd = XENPF_cpu_online,
86 .interface_version = XENPF_INTERFACE_VERSION,
87 .u.cpu_ol.cpuid = cpu_id,
88 };
89
90 return HYPERVISOR_dom0_op(&op);
91}
92
93static ssize_t show_online(struct device *dev,
94 struct device_attribute *attr,
95 char *buf)
96{
97 struct pcpu *cpu = container_of(dev, struct pcpu, dev);
98
99 return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
100}
101
102static ssize_t __ref store_online(struct device *dev,
103 struct device_attribute *attr,
104 const char *buf, size_t count)
105{
106 struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
107 unsigned long long val;
108 ssize_t ret;
109
110 if (!capable(CAP_SYS_ADMIN))
111 return -EPERM;
112
113 if (kstrtoull(buf, 0, &val) < 0)
114 return -EINVAL;
115
116 switch (val) {
117 case 0:
118 ret = xen_pcpu_down(pcpu->cpu_id);
119 break;
120 case 1:
121 ret = xen_pcpu_up(pcpu->cpu_id);
122 break;
123 default:
124 ret = -EINVAL;
125 }
126
127 if (ret >= 0)
128 ret = count;
129 return ret;
130}
131static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
132
133static bool xen_pcpu_online(uint32_t flags)
134{
135 return !!(flags & XEN_PCPU_FLAGS_ONLINE);
136}
137
138static void pcpu_online_status(struct xenpf_pcpuinfo *info,
139 struct pcpu *pcpu)
140{
141 if (xen_pcpu_online(info->flags) &&
142 !xen_pcpu_online(pcpu->flags)) {
143 /* the pcpu is onlined */
144 pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
145 kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
146 } else if (!xen_pcpu_online(info->flags) &&
147 xen_pcpu_online(pcpu->flags)) {
148 /* The pcpu is offlined */
149 pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
150 kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
151 }
152}
153
154static struct pcpu *get_pcpu(uint32_t cpu_id)
155{
156 struct pcpu *pcpu;
157
158 list_for_each_entry(pcpu, &xen_pcpus, list) {
159 if (pcpu->cpu_id == cpu_id)
160 return pcpu;
161 }
162
163 return NULL;
164}
165
166static void pcpu_release(struct device *dev)
167{
168 struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
169
170 list_del(&pcpu->list);
171 kfree(pcpu);
172}
173
174static void unregister_and_remove_pcpu(struct pcpu *pcpu)
175{
176 struct device *dev;
177
178 if (!pcpu)
179 return;
180
181 dev = &pcpu->dev;
182 if (dev->id)
183 device_remove_file(dev, &dev_attr_online);
184
 185 /* pcpu removal is done implicitly by device_unregister() */
186 device_unregister(dev);
187}
188
189static int register_pcpu(struct pcpu *pcpu)
190{
191 struct device *dev;
192 int err = -EINVAL;
193
194 if (!pcpu)
195 return err;
196
197 dev = &pcpu->dev;
198 dev->bus = &xen_pcpu_subsys;
199 dev->id = pcpu->cpu_id;
200 dev->release = pcpu_release;
201
202 err = device_register(dev);
203 if (err) {
204 pcpu_release(dev);
205 return err;
206 }
207
208 /*
 209 * Xen never offlines cpu0 due to several restrictions
 210 * and assumptions, so no sysfs control is exposed for it:
 211 * one cannot attempt to offline the BSP.
212 */
213 if (dev->id) {
214 err = device_create_file(dev, &dev_attr_online);
215 if (err) {
216 device_unregister(dev);
217 return err;
218 }
219 }
220
221 return 0;
222}
223
224static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info)
225{
226 struct pcpu *pcpu;
227 int err;
228
229 if (info->flags & XEN_PCPU_FLAGS_INVALID)
230 return ERR_PTR(-ENODEV);
231
232 pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
233 if (!pcpu)
234 return ERR_PTR(-ENOMEM);
235
236 INIT_LIST_HEAD(&pcpu->list);
237 pcpu->cpu_id = info->xen_cpuid;
238 pcpu->flags = info->flags;
239
 240 /* The caller must hold xen_pcpu_lock before manipulating the pcpu list */
241 list_add_tail(&pcpu->list, &xen_pcpus);
242
243 err = register_pcpu(pcpu);
244 if (err) {
245 pr_warning(XEN_PCPU "Failed to register pcpu%u\n",
246 info->xen_cpuid);
247 return ERR_PTR(-ENOENT);
248 }
249
250 return pcpu;
251}
252
253/*
254 * Caller should hold the xen_pcpu_lock
255 */
256static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
257{
258 int ret;
259 struct pcpu *pcpu = NULL;
260 struct xenpf_pcpuinfo *info;
261 struct xen_platform_op op = {
262 .cmd = XENPF_get_cpuinfo,
263 .interface_version = XENPF_INTERFACE_VERSION,
264 .u.pcpu_info.xen_cpuid = cpu,
265 };
266
267 ret = HYPERVISOR_dom0_op(&op);
268 if (ret)
269 return ret;
270
271 info = &op.u.pcpu_info;
272 if (max_cpu)
273 *max_cpu = info->max_present;
274
275 pcpu = get_pcpu(cpu);
276
277 /*
 278 * Only CPUs in the present map get a sysfs interface.
279 */
280 if (info->flags & XEN_PCPU_FLAGS_INVALID) {
281 if (pcpu)
282 unregister_and_remove_pcpu(pcpu);
283 return 0;
284 }
285
286 if (!pcpu) {
287 pcpu = create_and_register_pcpu(info);
288 if (IS_ERR_OR_NULL(pcpu))
289 return -ENODEV;
290 } else
291 pcpu_online_status(info, pcpu);
292
293 return 0;
294}
295
296/*
297 * Sync dom0's pcpu information with xen hypervisor's
298 */
299static int xen_sync_pcpus(void)
300{
301 /*
 302 * The boot CPU always has cpu_id 0 in Xen
303 */
304 uint32_t cpu = 0, max_cpu = 0;
305 int err = 0;
306 struct pcpu *pcpu, *tmp;
307
308 mutex_lock(&xen_pcpu_lock);
309
310 while (!err && (cpu <= max_cpu)) {
311 err = sync_pcpu(cpu, &max_cpu);
312 cpu++;
313 }
314
315 if (err)
316 list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list)
317 unregister_and_remove_pcpu(pcpu);
318
319 mutex_unlock(&xen_pcpu_lock);
320
321 return err;
322}
323
324static void xen_pcpu_work_fn(struct work_struct *work)
325{
326 xen_sync_pcpus();
327}
328static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn);
329
330static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
331{
332 schedule_work(&xen_pcpu_work);
333 return IRQ_HANDLED;
334}
335
336static int __init xen_pcpu_init(void)
337{
338 int irq, ret;
339
340 if (!xen_initial_domain())
341 return -ENODEV;
342
343 irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
344 xen_pcpu_interrupt, 0,
345 "xen-pcpu", NULL);
346 if (irq < 0) {
347 pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
348 return irq;
349 }
350
351 ret = subsys_system_register(&xen_pcpu_subsys, NULL);
352 if (ret) {
353 pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
354 goto err1;
355 }
356
357 ret = xen_sync_pcpus();
358 if (ret) {
359 pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
360 goto err2;
361 }
362
363 return 0;
364
365err2:
366 bus_unregister(&xen_pcpu_subsys);
367err1:
368 unbind_from_irqhandler(irq, NULL);
369 return ret;
370}
371arch_initcall(xen_pcpu_init);
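
The register_pcpu()/store_online() code above exposes one directory per Xen physical CPU with an "online" attribute (absent for cpu0). A small userspace sketch for toggling it; the /sys/devices/system/xen_cpu/... path layout is an assumption based on subsys_system_register():

#include <stdio.h>
#include <stdlib.h>

static int set_pcpu_online(unsigned int cpu, int online)
{
        char path[128];
        FILE *f;

        /* cpu0 has no "online" file: Xen never offlines the boot CPU. */
        snprintf(path, sizeof(path),
                 "/sys/devices/system/xen_cpu/xen_cpu%u/online", cpu);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%d\n", online ? 1 : 0);
        return fclose(f);
}

int main(int argc, char **argv)
{
        unsigned int cpu;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <xen_cpu_id> <0|1>\n", argv[0]);
                return 1;
        }
        cpu = strtoul(argv[1], NULL, 0);
        return set_pcpu_online(cpu, atoi(argv[2])) ? 1 : 0;
}

Writing 0 or 1 drives XENPF_cpu_offline/XENPF_cpu_online through store_online() above, and the resulting state change surfaces as a KOBJ_OFFLINE/KOBJ_ONLINE uevent.
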
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 99db9e1eb8b..319dd0a94d5 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -101,17 +101,14 @@ static int platform_pci_resume(struct pci_dev *pdev)
101 return 0; 101 return 0;
102} 102}
103 103
104static int platform_pci_init(struct pci_dev *pdev, 104static int __devinit platform_pci_init(struct pci_dev *pdev,
105 const struct pci_device_id *ent) 105 const struct pci_device_id *ent)
106{ 106{
107 int i, ret; 107 int i, ret;
108 long ioaddr; 108 long ioaddr;
109 long mmio_addr, mmio_len; 109 long mmio_addr, mmio_len;
110 unsigned int max_nr_gframes; 110 unsigned int max_nr_gframes;
111 111
112 if (!xen_domain())
113 return -ENODEV;
114
115 i = pci_enable_device(pdev); 112 i = pci_enable_device(pdev);
116 if (i) 113 if (i)
117 return i; 114 return i;
@@ -170,7 +167,7 @@ pci_out:
170 return ret; 167 return ret;
171} 168}
172 169
173static struct pci_device_id platform_pci_tbl[] = { 170static struct pci_device_id platform_pci_tbl[] __devinitdata = {
174 {PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM, 171 {PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM,
175 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, 172 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
176 {0,} 173 {0,}
@@ -189,6 +186,11 @@ static struct pci_driver platform_driver = {
189 186
190static int __init platform_pci_module_init(void) 187static int __init platform_pci_module_init(void)
191{ 188{
189 /* no unplug has been done, IGNORE hasn't been specified: just
190 * return now */
191 if (!xen_platform_pci_unplug)
192 return -ENODEV;
193
192 return pci_register_driver(&platform_driver); 194 return pci_register_driver(&platform_driver);
193} 195}
194 196
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
deleted file mode 100644
index 0bbbccbb1f1..00000000000
--- a/drivers/xen/privcmd.c
+++ /dev/null
@@ -1,575 +0,0 @@
1/******************************************************************************
2 * privcmd.c
3 *
4 * Interface to privileged domain-0 commands.
5 *
6 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
7 */
8
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/sched.h>
12#include <linux/slab.h>
13#include <linux/string.h>
14#include <linux/errno.h>
15#include <linux/mm.h>
16#include <linux/mman.h>
17#include <linux/uaccess.h>
18#include <linux/swap.h>
19#include <linux/highmem.h>
20#include <linux/pagemap.h>
21#include <linux/seq_file.h>
22#include <linux/miscdevice.h>
23
24#include <asm/pgalloc.h>
25#include <asm/pgtable.h>
26#include <asm/tlb.h>
27#include <asm/xen/hypervisor.h>
28#include <asm/xen/hypercall.h>
29
30#include <xen/xen.h>
31#include <xen/privcmd.h>
32#include <xen/interface/xen.h>
33#include <xen/features.h>
34#include <xen/page.h>
35#include <xen/xen-ops.h>
36#include <xen/balloon.h>
37
38#include "privcmd.h"
39
40MODULE_LICENSE("GPL");
41
42#define PRIV_VMA_LOCKED ((void *)1)
43
44#ifndef HAVE_ARCH_PRIVCMD_MMAP
45static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
46#endif
47
48static long privcmd_ioctl_hypercall(void __user *udata)
49{
50 struct privcmd_hypercall hypercall;
51 long ret;
52
53 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
54 return -EFAULT;
55
56 ret = privcmd_call(hypercall.op,
57 hypercall.arg[0], hypercall.arg[1],
58 hypercall.arg[2], hypercall.arg[3],
59 hypercall.arg[4]);
60
61 return ret;
62}
63
64static void free_page_list(struct list_head *pages)
65{
66 struct page *p, *n;
67
68 list_for_each_entry_safe(p, n, pages, lru)
69 __free_page(p);
70
71 INIT_LIST_HEAD(pages);
72}
73
74/*
75 * Given an array of items in userspace, return a list of pages
76 * containing the data. If copying fails, either because of memory
77 * allocation failure or a problem reading user memory, return an
 78 * error code; it's up to the caller to dispose of any partial list.
79 */
80static int gather_array(struct list_head *pagelist,
81 unsigned nelem, size_t size,
82 const void __user *data)
83{
84 unsigned pageidx;
85 void *pagedata;
86 int ret;
87
88 if (size > PAGE_SIZE)
89 return 0;
90
91 pageidx = PAGE_SIZE;
92 pagedata = NULL; /* quiet, gcc */
93 while (nelem--) {
94 if (pageidx > PAGE_SIZE-size) {
95 struct page *page = alloc_page(GFP_KERNEL);
96
97 ret = -ENOMEM;
98 if (page == NULL)
99 goto fail;
100
101 pagedata = page_address(page);
102
103 list_add_tail(&page->lru, pagelist);
104 pageidx = 0;
105 }
106
107 ret = -EFAULT;
108 if (copy_from_user(pagedata + pageidx, data, size))
109 goto fail;
110
111 data += size;
112 pageidx += size;
113 }
114
115 ret = 0;
116
117fail:
118 return ret;
119}
120
121/*
122 * Call function "fn" on each element of the array fragmented
123 * over a list of pages.
124 */
125static int traverse_pages(unsigned nelem, size_t size,
126 struct list_head *pos,
127 int (*fn)(void *data, void *state),
128 void *state)
129{
130 void *pagedata;
131 unsigned pageidx;
132 int ret = 0;
133
134 BUG_ON(size > PAGE_SIZE);
135
136 pageidx = PAGE_SIZE;
137 pagedata = NULL; /* hush, gcc */
138
139 while (nelem--) {
140 if (pageidx > PAGE_SIZE-size) {
141 struct page *page;
142 pos = pos->next;
143 page = list_entry(pos, struct page, lru);
144 pagedata = page_address(page);
145 pageidx = 0;
146 }
147
148 ret = (*fn)(pagedata + pageidx, state);
149 if (ret)
150 break;
151 pageidx += size;
152 }
153
154 return ret;
155}
156
157struct mmap_mfn_state {
158 unsigned long va;
159 struct vm_area_struct *vma;
160 domid_t domain;
161};
162
163static int mmap_mfn_range(void *data, void *state)
164{
165 struct privcmd_mmap_entry *msg = data;
166 struct mmap_mfn_state *st = state;
167 struct vm_area_struct *vma = st->vma;
168 int rc;
169
170 /* Do not allow range to wrap the address space. */
171 if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
172 ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
173 return -EINVAL;
174
175 /* Range chunks must be contiguous in va space. */
176 if ((msg->va != st->va) ||
177 ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
178 return -EINVAL;
179
180 rc = xen_remap_domain_mfn_range(vma,
181 msg->va & PAGE_MASK,
182 msg->mfn, msg->npages,
183 vma->vm_page_prot,
184 st->domain, NULL);
185 if (rc < 0)
186 return rc;
187
188 st->va += msg->npages << PAGE_SHIFT;
189
190 return 0;
191}
192
193static long privcmd_ioctl_mmap(void __user *udata)
194{
195 struct privcmd_mmap mmapcmd;
196 struct mm_struct *mm = current->mm;
197 struct vm_area_struct *vma;
198 int rc;
199 LIST_HEAD(pagelist);
200 struct mmap_mfn_state state;
201
202 if (!xen_initial_domain())
203 return -EPERM;
204
205 /* We only support privcmd_ioctl_mmap_batch for auto translated. */
206 if (xen_feature(XENFEAT_auto_translated_physmap))
207 return -ENOSYS;
208
209 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
210 return -EFAULT;
211
212 rc = gather_array(&pagelist,
213 mmapcmd.num, sizeof(struct privcmd_mmap_entry),
214 mmapcmd.entry);
215
216 if (rc || list_empty(&pagelist))
217 goto out;
218
219 down_write(&mm->mmap_sem);
220
221 {
222 struct page *page = list_first_entry(&pagelist,
223 struct page, lru);
224 struct privcmd_mmap_entry *msg = page_address(page);
225
226 vma = find_vma(mm, msg->va);
227 rc = -EINVAL;
228
229 if (!vma || (msg->va != vma->vm_start) ||
230 !privcmd_enforce_singleshot_mapping(vma))
231 goto out_up;
232 }
233
234 state.va = vma->vm_start;
235 state.vma = vma;
236 state.domain = mmapcmd.dom;
237
238 rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
239 &pagelist,
240 mmap_mfn_range, &state);
241
242
243out_up:
244 up_write(&mm->mmap_sem);
245
246out:
247 free_page_list(&pagelist);
248
249 return rc;
250}
251
252struct mmap_batch_state {
253 domid_t domain;
254 unsigned long va;
255 struct vm_area_struct *vma;
256 int index;
257 /* A tristate:
258 * 0 for no errors
259 * 1 if at least one error has happened (and no
260 * -ENOENT errors have happened)
261 * -ENOENT if at least 1 -ENOENT has happened.
262 */
263 int global_error;
264 /* An array for individual errors */
265 int *err;
266
267 /* User-space mfn array to store errors in the second pass for V1. */
268 xen_pfn_t __user *user_mfn;
269};
270
271/* auto translated dom0 note: if domU being created is PV, then mfn is
272 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
273 */
274static int mmap_batch_fn(void *data, void *state)
275{
276 xen_pfn_t *mfnp = data;
277 struct mmap_batch_state *st = state;
278 struct vm_area_struct *vma = st->vma;
279 struct page **pages = vma->vm_private_data;
280 struct page *cur_page = NULL;
281 int ret;
282
283 if (xen_feature(XENFEAT_auto_translated_physmap))
284 cur_page = pages[st->index++];
285
286 ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
287 st->vma->vm_page_prot, st->domain,
288 &cur_page);
289
290 /* Store error code for second pass. */
291 *(st->err++) = ret;
292
293 /* And see if it affects the global_error. */
294 if (ret < 0) {
295 if (ret == -ENOENT)
296 st->global_error = -ENOENT;
297 else {
298 /* Record that at least one error has happened. */
299 if (st->global_error == 0)
300 st->global_error = 1;
301 }
302 }
303 st->va += PAGE_SIZE;
304
305 return 0;
306}
307
308static int mmap_return_errors_v1(void *data, void *state)
309{
310 xen_pfn_t *mfnp = data;
311 struct mmap_batch_state *st = state;
312 int err = *(st->err++);
313
314 /*
 315 * V1 encodes the error codes in the top nibble of the 32-bit
 316 * mfn (with its known limitations vis-a-vis 64-bit callers).
317 */
318 *mfnp |= (err == -ENOENT) ?
319 PRIVCMD_MMAPBATCH_PAGED_ERROR :
320 PRIVCMD_MMAPBATCH_MFN_ERROR;
321 return __put_user(*mfnp, st->user_mfn++);
322}
323
324/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
325 * the vma with the page info to use later.
326 * Returns: 0 if success, otherwise -errno
327 */
328static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
329{
330 int rc;
331 struct page **pages;
332
333 pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
334 if (pages == NULL)
335 return -ENOMEM;
336
337 rc = alloc_xenballooned_pages(numpgs, pages, 0);
338 if (rc != 0) {
339 pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
340 numpgs, rc);
341 kfree(pages);
342 return -ENOMEM;
343 }
344 BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
345 vma->vm_private_data = pages;
346
347 return 0;
348}
349
350static struct vm_operations_struct privcmd_vm_ops;
351
352static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
353{
354 int ret;
355 struct privcmd_mmapbatch_v2 m;
356 struct mm_struct *mm = current->mm;
357 struct vm_area_struct *vma;
358 unsigned long nr_pages;
359 LIST_HEAD(pagelist);
360 int *err_array = NULL;
361 struct mmap_batch_state state;
362
363 if (!xen_initial_domain())
364 return -EPERM;
365
366 switch (version) {
367 case 1:
368 if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
369 return -EFAULT;
370 /* Returns per-frame error in m.arr. */
371 m.err = NULL;
372 if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
373 return -EFAULT;
374 break;
375 case 2:
376 if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
377 return -EFAULT;
378 /* Returns per-frame error code in m.err. */
379 if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
380 return -EFAULT;
381 break;
382 default:
383 return -EINVAL;
384 }
385
386 nr_pages = m.num;
387 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
388 return -EINVAL;
389
390 ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
391
392 if (ret)
393 goto out;
394 if (list_empty(&pagelist)) {
395 ret = -EINVAL;
396 goto out;
397 }
398
399 err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
400 if (err_array == NULL) {
401 ret = -ENOMEM;
402 goto out;
403 }
404
405 down_write(&mm->mmap_sem);
406
407 vma = find_vma(mm, m.addr);
408 if (!vma ||
409 vma->vm_ops != &privcmd_vm_ops ||
410 (m.addr != vma->vm_start) ||
411 ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
412 !privcmd_enforce_singleshot_mapping(vma)) {
413 up_write(&mm->mmap_sem);
414 ret = -EINVAL;
415 goto out;
416 }
417 if (xen_feature(XENFEAT_auto_translated_physmap)) {
418 ret = alloc_empty_pages(vma, m.num);
419 if (ret < 0) {
420 up_write(&mm->mmap_sem);
421 goto out;
422 }
423 }
424
425 state.domain = m.dom;
426 state.vma = vma;
427 state.va = m.addr;
428 state.index = 0;
429 state.global_error = 0;
430 state.err = err_array;
431
432 /* mmap_batch_fn guarantees ret == 0 */
433 BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
434 &pagelist, mmap_batch_fn, &state));
435
436 up_write(&mm->mmap_sem);
437
438 if (version == 1) {
439 if (state.global_error) {
440 /* Write back errors in second pass. */
441 state.user_mfn = (xen_pfn_t *)m.arr;
442 state.err = err_array;
443 ret = traverse_pages(m.num, sizeof(xen_pfn_t),
444 &pagelist, mmap_return_errors_v1, &state);
445 } else
446 ret = 0;
447
448 } else if (version == 2) {
449 ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
450 if (ret)
451 ret = -EFAULT;
452 }
453
454 /* If we have not had any EFAULT-like global errors then set the global
455 * error to -ENOENT if necessary. */
456 if ((ret == 0) && (state.global_error == -ENOENT))
457 ret = -ENOENT;
458
459out:
460 kfree(err_array);
461 free_page_list(&pagelist);
462
463 return ret;
464}
465
466static long privcmd_ioctl(struct file *file,
467 unsigned int cmd, unsigned long data)
468{
469 int ret = -ENOSYS;
470 void __user *udata = (void __user *) data;
471
472 switch (cmd) {
473 case IOCTL_PRIVCMD_HYPERCALL:
474 ret = privcmd_ioctl_hypercall(udata);
475 break;
476
477 case IOCTL_PRIVCMD_MMAP:
478 ret = privcmd_ioctl_mmap(udata);
479 break;
480
481 case IOCTL_PRIVCMD_MMAPBATCH:
482 ret = privcmd_ioctl_mmap_batch(udata, 1);
483 break;
484
485 case IOCTL_PRIVCMD_MMAPBATCH_V2:
486 ret = privcmd_ioctl_mmap_batch(udata, 2);
487 break;
488
489 default:
490 ret = -EINVAL;
491 break;
492 }
493
494 return ret;
495}
496
497static void privcmd_close(struct vm_area_struct *vma)
498{
499 struct page **pages = vma->vm_private_data;
500 int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
501
 502 if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
503 return;
504
505 xen_unmap_domain_mfn_range(vma, numpgs, pages);
506 free_xenballooned_pages(numpgs, pages);
507 kfree(pages);
508}
509
510static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
511{
512 printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
513 vma, vma->vm_start, vma->vm_end,
514 vmf->pgoff, vmf->virtual_address);
515
516 return VM_FAULT_SIGBUS;
517}
518
519static struct vm_operations_struct privcmd_vm_ops = {
520 .close = privcmd_close,
521 .fault = privcmd_fault
522};
523
524static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
525{
526 /* DONTCOPY is essential for Xen because copy_page_range doesn't know
527 * how to recreate these mappings */
528 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
529 VM_DONTEXPAND | VM_DONTDUMP;
530 vma->vm_ops = &privcmd_vm_ops;
531 vma->vm_private_data = NULL;
532
533 return 0;
534}
535
536static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
537{
538 return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
539}
540
541const struct file_operations xen_privcmd_fops = {
542 .owner = THIS_MODULE,
543 .unlocked_ioctl = privcmd_ioctl,
544 .mmap = privcmd_mmap,
545};
546EXPORT_SYMBOL_GPL(xen_privcmd_fops);
547
548static struct miscdevice privcmd_dev = {
549 .minor = MISC_DYNAMIC_MINOR,
550 .name = "xen/privcmd",
551 .fops = &xen_privcmd_fops,
552};
553
554static int __init privcmd_init(void)
555{
556 int err;
557
558 if (!xen_domain())
559 return -ENODEV;
560
561 err = misc_register(&privcmd_dev);
562 if (err != 0) {
563 printk(KERN_ERR "Could not register Xen privcmd device\n");
564 return err;
565 }
566 return 0;
567}
568
569static void __exit privcmd_exit(void)
570{
571 misc_deregister(&privcmd_dev);
572}
573
574module_init(privcmd_init);
575module_exit(privcmd_exit);
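
The privcmd device shown above is driven from userspace through ioctls on /dev/xen/privcmd. Below is a hedged sketch of the simplest path, IOCTL_PRIVCMD_HYPERCALL: the struct layout and ioctl encoding are assumed to match the kernel's xen/privcmd.h, and the hypercall numbers (__HYPERVISOR_xen_version = 17, XENVER_version = 0) come from the Xen public headers rather than from this hunk.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

struct privcmd_hypercall {
        uint64_t op;
        uint64_t arg[5];
};

/* Assumed to match the definition in the kernel's xen/privcmd.h. */
#define IOCTL_PRIVCMD_HYPERCALL \
        _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))

int main(void)
{
        struct privcmd_hypercall call = {
                .op  = 17,              /* __HYPERVISOR_xen_version */
                .arg = { 0 },           /* XENVER_version */
        };
        int fd, ret;

        fd = open("/dev/xen/privcmd", O_RDWR);
        if (fd < 0) {
                perror("open /dev/xen/privcmd");
                return 1;
        }
        ret = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
        if (ret < 0)
                perror("IOCTL_PRIVCMD_HYPERCALL");
        else
                printf("Xen version %d.%d\n", ret >> 16, ret & 0xffff);
        close(fd);
        return ret < 0;
}

The ioctl return value is simply the hypercall's return value, which is how privcmd_ioctl_hypercall() above propagates it.
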
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
deleted file mode 100644
index 14facaeed36..00000000000
--- a/drivers/xen/privcmd.h
+++ /dev/null
@@ -1,3 +0,0 @@
1#include <linux/fs.h>
2
3extern const struct file_operations xen_privcmd_fops;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index af47e759446..84f317e0cc2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -35,11 +35,9 @@
35 35
36#include <linux/bootmem.h> 36#include <linux/bootmem.h>
37#include <linux/dma-mapping.h> 37#include <linux/dma-mapping.h>
38#include <linux/export.h>
39#include <xen/swiotlb-xen.h> 38#include <xen/swiotlb-xen.h>
40#include <xen/page.h> 39#include <xen/page.h>
41#include <xen/xen-ops.h> 40#include <xen/xen-ops.h>
42#include <xen/hvc-console.h>
43/* 41/*
44 * Used to do a quick range check in swiotlb_tbl_unmap_single and 42 * Used to do a quick range check in swiotlb_tbl_unmap_single and
45 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this 43 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -52,7 +50,7 @@ static unsigned long xen_io_tlb_nslabs;
52 * Quick lookup value of the bus address of the IOTLB. 50 * Quick lookup value of the bus address of the IOTLB.
53 */ 51 */
54 52
55static u64 start_dma_addr; 53u64 start_dma_addr;
56 54
57static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) 55static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
58{ 56{
@@ -144,74 +142,30 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
144 } while (i < nslabs); 142 } while (i < nslabs);
145 return 0; 143 return 0;
146} 144}
147static unsigned long xen_set_nslabs(unsigned long nr_tbl) 145
146void __init xen_swiotlb_init(int verbose)
148{ 147{
149 if (!nr_tbl) { 148 unsigned long bytes;
149 int rc;
150 unsigned long nr_tbl;
151
152 nr_tbl = swioltb_nr_tbl();
153 if (nr_tbl)
154 xen_io_tlb_nslabs = nr_tbl;
155 else {
150 xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); 156 xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
151 xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); 157 xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
152 } else 158 }
153 xen_io_tlb_nslabs = nr_tbl;
154
155 return xen_io_tlb_nslabs << IO_TLB_SHIFT;
156}
157 159
158enum xen_swiotlb_err { 160 bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
159 XEN_SWIOTLB_UNKNOWN = 0,
160 XEN_SWIOTLB_ENOMEM,
161 XEN_SWIOTLB_EFIXUP
162};
163 161
164static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
165{
166 switch (err) {
167 case XEN_SWIOTLB_ENOMEM:
168 return "Cannot allocate Xen-SWIOTLB buffer\n";
169 case XEN_SWIOTLB_EFIXUP:
170 return "Failed to get contiguous memory for DMA from Xen!\n"\
171 "You either: don't have the permissions, do not have"\
172 " enough free memory under 4GB, or the hypervisor memory"\
173 " is too fragmented!";
174 default:
175 break;
176 }
177 return "";
178}
179int __ref xen_swiotlb_init(int verbose, bool early)
180{
181 unsigned long bytes, order;
182 int rc = -ENOMEM;
183 enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
184 unsigned int repeat = 3;
185
186 xen_io_tlb_nslabs = swiotlb_nr_tbl();
187retry:
188 bytes = xen_set_nslabs(xen_io_tlb_nslabs);
189 order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
190 /* 162 /*
191 * Get IO TLB memory from any location. 163 * Get IO TLB memory from any location.
192 */ 164 */
193 if (early) 165 xen_io_tlb_start = alloc_bootmem(bytes);
194 xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); 166 if (!xen_io_tlb_start)
195 else { 167 panic("Cannot allocate SWIOTLB buffer");
196#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) 168
197#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
198 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
199 xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order);
200 if (xen_io_tlb_start)
201 break;
202 order--;
203 }
204 if (order != get_order(bytes)) {
205 pr_warn("Warning: only able to allocate %ld MB "
206 "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
207 xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
208 bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
209 }
210 }
211 if (!xen_io_tlb_start) {
212 m_ret = XEN_SWIOTLB_ENOMEM;
213 goto error;
214 }
215 xen_io_tlb_end = xen_io_tlb_start + bytes; 169 xen_io_tlb_end = xen_io_tlb_start + bytes;
216 /* 170 /*
217 * And replace that memory with pages under 4GB. 171 * And replace that memory with pages under 4GB.
@@ -219,49 +173,27 @@ retry:
219 rc = xen_swiotlb_fixup(xen_io_tlb_start, 173 rc = xen_swiotlb_fixup(xen_io_tlb_start,
220 bytes, 174 bytes,
221 xen_io_tlb_nslabs); 175 xen_io_tlb_nslabs);
222 if (rc) { 176 if (rc)
223 if (early)
224 free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
225 else {
226 free_pages((unsigned long)xen_io_tlb_start, order);
227 xen_io_tlb_start = NULL;
228 }
229 m_ret = XEN_SWIOTLB_EFIXUP;
230 goto error; 177 goto error;
231 } 178
232 start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); 179 start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
233 if (early) { 180 swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
234 swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); 181
235 rc = 0; 182 return;
236 } else
237 rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
238 return rc;
239error: 183error:
240 if (repeat--) { 184 panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
241 xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ 185 "We either don't have the permission or you do not have enough"\
242 (xen_io_tlb_nslabs >> 1)); 186 "free memory under 4GB!\n", rc);
243 printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
244 (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
245 goto retry;
246 }
247 pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
248 if (early)
249 panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
250 else
251 free_pages((unsigned long)xen_io_tlb_start, order);
252 return rc;
253} 187}
188
254void * 189void *
255xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 190xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
256 dma_addr_t *dma_handle, gfp_t flags, 191 dma_addr_t *dma_handle, gfp_t flags)
257 struct dma_attrs *attrs)
258{ 192{
259 void *ret; 193 void *ret;
260 int order = get_order(size); 194 int order = get_order(size);
261 u64 dma_mask = DMA_BIT_MASK(32); 195 u64 dma_mask = DMA_BIT_MASK(32);
262 unsigned long vstart; 196 unsigned long vstart;
263 phys_addr_t phys;
264 dma_addr_t dev_addr;
265 197
266 /* 198 /*
267 * Ignore region specifiers - the kernel's ideas of 199 * Ignore region specifiers - the kernel's ideas of
@@ -277,50 +209,32 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
277 vstart = __get_free_pages(flags, order); 209 vstart = __get_free_pages(flags, order);
278 ret = (void *)vstart; 210 ret = (void *)vstart;
279 211
280 if (!ret)
281 return ret;
282
283 if (hwdev && hwdev->coherent_dma_mask) 212 if (hwdev && hwdev->coherent_dma_mask)
284 dma_mask = dma_alloc_coherent_mask(hwdev, flags); 213 dma_mask = dma_alloc_coherent_mask(hwdev, flags);
285 214
286 phys = virt_to_phys(ret); 215 if (ret) {
287 dev_addr = xen_phys_to_bus(phys);
288 if (((dev_addr + size - 1 <= dma_mask)) &&
289 !range_straddles_page_boundary(phys, size))
290 *dma_handle = dev_addr;
291 else {
292 if (xen_create_contiguous_region(vstart, order, 216 if (xen_create_contiguous_region(vstart, order,
293 fls64(dma_mask)) != 0) { 217 fls64(dma_mask)) != 0) {
294 free_pages(vstart, order); 218 free_pages(vstart, order);
295 return NULL; 219 return NULL;
296 } 220 }
221 memset(ret, 0, size);
297 *dma_handle = virt_to_machine(ret).maddr; 222 *dma_handle = virt_to_machine(ret).maddr;
298 } 223 }
299 memset(ret, 0, size);
300 return ret; 224 return ret;
301} 225}
302EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); 226EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
303 227
304void 228void
305xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, 229xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
306 dma_addr_t dev_addr, struct dma_attrs *attrs) 230 dma_addr_t dev_addr)
307{ 231{
308 int order = get_order(size); 232 int order = get_order(size);
309 phys_addr_t phys;
310 u64 dma_mask = DMA_BIT_MASK(32);
311 233
312 if (dma_release_from_coherent(hwdev, order, vaddr)) 234 if (dma_release_from_coherent(hwdev, order, vaddr))
313 return; 235 return;
314 236
315 if (hwdev && hwdev->coherent_dma_mask) 237 xen_destroy_contiguous_region((unsigned long)vaddr, order);
316 dma_mask = hwdev->coherent_dma_mask;
317
318 phys = virt_to_phys(vaddr);
319
320 if (((dev_addr + size - 1 > dma_mask)) ||
321 range_straddles_page_boundary(phys, size))
322 xen_destroy_contiguous_region((unsigned long)vaddr, order);
323
324 free_pages((unsigned long)vaddr, order); 238 free_pages((unsigned long)vaddr, order);
325} 239}
326EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); 240EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -338,8 +252,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
338 enum dma_data_direction dir, 252 enum dma_data_direction dir,
339 struct dma_attrs *attrs) 253 struct dma_attrs *attrs)
340{ 254{
341 phys_addr_t map, phys = page_to_phys(page) + offset; 255 phys_addr_t phys = page_to_phys(page) + offset;
342 dma_addr_t dev_addr = xen_phys_to_bus(phys); 256 dma_addr_t dev_addr = xen_phys_to_bus(phys);
257 void *map;
343 258
344 BUG_ON(dir == DMA_NONE); 259 BUG_ON(dir == DMA_NONE);
345 /* 260 /*
@@ -355,10 +270,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
355 * Oh well, have to allocate and map a bounce buffer. 270 * Oh well, have to allocate and map a bounce buffer.
356 */ 271 */
357 map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir); 272 map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
358 if (map == SWIOTLB_MAP_ERROR) 273 if (!map)
359 return DMA_ERROR_CODE; 274 return DMA_ERROR_CODE;
360 275
361 dev_addr = xen_phys_to_bus(map); 276 dev_addr = xen_virt_to_bus(map);
362 277
363 /* 278 /*
364 * Ensure that the address returned is DMA'ble 279 * Ensure that the address returned is DMA'ble
@@ -388,7 +303,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
388 303
389 /* NOTE: We use dev_addr here, not paddr! */ 304 /* NOTE: We use dev_addr here, not paddr! */
390 if (is_xen_swiotlb_buffer(dev_addr)) { 305 if (is_xen_swiotlb_buffer(dev_addr)) {
391 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); 306 swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
392 return; 307 return;
393 } 308 }
394 309
@@ -433,7 +348,8 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
433 348
434 /* NOTE: We use dev_addr here, not paddr! */ 349 /* NOTE: We use dev_addr here, not paddr! */
435 if (is_xen_swiotlb_buffer(dev_addr)) { 350 if (is_xen_swiotlb_buffer(dev_addr)) {
436 swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); 351 swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
352 target);
437 return; 353 return;
438 } 354 }
439 355
@@ -492,12 +408,11 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
492 if (swiotlb_force || 408 if (swiotlb_force ||
493 !dma_capable(hwdev, dev_addr, sg->length) || 409 !dma_capable(hwdev, dev_addr, sg->length) ||
494 range_straddles_page_boundary(paddr, sg->length)) { 410 range_straddles_page_boundary(paddr, sg->length)) {
495 phys_addr_t map = swiotlb_tbl_map_single(hwdev, 411 void *map = swiotlb_tbl_map_single(hwdev,
496 start_dma_addr, 412 start_dma_addr,
497 sg_phys(sg), 413 sg_phys(sg),
498 sg->length, 414 sg->length, dir);
499 dir); 415 if (!map) {
500 if (map == SWIOTLB_MAP_ERROR) {
501 /* Don't panic here, we expect map_sg users 416 /* Don't panic here, we expect map_sg users
502 to do proper error handling. */ 417 to do proper error handling. */
503 xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, 418 xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
@@ -505,7 +420,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
505 sgl[0].dma_length = 0; 420 sgl[0].dma_length = 0;
506 return DMA_ERROR_CODE; 421 return DMA_ERROR_CODE;
507 } 422 }
508 sg->dma_address = xen_phys_to_bus(map); 423 sg->dma_address = xen_virt_to_bus(map);
509 } else 424 } else
510 sg->dma_address = dev_addr; 425 sg->dma_address = dev_addr;
511 sg->dma_length = sg->length; 426 sg->dma_length = sg->length;
@@ -514,6 +429,14 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
514} 429}
515EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); 430EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs);
516 431
432int
433xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
434 enum dma_data_direction dir)
435{
436 return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
437}
438EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg);
439
517/* 440/*
518 * Unmap a set of streaming mode DMA translations. Again, cpu read rules 441 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
519 * concerning calls here are the same as for swiotlb_unmap_page() above. 442 * concerning calls here are the same as for swiotlb_unmap_page() above.
@@ -534,6 +457,14 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
534} 457}
535EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); 458EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
536 459
460void
461xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
462 enum dma_data_direction dir)
463{
464 return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
465}
466EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg);
467
537/* 468/*
538 * Make physical memory consistent for a set of streaming mode DMA translations 469 * Make physical memory consistent for a set of streaming mode DMA translations
539 * after a transfer. 470 * after a transfer.
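
xen_swiotlb_map_page() above only bounces through the software IOTLB when it must: the buffer has to be reachable within the device's DMA mask and must not straddle a page boundary, since machine-frame contiguity is only guaranteed inside one page. A simplified, self-contained sketch of that decision follows; the kernel's range_straddles_page_boundary() additionally checks whether the underlying frames happen to be physically contiguous, which is omitted here, and the constants are illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE 4096u
#define DEMO_PAGE_MASK (~((uint64_t)DEMO_PAGE_SIZE - 1))

static bool range_straddles_page(uint64_t paddr, uint64_t size)
{
        /* First and last byte must fall inside the same page. */
        return (paddr & DEMO_PAGE_MASK) != ((paddr + size - 1) & DEMO_PAGE_MASK);
}

static bool needs_bounce(uint64_t dev_addr, uint64_t paddr, uint64_t size,
                         uint64_t dma_mask)
{
        bool capable = (dev_addr + size - 1) <= dma_mask;

        return !capable || range_straddles_page(paddr, size);
}

int main(void)
{
        /* A 32-bit-only device handed a buffer that crosses a page boundary. */
        printf("%d\n", needs_bounce(0xfffff000ull, 0xfffff000ull, 8192,
                                    0xffffffffull));
        return 0;
}
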
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index 96453f8a85c..1e0fe01eb67 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -11,7 +11,6 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/kobject.h> 13#include <linux/kobject.h>
14#include <linux/err.h>
15 14
16#include <asm/xen/hypervisor.h> 15#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h> 16#include <asm/xen/hypercall.h>
@@ -98,7 +97,7 @@ static struct attribute *version_attrs[] = {
98 NULL 97 NULL
99}; 98};
100 99
101static const struct attribute_group version_group = { 100static struct attribute_group version_group = {
102 .name = "version", 101 .name = "version",
103 .attrs = version_attrs, 102 .attrs = version_attrs,
104}; 103};
@@ -115,7 +114,7 @@ static void xen_sysfs_version_destroy(void)
115 114
116/* UUID */ 115/* UUID */
117 116
118static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer) 117static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
119{ 118{
120 char *vm, *val; 119 char *vm, *val;
121 int ret; 120 int ret;
@@ -136,17 +135,6 @@ static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer)
136 return ret; 135 return ret;
137} 136}
138 137
139static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
140{
141 xen_domain_handle_t uuid;
142 int ret;
143 ret = HYPERVISOR_xen_version(XENVER_guest_handle, uuid);
144 if (ret)
145 return uuid_show_fallback(attr, buffer);
146 ret = sprintf(buffer, "%pU\n", uuid);
147 return ret;
148}
149
150HYPERVISOR_ATTR_RO(uuid); 138HYPERVISOR_ATTR_RO(uuid);
151 139
152static int __init xen_sysfs_uuid_init(void) 140static int __init xen_sysfs_uuid_init(void)
@@ -222,7 +210,7 @@ static struct attribute *xen_compile_attrs[] = {
222 NULL 210 NULL
223}; 211};
224 212
225static const struct attribute_group xen_compilation_group = { 213static struct attribute_group xen_compilation_group = {
226 .name = "compilation", 214 .name = "compilation",
227 .attrs = xen_compile_attrs, 215 .attrs = xen_compile_attrs,
228}; 216};
@@ -285,8 +273,7 @@ static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
285 ret = HYPERVISOR_xen_version(XENVER_platform_parameters, 273 ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
286 parms); 274 parms);
287 if (!ret) 275 if (!ret)
288 ret = sprintf(buffer, "%"PRI_xen_ulong"\n", 276 ret = sprintf(buffer, "%lx\n", parms->virt_start);
289 parms->virt_start);
290 kfree(parms); 277 kfree(parms);
291 } 278 }
292 279
@@ -353,7 +340,7 @@ static struct attribute *xen_properties_attrs[] = {
353 NULL 340 NULL
354}; 341};
355 342
356static const struct attribute_group xen_properties_group = { 343static struct attribute_group xen_properties_group = {
357 .name = "properties", 344 .name = "properties",
358 .attrs = xen_properties_attrs, 345 .attrs = xen_properties_attrs,
359}; 346};
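
The attribute groups registered above appear as small text files under /sys/hypervisor. A minimal userspace sketch for reading a few of them; the exact set of files depends on the hypervisor and kernel configuration, so the paths are assumptions based on the groups in this file:

#include <stdio.h>

static void dump(const char *path)
{
        char buf[128];
        FILE *f = fopen(path, "r");

        if (!f)
                return;
        if (fgets(buf, sizeof(buf), f))
                printf("%-40s %s", path, buf);
        fclose(f);
}

int main(void)
{
        dump("/sys/hypervisor/uuid");
        dump("/sys/hypervisor/version/major");
        dump("/sys/hypervisor/version/minor");
        dump("/sys/hypervisor/properties/capabilities");
        return 0;
}
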
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 144564e5eb2..d369965e8f8 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -9,6 +9,7 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/module.h>
12#include <linux/cleancache.h> 13#include <linux/cleancache.h>
13 14
14/* temporary ifdef until include/linux/frontswap.h is upstream */ 15/* temporary ifdef until include/linux/frontswap.h is upstream */
@@ -21,7 +22,6 @@
21#include <asm/xen/hypercall.h> 22#include <asm/xen/hypercall.h>
22#include <asm/xen/page.h> 23#include <asm/xen/page.h>
23#include <asm/xen/hypervisor.h> 24#include <asm/xen/hypervisor.h>
24#include <xen/tmem.h>
25 25
26#define TMEM_CONTROL 0 26#define TMEM_CONTROL 0
27#define TMEM_NEW_POOL 1 27#define TMEM_NEW_POOL 1
@@ -128,13 +128,15 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
128 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); 128 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
129} 129}
130 130
131bool __read_mostly tmem_enabled = false; 131int tmem_enabled __read_mostly;
132EXPORT_SYMBOL(tmem_enabled);
132 133
133static int __init enable_tmem(char *s) 134static int __init enable_tmem(char *s)
134{ 135{
135 tmem_enabled = true; 136 tmem_enabled = 1;
136 return 1; 137 return 1;
137} 138}
139
138__setup("tmem", enable_tmem); 140__setup("tmem", enable_tmem);
139 141
140#ifdef CONFIG_CLEANCACHE 142#ifdef CONFIG_CLEANCACHE
@@ -227,21 +229,22 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
227 return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); 229 return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
228} 230}
229 231
230static bool __initdata use_cleancache = true; 232static int use_cleancache = 1;
231 233
232static int __init no_cleancache(char *s) 234static int __init no_cleancache(char *s)
233{ 235{
234 use_cleancache = false; 236 use_cleancache = 0;
235 return 1; 237 return 1;
236} 238}
239
237__setup("nocleancache", no_cleancache); 240__setup("nocleancache", no_cleancache);
238 241
239static struct cleancache_ops __initdata tmem_cleancache_ops = { 242static struct cleancache_ops tmem_cleancache_ops = {
240 .put_page = tmem_cleancache_put_page, 243 .put_page = tmem_cleancache_put_page,
241 .get_page = tmem_cleancache_get_page, 244 .get_page = tmem_cleancache_get_page,
242 .invalidate_page = tmem_cleancache_flush_page, 245 .flush_page = tmem_cleancache_flush_page,
243 .invalidate_inode = tmem_cleancache_flush_inode, 246 .flush_inode = tmem_cleancache_flush_inode,
244 .invalidate_fs = tmem_cleancache_flush_fs, 247 .flush_fs = tmem_cleancache_flush_fs,
245 .init_shared_fs = tmem_cleancache_init_shared_fs, 248 .init_shared_fs = tmem_cleancache_init_shared_fs,
246 .init_fs = tmem_cleancache_init_fs 249 .init_fs = tmem_cleancache_init_fs
247}; 250};
@@ -270,7 +273,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
270} 273}
271 274
272/* returns 0 if the page was successfully put into frontswap, -1 if not */ 275/* returns 0 if the page was successfully put into frontswap, -1 if not */
273static int tmem_frontswap_store(unsigned type, pgoff_t offset, 276static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
274 struct page *page) 277 struct page *page)
275{ 278{
276 u64 ind64 = (u64)offset; 279 u64 ind64 = (u64)offset;
@@ -296,7 +299,7 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset,
296 * returns 0 if the page was successfully gotten from frontswap, -1 if 299 * returns 0 if the page was successfully gotten from frontswap, -1 if
297 * was not present (should never happen!) 300 * was not present (should never happen!)
298 */ 301 */
299static int tmem_frontswap_load(unsigned type, pgoff_t offset, 302static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
300 struct page *page) 303 struct page *page)
301{ 304{
302 u64 ind64 = (u64)offset; 305 u64 ind64 = (u64)offset;
@@ -353,20 +356,21 @@ static void tmem_frontswap_init(unsigned ignored)
353 xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); 356 xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
354} 357}
355 358
356static bool __initdata use_frontswap = true; 359static int __initdata use_frontswap = 1;
357 360
358static int __init no_frontswap(char *s) 361static int __init no_frontswap(char *s)
359{ 362{
360 use_frontswap = false; 363 use_frontswap = 0;
361 return 1; 364 return 1;
362} 365}
366
363__setup("nofrontswap", no_frontswap); 367__setup("nofrontswap", no_frontswap);
364 368
365static struct frontswap_ops __initdata tmem_frontswap_ops = { 369static struct frontswap_ops tmem_frontswap_ops = {
366 .store = tmem_frontswap_store, 370 .put_page = tmem_frontswap_put_page,
367 .load = tmem_frontswap_load, 371 .get_page = tmem_frontswap_get_page,
368 .invalidate_page = tmem_frontswap_flush_page, 372 .flush_page = tmem_frontswap_flush_page,
369 .invalidate_area = tmem_frontswap_flush_area, 373 .flush_area = tmem_frontswap_flush_area,
370 .init = tmem_frontswap_init 374 .init = tmem_frontswap_init
371}; 375};
372#endif 376#endif
diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c
deleted file mode 100644
index da39191e727..00000000000
--- a/drivers/xen/xen-acpi-pad.c
+++ /dev/null
@@ -1,182 +0,0 @@
1/*
2 * xen-acpi-pad.c - Xen pad interface
3 *
4 * Copyright (c) 2012, Intel Corporation.
5 * Author: Liu, Jinsong <jinsong.liu@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 */
16
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <acpi/acpi_bus.h>
20#include <acpi/acpi_drivers.h>
21#include <asm/xen/hypercall.h>
22#include <xen/interface/version.h>
23#include <xen/xen-ops.h>
24
25#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad"
26#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
27#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
28static DEFINE_MUTEX(xen_cpu_lock);
29
30static int xen_acpi_pad_idle_cpus(unsigned int idle_nums)
31{
32 struct xen_platform_op op;
33
34 op.cmd = XENPF_core_parking;
35 op.u.core_parking.type = XEN_CORE_PARKING_SET;
36 op.u.core_parking.idle_nums = idle_nums;
37
38 return HYPERVISOR_dom0_op(&op);
39}
40
41static int xen_acpi_pad_idle_cpus_num(void)
42{
43 struct xen_platform_op op;
44
45 op.cmd = XENPF_core_parking;
46 op.u.core_parking.type = XEN_CORE_PARKING_GET;
47
48 return HYPERVISOR_dom0_op(&op)
49 ?: op.u.core_parking.idle_nums;
50}
51
52/*
53 * Query firmware how many CPUs should be idle
54 * return -1 on failure
55 */
56static int acpi_pad_pur(acpi_handle handle)
57{
58 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
59 union acpi_object *package;
60 int num = -1;
61
62 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer)))
63 return num;
64
65 if (!buffer.length || !buffer.pointer)
66 return num;
67
68 package = buffer.pointer;
69
70 if (package->type == ACPI_TYPE_PACKAGE &&
71 package->package.count == 2 &&
72 package->package.elements[0].integer.value == 1) /* rev 1 */
73 num = package->package.elements[1].integer.value;
74
75 kfree(buffer.pointer);
76 return num;
77}
78
79/* Notify firmware how many CPUs are idle */
80static void acpi_pad_ost(acpi_handle handle, int stat,
81 uint32_t idle_nums)
82{
83 union acpi_object params[3] = {
84 {.type = ACPI_TYPE_INTEGER,},
85 {.type = ACPI_TYPE_INTEGER,},
86 {.type = ACPI_TYPE_BUFFER,},
87 };
88 struct acpi_object_list arg_list = {3, params};
89
90 params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY;
91 params[1].integer.value = stat;
92 params[2].buffer.length = 4;
93 params[2].buffer.pointer = (void *)&idle_nums;
94 acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
95}
96
97static void acpi_pad_handle_notify(acpi_handle handle)
98{
99 int idle_nums;
100
101 mutex_lock(&xen_cpu_lock);
102 idle_nums = acpi_pad_pur(handle);
103 if (idle_nums < 0) {
104 mutex_unlock(&xen_cpu_lock);
105 return;
106 }
107
108 idle_nums = xen_acpi_pad_idle_cpus(idle_nums)
109 ?: xen_acpi_pad_idle_cpus_num();
110 if (idle_nums >= 0)
111 acpi_pad_ost(handle, 0, idle_nums);
112 mutex_unlock(&xen_cpu_lock);
113}
114
115static void acpi_pad_notify(acpi_handle handle, u32 event,
116 void *data)
117{
118 switch (event) {
119 case ACPI_PROCESSOR_AGGREGATOR_NOTIFY:
120 acpi_pad_handle_notify(handle);
121 break;
122 default:
123 pr_warn("Unsupported event [0x%x]\n", event);
124 break;
125 }
126}
127
128static int acpi_pad_add(struct acpi_device *device)
129{
130 acpi_status status;
131
132 strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
133 strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);
134
135 status = acpi_install_notify_handler(device->handle,
136 ACPI_DEVICE_NOTIFY, acpi_pad_notify, device);
137 if (ACPI_FAILURE(status))
138 return -ENODEV;
139
140 return 0;
141}
142
143static int acpi_pad_remove(struct acpi_device *device,
144 int type)
145{
146 mutex_lock(&xen_cpu_lock);
147 xen_acpi_pad_idle_cpus(0);
148 mutex_unlock(&xen_cpu_lock);
149
150 acpi_remove_notify_handler(device->handle,
151 ACPI_DEVICE_NOTIFY, acpi_pad_notify);
152 return 0;
153}
154
155static const struct acpi_device_id pad_device_ids[] = {
156 {"ACPI000C", 0},
157 {"", 0},
158};
159
160static struct acpi_driver acpi_pad_driver = {
161 .name = "processor_aggregator",
162 .class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
163 .ids = pad_device_ids,
164 .ops = {
165 .add = acpi_pad_add,
166 .remove = acpi_pad_remove,
167 },
168};
169
170static int __init xen_acpi_pad_init(void)
171{
172 /* Only DOM0 is responsible for Xen acpi pad */
173 if (!xen_initial_domain())
174 return -ENODEV;
175
 176 /* Only Xen 4.2 or later supports Xen acpi pad */
177 if (!xen_running_on_version_or_later(4, 2))
178 return -ENODEV;
179
180 return acpi_bus_register_driver(&acpi_pad_driver);
181}
182subsys_initcall(xen_acpi_pad_init);
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
deleted file mode 100644
index 316df65163c..00000000000
--- a/drivers/xen/xen-acpi-processor.c
+++ /dev/null
@@ -1,568 +0,0 @@
1/*
2 * Copyright 2012 by Oracle Inc
3 * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
4 *
5 * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
6 * so many thanks go to Kevin Tian <kevin.tian@intel.com>
7 * and Yu Ke <ke.yu@intel.com>.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 */
19
20#include <linux/cpumask.h>
21#include <linux/cpufreq.h>
22#include <linux/freezer.h>
23#include <linux/kernel.h>
24#include <linux/kthread.h>
25#include <linux/init.h>
26#include <linux/module.h>
27#include <linux/types.h>
28#include <acpi/acpi_bus.h>
29#include <acpi/acpi_drivers.h>
30#include <acpi/processor.h>
31
32#include <xen/xen.h>
33#include <xen/interface/platform.h>
34#include <asm/xen/hypercall.h>
35
36#define DRV_NAME "xen-acpi-processor: "
37
38static int no_hypercall;
39MODULE_PARM_DESC(off, "Inhibit the hypercall.");
40module_param_named(off, no_hypercall, int, 0400);
41
42/*
43 * Note: Do not convert the acpi_id* below to cpumask_var_t or use cpumask_bit
44 * - as those shrink to nr_cpu_bits (which is dependent on possible_cpu), which
45 * can be less than what we want to put in. Instead use the 'nr_acpi_bits'
46 * which is dynamically computed based on the MADT or x2APIC table.
47 */
48static unsigned int nr_acpi_bits;
49/* Mutex to protect the acpi_ids_done - for CPU hotplug use. */
50static DEFINE_MUTEX(acpi_ids_mutex);
51/* Which ACPI ID we have processed from 'struct acpi_processor'. */
52static unsigned long *acpi_ids_done;
53/* Which ACPI IDs exist in the SSDT/DSDT processor definitions. */
54static unsigned long __initdata *acpi_id_present;
55/* And if there is an _CST definition (or a PBLK) for the ACPI IDs */
56static unsigned long __initdata *acpi_id_cst_present;
57
58static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
59{
60 struct xen_platform_op op = {
61 .cmd = XENPF_set_processor_pminfo,
62 .interface_version = XENPF_INTERFACE_VERSION,
63 .u.set_pminfo.id = _pr->acpi_id,
64 .u.set_pminfo.type = XEN_PM_CX,
65 };
66 struct xen_processor_cx *dst_cx, *dst_cx_states = NULL;
67 struct acpi_processor_cx *cx;
68 unsigned int i, ok;
69 int ret = 0;
70
71 dst_cx_states = kcalloc(_pr->power.count,
72 sizeof(struct xen_processor_cx), GFP_KERNEL);
73 if (!dst_cx_states)
74 return -ENOMEM;
75
76 for (ok = 0, i = 1; i <= _pr->power.count; i++) {
77 cx = &_pr->power.states[i];
78 if (!cx->valid)
79 continue;
80
81 dst_cx = &(dst_cx_states[ok++]);
82
83 dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
84 if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
85 dst_cx->reg.bit_width = 8;
86 dst_cx->reg.bit_offset = 0;
87 dst_cx->reg.access_size = 1;
88 } else {
89 dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
90 if (cx->entry_method == ACPI_CSTATE_FFH) {
91 /* NATIVE_CSTATE_BEYOND_HALT */
92 dst_cx->reg.bit_offset = 2;
93 dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */
94 }
95 dst_cx->reg.access_size = 0;
96 }
97 dst_cx->reg.address = cx->address;
98
99 dst_cx->type = cx->type;
100 dst_cx->latency = cx->latency;
101
102 dst_cx->dpcnt = 0;
103 set_xen_guest_handle(dst_cx->dp, NULL);
104 }
105 if (!ok) {
106 pr_debug(DRV_NAME "No _Cx for ACPI CPU %u\n", _pr->acpi_id);
107 kfree(dst_cx_states);
108 return -EINVAL;
109 }
110 op.u.set_pminfo.power.count = ok;
111 op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control;
112 op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check;
113 op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst;
114 op.u.set_pminfo.power.flags.power_setup_done =
115 _pr->flags.power_setup_done;
116
117 set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states);
118
119 if (!no_hypercall)
120 ret = HYPERVISOR_dom0_op(&op);
121
122 if (!ret) {
123 pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id);
124 for (i = 1; i <= _pr->power.count; i++) {
125 cx = &_pr->power.states[i];
126 if (!cx->valid)
127 continue;
128 pr_debug(" C%d: %s %d uS\n",
129 cx->type, cx->desc, (u32)cx->latency);
130 }
131 } else if (ret != -EINVAL)
132 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI
133		 * table is referencing a non-existent CPU - which can happen
134 * with broken ACPI tables. */
135 pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI CPU%u\n",
136 ret, _pr->acpi_id);
137
138 kfree(dst_cx_states);
139
140 return ret;
141}
142static struct xen_processor_px *
143xen_copy_pss_data(struct acpi_processor *_pr,
144 struct xen_processor_performance *dst_perf)
145{
146 struct xen_processor_px *dst_states = NULL;
147 unsigned int i;
148
149 BUILD_BUG_ON(sizeof(struct xen_processor_px) !=
150 sizeof(struct acpi_processor_px));
151
152 dst_states = kcalloc(_pr->performance->state_count,
153 sizeof(struct xen_processor_px), GFP_KERNEL);
154 if (!dst_states)
155 return ERR_PTR(-ENOMEM);
156
157 dst_perf->state_count = _pr->performance->state_count;
158 for (i = 0; i < _pr->performance->state_count; i++) {
159		/* Fortunately for us, they are both the same size */
160 memcpy(&(dst_states[i]), &(_pr->performance->states[i]),
161 sizeof(struct acpi_processor_px));
162 }
163 return dst_states;
164}
165static int xen_copy_psd_data(struct acpi_processor *_pr,
166 struct xen_processor_performance *dst)
167{
168 struct acpi_psd_package *pdomain;
169
170 BUILD_BUG_ON(sizeof(struct xen_psd_package) !=
171 sizeof(struct acpi_psd_package));
172
173 /* This information is enumerated only if acpi_processor_preregister_performance
174 * has been called.
175 */
176 dst->shared_type = _pr->performance->shared_type;
177
178 pdomain = &(_pr->performance->domain_info);
179
180 /* 'acpi_processor_preregister_performance' does not parse if the
181 * num_processors <= 1, but Xen still requires it. Do it manually here.
182 */
183 if (pdomain->num_processors <= 1) {
184 if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
185 dst->shared_type = CPUFREQ_SHARED_TYPE_ALL;
186 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
187 dst->shared_type = CPUFREQ_SHARED_TYPE_HW;
188 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
189 dst->shared_type = CPUFREQ_SHARED_TYPE_ANY;
190
191 }
192 memcpy(&(dst->domain_info), pdomain, sizeof(struct acpi_psd_package));
193 return 0;
194}
195static int xen_copy_pct_data(struct acpi_pct_register *pct,
196 struct xen_pct_register *dst_pct)
197{
198	/* It would be nice if you could just do 'memcpy(dst_pct, pct)', but
199	 * sadly the Xen structure does not have the proper padding, so the
200	 * descriptor field takes two bytes (dst_pct) instead of one (pct).
201	 */
202 dst_pct->descriptor = pct->descriptor;
203 dst_pct->length = pct->length;
204 dst_pct->space_id = pct->space_id;
205 dst_pct->bit_width = pct->bit_width;
206 dst_pct->bit_offset = pct->bit_offset;
207 dst_pct->reserved = pct->reserved;
208 dst_pct->address = pct->address;
209 return 0;
210}
211static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
212{
213 int ret = 0;
214 struct xen_platform_op op = {
215 .cmd = XENPF_set_processor_pminfo,
216 .interface_version = XENPF_INTERFACE_VERSION,
217 .u.set_pminfo.id = _pr->acpi_id,
218 .u.set_pminfo.type = XEN_PM_PX,
219 };
220 struct xen_processor_performance *dst_perf;
221 struct xen_processor_px *dst_states = NULL;
222
223 dst_perf = &op.u.set_pminfo.perf;
224
225 dst_perf->platform_limit = _pr->performance_platform_limit;
226 dst_perf->flags |= XEN_PX_PPC;
227 xen_copy_pct_data(&(_pr->performance->control_register),
228 &dst_perf->control_register);
229 xen_copy_pct_data(&(_pr->performance->status_register),
230 &dst_perf->status_register);
231 dst_perf->flags |= XEN_PX_PCT;
232 dst_states = xen_copy_pss_data(_pr, dst_perf);
233 if (!IS_ERR_OR_NULL(dst_states)) {
234 set_xen_guest_handle(dst_perf->states, dst_states);
235 dst_perf->flags |= XEN_PX_PSS;
236 }
237 if (!xen_copy_psd_data(_pr, dst_perf))
238 dst_perf->flags |= XEN_PX_PSD;
239
240 if (dst_perf->flags != (XEN_PX_PSD | XEN_PX_PSS | XEN_PX_PCT | XEN_PX_PPC)) {
241 pr_warn(DRV_NAME "ACPI CPU%u missing some P-state data (%x), skipping.\n",
242 _pr->acpi_id, dst_perf->flags);
243 ret = -ENODEV;
244 goto err_free;
245 }
246
247 if (!no_hypercall)
248 ret = HYPERVISOR_dom0_op(&op);
249
250 if (!ret) {
251 struct acpi_processor_performance *perf;
252 unsigned int i;
253
254 perf = _pr->performance;
255 pr_debug("ACPI CPU%u - P-states uploaded.\n", _pr->acpi_id);
256 for (i = 0; i < perf->state_count; i++) {
257 pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
258 (i == perf->state ? '*' : ' '), i,
259 (u32) perf->states[i].core_frequency,
260 (u32) perf->states[i].power,
261 (u32) perf->states[i].transition_latency);
262 }
263 } else if (ret != -EINVAL)
264 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI
265		 * table is referencing a non-existent CPU - which can happen
266 * with broken ACPI tables. */
267 pr_warn(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI CPU%u\n",
268 ret, _pr->acpi_id);
269err_free:
270 if (!IS_ERR_OR_NULL(dst_states))
271 kfree(dst_states);
272
273 return ret;
274}
275static int upload_pm_data(struct acpi_processor *_pr)
276{
277 int err = 0;
278
279 mutex_lock(&acpi_ids_mutex);
280 if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done)) {
281 mutex_unlock(&acpi_ids_mutex);
282 return -EBUSY;
283 }
284 if (_pr->flags.power)
285 err = push_cxx_to_hypervisor(_pr);
286
287 if (_pr->performance && _pr->performance->states)
288 err |= push_pxx_to_hypervisor(_pr);
289
290 mutex_unlock(&acpi_ids_mutex);
291 return err;
292}
293static unsigned int __init get_max_acpi_id(void)
294{
295 struct xenpf_pcpuinfo *info;
296 struct xen_platform_op op = {
297 .cmd = XENPF_get_cpuinfo,
298 .interface_version = XENPF_INTERFACE_VERSION,
299 };
300 int ret = 0;
301 unsigned int i, last_cpu, max_acpi_id = 0;
302
303 info = &op.u.pcpu_info;
304 info->xen_cpuid = 0;
305
306 ret = HYPERVISOR_dom0_op(&op);
307 if (ret)
308 return NR_CPUS;
309
310	/* The max_present is the same regardless of the xen_cpuid */
311 last_cpu = op.u.pcpu_info.max_present;
312 for (i = 0; i <= last_cpu; i++) {
313 info->xen_cpuid = i;
314 ret = HYPERVISOR_dom0_op(&op);
315 if (ret)
316 continue;
317 max_acpi_id = max(info->acpi_id, max_acpi_id);
318 }
319 max_acpi_id *= 2; /* Slack for CPU hotplug support. */
320 pr_debug(DRV_NAME "Max ACPI ID: %u\n", max_acpi_id);
321 return max_acpi_id;
322}
323/*
324 * The read_acpi_id and check_acpi_ids are there to support the Xen
325 * oddity of virtual CPUs != physical CPUs in the initial domain.
326 * The user can supply 'xen_max_vcpus=X' on the Xen hypervisor line
327 * which will bound the number of CPUs the initial domain can see.
328 * In general that is OK, except it plays havoc with any of the
329 * for_each_[present|online]_cpu macros, which are bound to the virtual
330 * CPU count.
331 */
332static acpi_status __init
333read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
334{
335 u32 acpi_id;
336 acpi_status status;
337 acpi_object_type acpi_type;
338 unsigned long long tmp;
339 union acpi_object object = { 0 };
340 struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
341 acpi_io_address pblk = 0;
342
343 status = acpi_get_type(handle, &acpi_type);
344 if (ACPI_FAILURE(status))
345 return AE_OK;
346
347 switch (acpi_type) {
348 case ACPI_TYPE_PROCESSOR:
349 status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
350 if (ACPI_FAILURE(status))
351 return AE_OK;
352 acpi_id = object.processor.proc_id;
353 pblk = object.processor.pblk_address;
354 break;
355 case ACPI_TYPE_DEVICE:
356 status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
357 if (ACPI_FAILURE(status))
358 return AE_OK;
359 acpi_id = tmp;
360 break;
361 default:
362 return AE_OK;
363 }
364	/* There are more ACPI Processor objects than in the x2APIC or MADT.
365	 * This can happen with incorrect ACPI SSDT declarations. */
366 if (acpi_id > nr_acpi_bits) {
367 pr_debug(DRV_NAME "We only have %u, trying to set %u\n",
368 nr_acpi_bits, acpi_id);
369 return AE_OK;
370 }
371	/* OK, there is an ACPI Processor object */
372 __set_bit(acpi_id, acpi_id_present);
373
374 pr_debug(DRV_NAME "ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id,
375 (unsigned long)pblk);
376
377 status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
378 if (ACPI_FAILURE(status)) {
379 if (!pblk)
380 return AE_OK;
381 }
382 /* .. and it has a C-state */
383 __set_bit(acpi_id, acpi_id_cst_present);
384
385 return AE_OK;
386}
387static int __init check_acpi_ids(struct acpi_processor *pr_backup)
388{
389
390 if (!pr_backup)
391 return -ENODEV;
392
393 /* All online CPUs have been processed at this stage. Now verify
394 * whether in fact "online CPUs" == physical CPUs.
395 */
396 acpi_id_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
397 if (!acpi_id_present)
398 return -ENOMEM;
399
400 acpi_id_cst_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
401 if (!acpi_id_cst_present) {
402 kfree(acpi_id_present);
403 return -ENOMEM;
404 }
405
406 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
407 ACPI_UINT32_MAX,
408 read_acpi_id, NULL, NULL, NULL);
409 acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
410
411 if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) {
412 unsigned int i;
413 for_each_set_bit(i, acpi_id_present, nr_acpi_bits) {
414 pr_backup->acpi_id = i;
415 /* Mask out C-states if there are no _CST or PBLK */
416 pr_backup->flags.power = test_bit(i, acpi_id_cst_present);
417 (void)upload_pm_data(pr_backup);
418 }
419 }
420 kfree(acpi_id_present);
421 acpi_id_present = NULL;
422 kfree(acpi_id_cst_present);
423 acpi_id_cst_present = NULL;
424 return 0;
425}
426static int __init check_prereq(void)
427{
428 struct cpuinfo_x86 *c = &cpu_data(0);
429
430 if (!xen_initial_domain())
431 return -ENODEV;
432
433 if (!acpi_gbl_FADT.smi_command)
434 return -ENODEV;
435
436 if (c->x86_vendor == X86_VENDOR_INTEL) {
437 if (!cpu_has(c, X86_FEATURE_EST))
438 return -ENODEV;
439
440 return 0;
441 }
442 if (c->x86_vendor == X86_VENDOR_AMD) {
443 /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
444 * as we get compile warnings for the static functions.
445 */
446#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
447#define USE_HW_PSTATE 0x00000080
448 u32 eax, ebx, ecx, edx;
449 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
450 if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE)
451 return -ENODEV;
452 return 0;
453 }
454 return -ENODEV;
455}
456/* acpi_perf_data is a pointer to percpu data. */
457static struct acpi_processor_performance __percpu *acpi_perf_data;
458
459static void free_acpi_perf_data(void)
460{
461 unsigned int i;
462
463 /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
464 for_each_possible_cpu(i)
465 free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
466 ->shared_cpu_map);
467 free_percpu(acpi_perf_data);
468}
469
470static int __init xen_acpi_processor_init(void)
471{
472 struct acpi_processor *pr_backup = NULL;
473 unsigned int i;
474 int rc = check_prereq();
475
476 if (rc)
477 return rc;
478
479 nr_acpi_bits = get_max_acpi_id() + 1;
480 acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
481 if (!acpi_ids_done)
482 return -ENOMEM;
483
484 acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
485 if (!acpi_perf_data) {
486 pr_debug(DRV_NAME "Memory allocation error for acpi_perf_data.\n");
487 kfree(acpi_ids_done);
488 return -ENOMEM;
489 }
490 for_each_possible_cpu(i) {
491 if (!zalloc_cpumask_var_node(
492 &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
493 GFP_KERNEL, cpu_to_node(i))) {
494 rc = -ENOMEM;
495 goto err_out;
496 }
497 }
498
499 /* Do initialization in ACPI core. It is OK to fail here. */
500 (void)acpi_processor_preregister_performance(acpi_perf_data);
501
502 for_each_possible_cpu(i) {
503 struct acpi_processor_performance *perf;
504
505 perf = per_cpu_ptr(acpi_perf_data, i);
506 rc = acpi_processor_register_performance(perf, i);
507 if (rc)
508 goto err_out;
509 }
510 rc = acpi_processor_notify_smm(THIS_MODULE);
511 if (rc)
512 goto err_unregister;
513
514 for_each_possible_cpu(i) {
515 struct acpi_processor *_pr;
516 _pr = per_cpu(processors, i /* APIC ID */);
517 if (!_pr)
518 continue;
519
520 if (!pr_backup) {
521 pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
522 if (pr_backup)
523 memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
524 }
525 (void)upload_pm_data(_pr);
526 }
527 rc = check_acpi_ids(pr_backup);
528
529 kfree(pr_backup);
530 pr_backup = NULL;
531
532 if (rc)
533 goto err_unregister;
534
535 return 0;
536err_unregister:
537 for_each_possible_cpu(i) {
538 struct acpi_processor_performance *perf;
539 perf = per_cpu_ptr(acpi_perf_data, i);
540 acpi_processor_unregister_performance(perf, i);
541 }
542err_out:
543 /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
544 free_acpi_perf_data();
545 kfree(acpi_ids_done);
546 return rc;
547}
548static void __exit xen_acpi_processor_exit(void)
549{
550 int i;
551
552 kfree(acpi_ids_done);
553 for_each_possible_cpu(i) {
554 struct acpi_processor_performance *perf;
555 perf = per_cpu_ptr(acpi_perf_data, i);
556 acpi_processor_unregister_performance(perf, i);
557 }
558 free_acpi_perf_data();
559}
560
561MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
562MODULE_DESCRIPTION("Xen ACPI Processor P-states (and Cx) driver which uploads PM data to Xen hypervisor");
563MODULE_LICENSE("GPL");
564
565/* We want to be loaded before the CPU freq scaling drivers are loaded.
566 * They are loaded in late_initcall. */
567device_initcall(xen_acpi_processor_init);
568module_exit(xen_acpi_processor_exit);
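The driver removed above pushes ACPI C-state and P-state data to the hypervisor with XENPF_set_processor_pminfo platform ops. As a reduced sketch of that hypercall pattern: the function name, the single hard-coded C-state and the I/O port below are made up for illustration, and the real driver builds the state array with kcalloc() as in push_cxx_to_hypervisor() above.

#include <linux/types.h>
#include <acpi/acpi_bus.h>
#include <acpi/processor.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>

/* Sketch only: upload one made-up C-state for one ACPI CPU. */
static int example_upload_one_cstate(u32 acpi_id)
{
	struct xen_processor_cx cx = {
		.type = 3,	/* hypothetical C3 */
		.latency = 100,	/* hypothetical exit latency, in us */
	};
	struct xen_platform_op op = {
		.cmd = XENPF_set_processor_pminfo,
		.interface_version = XENPF_INTERFACE_VERSION,
		.u.set_pminfo.id = acpi_id,
		.u.set_pminfo.type = XEN_PM_CX,
	};

	/* Mirror the SYSTEMIO branch of push_cxx_to_hypervisor() above. */
	cx.reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
	cx.reg.bit_width = 8;
	cx.reg.bit_offset = 0;
	cx.reg.access_size = 1;
	cx.reg.address = 0x414;	/* hypothetical P_LVL2-style port */
	set_xen_guest_handle(cx.dp, NULL);

	op.u.set_pminfo.power.count = 1;
	op.u.set_pminfo.power.flags.has_cst = 1;
	set_xen_guest_handle(op.u.set_pminfo.power.states, &cx);

	/* Same call the driver makes when the 'off' parameter is not set. */
	return HYPERVISOR_dom0_op(&op);
}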
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 8f37e23f6d1..5c9dc43c1e9 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -32,6 +32,7 @@
32 32
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/sysdev.h>
35#include <linux/capability.h> 36#include <linux/capability.h>
36 37
37#include <xen/xen.h> 38#include <xen/xen.h>
@@ -45,9 +46,14 @@
45 46
46#define BALLOON_CLASS_NAME "xen_memory" 47#define BALLOON_CLASS_NAME "xen_memory"
47 48
48static struct device balloon_dev; 49static struct sys_device balloon_sysdev;
49 50
50static int register_balloon(struct device *dev); 51static int register_balloon(struct sys_device *sysdev);
52
53static struct xenbus_watch target_watch =
54{
55 .node = "memory/target"
56};
51 57
52/* React to a change in the target key */ 58/* React to a change in the target key */
53static void watch_target(struct xenbus_watch *watch, 59static void watch_target(struct xenbus_watch *watch,
@@ -67,11 +73,6 @@ static void watch_target(struct xenbus_watch *watch,
67 */ 73 */
68 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); 74 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
69} 75}
70static struct xenbus_watch target_watch = {
71 .node = "memory/target",
72 .callback = watch_target,
73};
74
75 76
76static int balloon_init_watcher(struct notifier_block *notifier, 77static int balloon_init_watcher(struct notifier_block *notifier,
77 unsigned long event, 78 unsigned long event,
@@ -86,9 +87,7 @@ static int balloon_init_watcher(struct notifier_block *notifier,
86 return NOTIFY_DONE; 87 return NOTIFY_DONE;
87} 88}
88 89
89static struct notifier_block xenstore_notifier = { 90static struct notifier_block xenstore_notifier;
90 .notifier_call = balloon_init_watcher,
91};
92 91
93static int __init balloon_init(void) 92static int __init balloon_init(void)
94{ 93{
@@ -97,9 +96,12 @@ static int __init balloon_init(void)
97 96
98 pr_info("xen-balloon: Initialising balloon driver.\n"); 97 pr_info("xen-balloon: Initialising balloon driver.\n");
99 98
100 register_balloon(&balloon_dev); 99 register_balloon(&balloon_sysdev);
100
101 register_xen_selfballooning(&balloon_sysdev);
101 102
102 register_xen_selfballooning(&balloon_dev); 103 target_watch.callback = watch_target;
104 xenstore_notifier.notifier_call = balloon_init_watcher;
103 105
104 register_xenstore_notifier(&xenstore_notifier); 106 register_xenstore_notifier(&xenstore_notifier);
105 107
@@ -116,31 +118,31 @@ static void balloon_exit(void)
116module_exit(balloon_exit); 118module_exit(balloon_exit);
117 119
118#define BALLOON_SHOW(name, format, args...) \ 120#define BALLOON_SHOW(name, format, args...) \
119 static ssize_t show_##name(struct device *dev, \ 121 static ssize_t show_##name(struct sys_device *dev, \
120 struct device_attribute *attr, \ 122 struct sysdev_attribute *attr, \
121 char *buf) \ 123 char *buf) \
122 { \ 124 { \
123 return sprintf(buf, format, ##args); \ 125 return sprintf(buf, format, ##args); \
124 } \ 126 } \
125 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 127 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
126 128
127BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); 129BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
128BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); 130BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
129BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); 131BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
130 132
131static DEVICE_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); 133static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
132static DEVICE_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); 134static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
133static DEVICE_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); 135static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
134static DEVICE_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); 136static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
135 137
136static ssize_t show_target_kb(struct device *dev, struct device_attribute *attr, 138static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
137 char *buf) 139 char *buf)
138{ 140{
139 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); 141 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
140} 142}
141 143
142static ssize_t store_target_kb(struct device *dev, 144static ssize_t store_target_kb(struct sys_device *dev,
143 struct device_attribute *attr, 145 struct sysdev_attribute *attr,
144 const char *buf, 146 const char *buf,
145 size_t count) 147 size_t count)
146{ 148{
@@ -157,11 +159,11 @@ static ssize_t store_target_kb(struct device *dev,
157 return count; 159 return count;
158} 160}
159 161
160static DEVICE_ATTR(target_kb, S_IRUGO | S_IWUSR, 162static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
161 show_target_kb, store_target_kb); 163 show_target_kb, store_target_kb);
162 164
163 165
164static ssize_t show_target(struct device *dev, struct device_attribute *attr, 166static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
165 char *buf) 167 char *buf)
166{ 168{
167 return sprintf(buf, "%llu\n", 169 return sprintf(buf, "%llu\n",
@@ -169,8 +171,8 @@ static ssize_t show_target(struct device *dev, struct device_attribute *attr,
169 << PAGE_SHIFT); 171 << PAGE_SHIFT);
170} 172}
171 173
172static ssize_t store_target(struct device *dev, 174static ssize_t store_target(struct sys_device *dev,
173 struct device_attribute *attr, 175 struct sysdev_attribute *attr,
174 const char *buf, 176 const char *buf,
175 size_t count) 177 size_t count)
176{ 178{
@@ -187,60 +189,59 @@ static ssize_t store_target(struct device *dev,
187 return count; 189 return count;
188} 190}
189 191
190static DEVICE_ATTR(target, S_IRUGO | S_IWUSR, 192static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
191 show_target, store_target); 193 show_target, store_target);
192 194
193 195
194static struct device_attribute *balloon_attrs[] = { 196static struct sysdev_attribute *balloon_attrs[] = {
195 &dev_attr_target_kb, 197 &attr_target_kb,
196 &dev_attr_target, 198 &attr_target,
197 &dev_attr_schedule_delay.attr, 199 &attr_schedule_delay.attr,
198 &dev_attr_max_schedule_delay.attr, 200 &attr_max_schedule_delay.attr,
199 &dev_attr_retry_count.attr, 201 &attr_retry_count.attr,
200 &dev_attr_max_retry_count.attr 202 &attr_max_retry_count.attr
201}; 203};
202 204
203static struct attribute *balloon_info_attrs[] = { 205static struct attribute *balloon_info_attrs[] = {
204 &dev_attr_current_kb.attr, 206 &attr_current_kb.attr,
205 &dev_attr_low_kb.attr, 207 &attr_low_kb.attr,
206 &dev_attr_high_kb.attr, 208 &attr_high_kb.attr,
207 NULL 209 NULL
208}; 210};
209 211
210static const struct attribute_group balloon_info_group = { 212static struct attribute_group balloon_info_group = {
211 .name = "info", 213 .name = "info",
212 .attrs = balloon_info_attrs 214 .attrs = balloon_info_attrs
213}; 215};
214 216
215static struct bus_type balloon_subsys = { 217static struct sysdev_class balloon_sysdev_class = {
216 .name = BALLOON_CLASS_NAME, 218 .name = BALLOON_CLASS_NAME
217 .dev_name = BALLOON_CLASS_NAME,
218}; 219};
219 220
220static int register_balloon(struct device *dev) 221static int register_balloon(struct sys_device *sysdev)
221{ 222{
222 int i, error; 223 int i, error;
223 224
224 error = subsys_system_register(&balloon_subsys, NULL); 225 error = sysdev_class_register(&balloon_sysdev_class);
225 if (error) 226 if (error)
226 return error; 227 return error;
227 228
228 dev->id = 0; 229 sysdev->id = 0;
229 dev->bus = &balloon_subsys; 230 sysdev->cls = &balloon_sysdev_class;
230 231
231 error = device_register(dev); 232 error = sysdev_register(sysdev);
232 if (error) { 233 if (error) {
233 bus_unregister(&balloon_subsys); 234 sysdev_class_unregister(&balloon_sysdev_class);
234 return error; 235 return error;
235 } 236 }
236 237
237 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { 238 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
238 error = device_create_file(dev, balloon_attrs[i]); 239 error = sysdev_create_file(sysdev, balloon_attrs[i]);
239 if (error) 240 if (error)
240 goto fail; 241 goto fail;
241 } 242 }
242 243
243 error = sysfs_create_group(&dev->kobj, &balloon_info_group); 244 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
244 if (error) 245 if (error)
245 goto fail; 246 goto fail;
246 247
@@ -248,9 +249,9 @@ static int register_balloon(struct device *dev)
248 249
249 fail: 250 fail:
250 while (--i >= 0) 251 while (--i >= 0)
251 device_remove_file(dev, balloon_attrs[i]); 252 sysdev_remove_file(sysdev, balloon_attrs[i]);
252 device_unregister(dev); 253 sysdev_unregister(sysdev);
253 bus_unregister(&balloon_subsys); 254 sysdev_class_unregister(&balloon_sysdev_class);
254 return error; 255 return error;
255} 256}
256 257
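The hunk above moves the balloon's sysfs attributes from the driver-core device model back onto a sysdev class, but the target_kb semantics stay the same: a kilobyte value written by the administrator is converted to pages and passed to balloon_set_new_target(), just as watch_target() does with the xenstore value. A sketch of that store path follows; the body is an assumption (only the signature appears in the hunks), the use of kstrtoull() and capable() is illustrative, and it is assumed to live in the same file so the headers above cover the declarations.

/* Sketch only: what a store_target_kb() along the lines of the code above
 * is expected to do (KiB -> pages, then retarget the balloon). */
static ssize_t example_store_target_kb(struct sys_device *dev,
				       struct sysdev_attribute *attr,
				       const char *buf, size_t count)
{
	unsigned long long target_kb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoull(buf, 10, &target_kb))
		return -EINVAL;

	/* PAGE_SHIFT - 10 converts KiB to pages, as in watch_target(). */
	balloon_set_new_target(target_kb >> (PAGE_SHIFT - 10));

	return count;
}

With the class named "xen_memory" and the sysdev id set to 0, the file is expected to appear as /sys/devices/system/xen_memory/xen_memory0/target_kb.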
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 46ae0f9f02a..a8031445d94 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -10,13 +10,13 @@
10 */ 10 */
11 11
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/pci.h> 13#include <linux/pci.h>
15#include "pciback.h" 14#include "pciback.h"
16#include "conf_space.h" 15#include "conf_space.h"
17#include "conf_space_quirks.h" 16#include "conf_space_quirks.h"
18 17
19static bool permissive; 18#define DRV_NAME "xen-pciback"
19static int permissive;
20module_param(permissive, bool, 0644); 20module_param(permissive, bool, 0644);
21 21
22/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, 22/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
@@ -124,7 +124,7 @@ static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
124 return val; 124 return val;
125} 125}
126 126
127static int xen_pcibios_err_to_errno(int err) 127static int pcibios_err_to_errno(int err)
128{ 128{
129 switch (err) { 129 switch (err) {
130 case PCIBIOS_SUCCESSFUL: 130 case PCIBIOS_SUCCESSFUL:
@@ -202,7 +202,7 @@ out:
202 pci_name(dev), size, offset, value); 202 pci_name(dev), size, offset, value);
203 203
204 *ret_val = value; 204 *ret_val = value;
205 return xen_pcibios_err_to_errno(err); 205 return pcibios_err_to_errno(err);
206} 206}
207 207
208int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) 208int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
@@ -290,7 +290,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
290 } 290 }
291 } 291 }
292 292
293 return xen_pcibios_err_to_errno(err); 293 return pcibios_err_to_errno(err);
294} 294}
295 295
296void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) 296void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index 3daf862d739..da3cbdfcb5d 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -15,6 +15,7 @@ struct pci_bar_info {
15 int which; 15 int which;
16}; 16};
17 17
18#define DRV_NAME "xen-pciback"
18#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) 19#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
19#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) 20#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
20 21
@@ -24,7 +25,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
24 int ret; 25 int ret;
25 26
26 ret = xen_pcibk_read_config_word(dev, offset, value, data); 27 ret = xen_pcibk_read_config_word(dev, offset, value, data);
27 if (!pci_is_enabled(dev)) 28 if (!atomic_read(&dev->enable_cnt))
28 return ret; 29 return ret;
29 30
30 for (i = 0; i < PCI_ROM_RESOURCE; i++) { 31 for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -186,7 +187,7 @@ static inline void read_dev_bar(struct pci_dev *dev,
186 187
187 bar_info->val = res[pos].start | 188 bar_info->val = res[pos].start |
188 (res[pos].flags & PCI_REGION_FLAG_MASK); 189 (res[pos].flags & PCI_REGION_FLAG_MASK);
189 bar_info->len_val = resource_size(&res[pos]); 190 bar_info->len_val = res[pos].end - res[pos].start + 1;
190} 191}
191 192
192static void *bar_init(struct pci_dev *dev, int offset) 193static void *bar_init(struct pci_dev *dev, int offset)
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index 7476791cab4..921a889e65e 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -12,6 +12,7 @@
12#include "conf_space_quirks.h" 12#include "conf_space_quirks.h"
13 13
14LIST_HEAD(xen_pcibk_quirks); 14LIST_HEAD(xen_pcibk_quirks);
15#define DRV_NAME "xen-pciback"
15static inline const struct pci_device_id * 16static inline const struct pci_device_id *
16match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) 17match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
17{ 18{
@@ -35,7 +36,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
35 goto out; 36 goto out;
36 tmp_quirk = NULL; 37 tmp_quirk = NULL;
37 printk(KERN_DEBUG DRV_NAME 38 printk(KERN_DEBUG DRV_NAME
38 ": quirk didn't match any device known\n"); 39 ":quirk didn't match any device xen_pciback knows about\n");
39out: 40out:
40 return tmp_quirk; 41 return tmp_quirk;
41} 42}
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 828dddc360d..1d32a9a42c0 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -7,13 +7,13 @@
7 7
8#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/pci.h> 9#include <linux/pci.h>
10#include <linux/mutex.h> 10#include <linux/spinlock.h>
11#include "pciback.h" 11#include "pciback.h"
12 12
13struct passthrough_dev_data { 13struct passthrough_dev_data {
14 /* Access to dev_list must be protected by lock */ 14 /* Access to dev_list must be protected by lock */
15 struct list_head dev_list; 15 struct list_head dev_list;
16 struct mutex lock; 16 spinlock_t lock;
17}; 17};
18 18
19static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, 19static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
@@ -24,8 +24,9 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
24 struct passthrough_dev_data *dev_data = pdev->pci_dev_data; 24 struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
25 struct pci_dev_entry *dev_entry; 25 struct pci_dev_entry *dev_entry;
26 struct pci_dev *dev = NULL; 26 struct pci_dev *dev = NULL;
27 unsigned long flags;
27 28
28 mutex_lock(&dev_data->lock); 29 spin_lock_irqsave(&dev_data->lock, flags);
29 30
30 list_for_each_entry(dev_entry, &dev_data->dev_list, list) { 31 list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
31 if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) 32 if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
@@ -36,7 +37,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
36 } 37 }
37 } 38 }
38 39
39 mutex_unlock(&dev_data->lock); 40 spin_unlock_irqrestore(&dev_data->lock, flags);
40 41
41 return dev; 42 return dev;
42} 43}
@@ -47,6 +48,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
47{ 48{
48 struct passthrough_dev_data *dev_data = pdev->pci_dev_data; 49 struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
49 struct pci_dev_entry *dev_entry; 50 struct pci_dev_entry *dev_entry;
51 unsigned long flags;
50 unsigned int domain, bus, devfn; 52 unsigned int domain, bus, devfn;
51 int err; 53 int err;
52 54
@@ -55,9 +57,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
55 return -ENOMEM; 57 return -ENOMEM;
56 dev_entry->dev = dev; 58 dev_entry->dev = dev;
57 59
58 mutex_lock(&dev_data->lock); 60 spin_lock_irqsave(&dev_data->lock, flags);
59 list_add_tail(&dev_entry->list, &dev_data->dev_list); 61 list_add_tail(&dev_entry->list, &dev_data->dev_list);
60 mutex_unlock(&dev_data->lock); 62 spin_unlock_irqrestore(&dev_data->lock, flags);
61 63
62 /* Publish this device. */ 64 /* Publish this device. */
63 domain = (unsigned int)pci_domain_nr(dev->bus); 65 domain = (unsigned int)pci_domain_nr(dev->bus);
@@ -74,8 +76,9 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
74 struct passthrough_dev_data *dev_data = pdev->pci_dev_data; 76 struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
75 struct pci_dev_entry *dev_entry, *t; 77 struct pci_dev_entry *dev_entry, *t;
76 struct pci_dev *found_dev = NULL; 78 struct pci_dev *found_dev = NULL;
79 unsigned long flags;
77 80
78 mutex_lock(&dev_data->lock); 81 spin_lock_irqsave(&dev_data->lock, flags);
79 82
80 list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { 83 list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
81 if (dev_entry->dev == dev) { 84 if (dev_entry->dev == dev) {
@@ -85,7 +88,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
85 } 88 }
86 } 89 }
87 90
88 mutex_unlock(&dev_data->lock); 91 spin_unlock_irqrestore(&dev_data->lock, flags);
89 92
90 if (found_dev) 93 if (found_dev)
91 pcistub_put_pci_dev(found_dev); 94 pcistub_put_pci_dev(found_dev);
@@ -99,7 +102,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
99 if (!dev_data) 102 if (!dev_data)
100 return -ENOMEM; 103 return -ENOMEM;
101 104
102 mutex_init(&dev_data->lock); 105 spin_lock_init(&dev_data->lock);
103 106
104 INIT_LIST_HEAD(&dev_data->dev_list); 107 INIT_LIST_HEAD(&dev_data->dev_list);
105 108
@@ -113,14 +116,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
113{ 116{
114 int err = 0; 117 int err = 0;
115 struct passthrough_dev_data *dev_data = pdev->pci_dev_data; 118 struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
116 struct pci_dev_entry *dev_entry, *e; 119 struct pci_dev_entry *dev_entry, *e, *tmp;
117 struct pci_dev *dev; 120 struct pci_dev *dev;
118 int found; 121 int found;
119 unsigned int domain, bus; 122 unsigned int domain, bus;
120 123
121 mutex_lock(&dev_data->lock); 124 spin_lock(&dev_data->lock);
122 125
123 list_for_each_entry(dev_entry, &dev_data->dev_list, list) { 126 list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
124 /* Only publish this device as a root if none of its 127 /* Only publish this device as a root if none of its
125 * parent bridges are exported 128 * parent bridges are exported
126 */ 129 */
@@ -139,13 +142,16 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
139 bus = (unsigned int)dev_entry->dev->bus->number; 142 bus = (unsigned int)dev_entry->dev->bus->number;
140 143
141 if (!found) { 144 if (!found) {
145 spin_unlock(&dev_data->lock);
142 err = publish_root_cb(pdev, domain, bus); 146 err = publish_root_cb(pdev, domain, bus);
143 if (err) 147 if (err)
144 break; 148 break;
149 spin_lock(&dev_data->lock);
145 } 150 }
146 } 151 }
147 152
148 mutex_unlock(&dev_data->lock); 153 if (!err)
154 spin_unlock(&dev_data->lock);
149 155
150 return err; 156 return err;
151} 157}
@@ -176,7 +182,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
176 return 1; 182 return 1;
177} 183}
178 184
179const struct xen_pcibk_backend xen_pcibk_passthrough_backend = { 185struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
180 .name = "passthrough", 186 .name = "passthrough",
181 .init = __xen_pcibk_init_devices, 187 .init = __xen_pcibk_init_devices,
182 .free = __xen_pcibk_release_devices, 188 .free = __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 9204126f156..aec214ac0a1 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,6 +21,8 @@
21#include "conf_space.h" 21#include "conf_space.h"
22#include "conf_space_quirks.h" 22#include "conf_space_quirks.h"
23 23
24#define DRV_NAME "xen-pciback"
25
24static char *pci_devs_to_hide; 26static char *pci_devs_to_hide;
25wait_queue_head_t xen_pcibk_aer_wait_queue; 27wait_queue_head_t xen_pcibk_aer_wait_queue;
26/*Add sem for sync AER handling and xen_pcibk remove/reconfigure ops, 28/*Add sem for sync AER handling and xen_pcibk remove/reconfigure ops,
@@ -85,36 +87,20 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
85static void pcistub_device_release(struct kref *kref) 87static void pcistub_device_release(struct kref *kref)
86{ 88{
87 struct pcistub_device *psdev; 89 struct pcistub_device *psdev;
88 struct xen_pcibk_dev_data *dev_data;
89 90
90 psdev = container_of(kref, struct pcistub_device, kref); 91 psdev = container_of(kref, struct pcistub_device, kref);
91 dev_data = pci_get_drvdata(psdev->dev);
92 92
93 dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); 93 dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
94 94
95 xen_unregister_device_domain_owner(psdev->dev); 95 xen_unregister_device_domain_owner(psdev->dev);
96 96
97 /* Call the reset function which does not take lock as this
98 * is called from "unbind" which takes a device_lock mutex.
99 */
100 __pci_reset_function_locked(psdev->dev);
101 if (pci_load_and_free_saved_state(psdev->dev,
102 &dev_data->pci_saved_state)) {
103 dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n");
104 } else
105 pci_restore_state(psdev->dev);
106
107 /* Disable the device */
108 xen_pcibk_reset_device(psdev->dev);
109
110 kfree(dev_data);
111 pci_set_drvdata(psdev->dev, NULL);
112
113 /* Clean-up the device */ 97 /* Clean-up the device */
98 xen_pcibk_reset_device(psdev->dev);
114 xen_pcibk_config_free_dyn_fields(psdev->dev); 99 xen_pcibk_config_free_dyn_fields(psdev->dev);
115 xen_pcibk_config_free_dev(psdev->dev); 100 xen_pcibk_config_free_dev(psdev->dev);
101 kfree(pci_get_drvdata(psdev->dev));
102 pci_set_drvdata(psdev->dev, NULL);
116 103
117 psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
118 pci_dev_put(psdev->dev); 104 pci_dev_put(psdev->dev);
119 105
120 kfree(psdev); 106 kfree(psdev);
@@ -142,8 +128,7 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus,
142 if (psdev->dev != NULL 128 if (psdev->dev != NULL
143 && domain == pci_domain_nr(psdev->dev->bus) 129 && domain == pci_domain_nr(psdev->dev->bus)
144 && bus == psdev->dev->bus->number 130 && bus == psdev->dev->bus->number
145 && slot == PCI_SLOT(psdev->dev->devfn) 131 && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
146 && func == PCI_FUNC(psdev->dev->devfn)) {
147 pcistub_device_get(psdev); 132 pcistub_device_get(psdev);
148 goto out; 133 goto out;
149 } 134 }
@@ -192,8 +177,7 @@ struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
192 if (psdev->dev != NULL 177 if (psdev->dev != NULL
193 && domain == pci_domain_nr(psdev->dev->bus) 178 && domain == pci_domain_nr(psdev->dev->bus)
194 && bus == psdev->dev->bus->number 179 && bus == psdev->dev->bus->number
195 && slot == PCI_SLOT(psdev->dev->devfn) 180 && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
196 && func == PCI_FUNC(psdev->dev->devfn)) {
197 found_dev = pcistub_device_get_pci_dev(pdev, psdev); 181 found_dev = pcistub_device_get_pci_dev(pdev, psdev);
198 break; 182 break;
199 } 183 }
@@ -238,8 +222,6 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
238 } 222 }
239 223
240 spin_unlock_irqrestore(&pcistub_devices_lock, flags); 224 spin_unlock_irqrestore(&pcistub_devices_lock, flags);
241 if (WARN_ON(!found_psdev))
242 return;
243 225
244	/* hold this lock to avoid breaking the link between 226	/* hold this lock to avoid breaking the link between
245	 * pcistub and xen_pcibk while AER is being processed 227	 * pcistub and xen_pcibk while AER is being processed
@@ -248,22 +230,10 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
248 /* Cleanup our device 230 /* Cleanup our device
249 * (so it's ready for the next domain) 231 * (so it's ready for the next domain)
250 */ 232 */
251
252 /* This is OK - we are running from workqueue context
253 * and want to inhibit the user from fiddling with 'reset'
254 */
255 pci_reset_function(dev);
256 pci_restore_state(psdev->dev);
257
258 /* This disables the device. */
259 xen_pcibk_reset_device(found_psdev->dev); 233 xen_pcibk_reset_device(found_psdev->dev);
260
261 /* And cleanup up our emulated fields. */
262 xen_pcibk_config_free_dyn_fields(found_psdev->dev); 234 xen_pcibk_config_free_dyn_fields(found_psdev->dev);
263 xen_pcibk_config_reset_dev(found_psdev->dev); 235 xen_pcibk_config_reset_dev(found_psdev->dev);
264 236
265 xen_unregister_device_domain_owner(found_psdev->dev);
266
267 spin_lock_irqsave(&found_psdev->lock, flags); 237 spin_lock_irqsave(&found_psdev->lock, flags);
268 found_psdev->pdev = NULL; 238 found_psdev->pdev = NULL;
269 spin_unlock_irqrestore(&found_psdev->lock, flags); 239 spin_unlock_irqrestore(&found_psdev->lock, flags);
@@ -272,8 +242,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
272 up_write(&pcistub_sem); 242 up_write(&pcistub_sem);
273} 243}
274 244
275static int pcistub_match_one(struct pci_dev *dev, 245static int __devinit pcistub_match_one(struct pci_dev *dev,
276 struct pcistub_device_id *pdev_id) 246 struct pcistub_device_id *pdev_id)
277{ 247{
278 /* Match the specified device by domain, bus, slot, func and also if 248 /* Match the specified device by domain, bus, slot, func and also if
279 * any of the device's parent bridges match. 249 * any of the device's parent bridges match.
@@ -292,7 +262,7 @@ static int pcistub_match_one(struct pci_dev *dev,
292 return 0; 262 return 0;
293} 263}
294 264
295static int pcistub_match(struct pci_dev *dev) 265static int __devinit pcistub_match(struct pci_dev *dev)
296{ 266{
297 struct pcistub_device_id *pdev_id; 267 struct pcistub_device_id *pdev_id;
298 unsigned long flags; 268 unsigned long flags;
@@ -310,7 +280,7 @@ static int pcistub_match(struct pci_dev *dev)
310 return found; 280 return found;
311} 281}
312 282
313static int pcistub_init_device(struct pci_dev *dev) 283static int __devinit pcistub_init_device(struct pci_dev *dev)
314{ 284{
315 struct xen_pcibk_dev_data *dev_data; 285 struct xen_pcibk_dev_data *dev_data;
316 int err = 0; 286 int err = 0;
@@ -355,24 +325,12 @@ static int pcistub_init_device(struct pci_dev *dev)
355 if (err) 325 if (err)
356 goto config_release; 326 goto config_release;
357 327
358 /* We need the device active to save the state. */
359 dev_dbg(&dev->dev, "save state of device\n");
360 pci_save_state(dev);
361 dev_data->pci_saved_state = pci_store_saved_state(dev);
362 if (!dev_data->pci_saved_state)
363 dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
364 else {
365 dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
366 __pci_reset_function_locked(dev);
367 pci_restore_state(dev);
368 }
369 /* Now disable the device (this also ensures some private device 328 /* Now disable the device (this also ensures some private device
370 * data is setup before we export) 329 * data is setup before we export)
371 */ 330 */
372 dev_dbg(&dev->dev, "reset device\n"); 331 dev_dbg(&dev->dev, "reset device\n");
373 xen_pcibk_reset_device(dev); 332 xen_pcibk_reset_device(dev);
374 333
375 dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
376 return 0; 334 return 0;
377 335
378config_release: 336config_release:
@@ -428,7 +386,7 @@ static int __init pcistub_init_devices_late(void)
428 return 0; 386 return 0;
429} 387}
430 388
431static int pcistub_seize(struct pci_dev *dev) 389static int __devinit pcistub_seize(struct pci_dev *dev)
432{ 390{
433 struct pcistub_device *psdev; 391 struct pcistub_device *psdev;
434 unsigned long flags; 392 unsigned long flags;
@@ -463,7 +421,8 @@ static int pcistub_seize(struct pci_dev *dev)
463 return err; 421 return err;
464} 422}
465 423
466static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) 424static int __devinit pcistub_probe(struct pci_dev *dev,
425 const struct pci_device_id *id)
467{ 426{
468 int err = 0; 427 int err = 0;
469 428
@@ -555,9 +514,12 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
555 int err; 514 int err;
556 char nodename[PCI_NODENAME_MAX]; 515 char nodename[PCI_NODENAME_MAX];
557 516
558 BUG_ON(!psdev); 517 if (!psdev)
518 dev_err(&psdev->dev->dev,
519 "device is NULL when do AER recovery/kill_domain\n");
559 snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", 520 snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
560 psdev->pdev->xdev->otherend_id); 521 psdev->pdev->xdev->otherend_id);
522 nodename[strlen(nodename)] = '\0';
561 523
562again: 524again:
563 err = xenbus_transaction_start(&xbt); 525 err = xenbus_transaction_start(&xbt);
@@ -643,7 +605,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
643 if (test_bit(_XEN_PCIF_active, 605 if (test_bit(_XEN_PCIF_active,
644 (unsigned long *)&psdev->pdev->sh_info->flags)) { 606 (unsigned long *)&psdev->pdev->sh_info->flags)) {
645 dev_dbg(&psdev->dev->dev, 607 dev_dbg(&psdev->dev->dev,
646 "schedule pci_conf service in " DRV_NAME "\n"); 608 "schedule pci_conf service in xen_pcibk\n");
647 xen_pcibk_test_and_schedule_op(psdev->pdev); 609 xen_pcibk_test_and_schedule_op(psdev->pdev);
648 } 610 }
649 611
@@ -683,14 +645,14 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
683 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 645 dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
684 " by HVM, kill it\n"); 646 " by HVM, kill it\n");
685 kill_domain_by_device(psdev); 647 kill_domain_by_device(psdev);
686 goto end; 648 goto release;
687 } 649 }
688 650
689 if (!test_bit(_XEN_PCIB_AERHANDLER, 651 if (!test_bit(_XEN_PCIB_AERHANDLER,
690 (unsigned long *)&psdev->pdev->sh_info->flags)) { 652 (unsigned long *)&psdev->pdev->sh_info->flags)) {
691 dev_err(&dev->dev, 653 dev_err(&dev->dev,
692 "guest with no AER driver should have been killed\n"); 654 "guest with no AER driver should have been killed\n");
693 goto end; 655 goto release;
694 } 656 }
695 result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); 657 result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
696 658
@@ -700,9 +662,9 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
700 "No AER slot_reset service or disconnected!\n"); 662 "No AER slot_reset service or disconnected!\n");
701 kill_domain_by_device(psdev); 663 kill_domain_by_device(psdev);
702 } 664 }
665release:
666 pcistub_device_put(psdev);
703end: 667end:
704 if (psdev)
705 pcistub_device_put(psdev);
706 up_write(&pcistub_sem); 668 up_write(&pcistub_sem);
707 return result; 669 return result;
708 670
@@ -741,14 +703,14 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
741 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 703 dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
742 " by HVM, kill it\n"); 704 " by HVM, kill it\n");
743 kill_domain_by_device(psdev); 705 kill_domain_by_device(psdev);
744 goto end; 706 goto release;
745 } 707 }
746 708
747 if (!test_bit(_XEN_PCIB_AERHANDLER, 709 if (!test_bit(_XEN_PCIB_AERHANDLER,
748 (unsigned long *)&psdev->pdev->sh_info->flags)) { 710 (unsigned long *)&psdev->pdev->sh_info->flags)) {
749 dev_err(&dev->dev, 711 dev_err(&dev->dev,
750 "guest with no AER driver should have been killed\n"); 712 "guest with no AER driver should have been killed\n");
751 goto end; 713 goto release;
752 } 714 }
753 result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); 715 result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
754 716
@@ -758,9 +720,9 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
758 "No AER mmio_enabled service or disconnected!\n"); 720 "No AER mmio_enabled service or disconnected!\n");
759 kill_domain_by_device(psdev); 721 kill_domain_by_device(psdev);
760 } 722 }
723release:
724 pcistub_device_put(psdev);
761end: 725end:
762 if (psdev)
763 pcistub_device_put(psdev);
764 up_write(&pcistub_sem); 726 up_write(&pcistub_sem);
765 return result; 727 return result;
766} 728}
@@ -799,7 +761,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
799 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 761 dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
800 " by HVM, kill it\n"); 762 " by HVM, kill it\n");
801 kill_domain_by_device(psdev); 763 kill_domain_by_device(psdev);
802 goto end; 764 goto release;
803 } 765 }
804 766
805	/*Guest owns the device yet no aer handler registered, kill guest*/ 767	/*Guest owns the device yet no aer handler registered, kill guest*/
@@ -807,7 +769,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
807 (unsigned long *)&psdev->pdev->sh_info->flags)) { 769 (unsigned long *)&psdev->pdev->sh_info->flags)) {
808 dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); 770 dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
809 kill_domain_by_device(psdev); 771 kill_domain_by_device(psdev);
810 goto end; 772 goto release;
811 } 773 }
812 result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); 774 result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
813 775
@@ -817,9 +779,9 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
817 "No AER error_detected service or disconnected!\n"); 779 "No AER error_detected service or disconnected!\n");
818 kill_domain_by_device(psdev); 780 kill_domain_by_device(psdev);
819 } 781 }
782release:
783 pcistub_device_put(psdev);
820end: 784end:
821 if (psdev)
822 pcistub_device_put(psdev);
823 up_write(&pcistub_sem); 785 up_write(&pcistub_sem);
824 return result; 786 return result;
825} 787}
@@ -853,7 +815,7 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
853 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 815 dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
854 " by HVM, kill it\n"); 816 " by HVM, kill it\n");
855 kill_domain_by_device(psdev); 817 kill_domain_by_device(psdev);
856 goto end; 818 goto release;
857 } 819 }
858 820
859 if (!test_bit(_XEN_PCIB_AERHANDLER, 821 if (!test_bit(_XEN_PCIB_AERHANDLER,
@@ -861,19 +823,19 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
861 dev_err(&dev->dev, 823 dev_err(&dev->dev,
862 "guest with no AER driver should have been killed\n"); 824 "guest with no AER driver should have been killed\n");
863 kill_domain_by_device(psdev); 825 kill_domain_by_device(psdev);
864 goto end; 826 goto release;
865 } 827 }
866 common_process(psdev, 1, XEN_PCI_OP_aer_resume, 828 common_process(psdev, 1, XEN_PCI_OP_aer_resume,
867 PCI_ERS_RESULT_RECOVERED); 829 PCI_ERS_RESULT_RECOVERED);
830release:
831 pcistub_device_put(psdev);
868end: 832end:
869 if (psdev)
870 pcistub_device_put(psdev);
871 up_write(&pcistub_sem); 833 up_write(&pcistub_sem);
872 return; 834 return;
873} 835}
874 836
875/*add xen_pcibk AER handling*/ 837/*add xen_pcibk AER handling*/
876static const struct pci_error_handlers xen_pcibk_error_handler = { 838static struct pci_error_handlers xen_pcibk_error_handler = {
877 .error_detected = xen_pcibk_error_detected, 839 .error_detected = xen_pcibk_error_detected,
878 .mmio_enabled = xen_pcibk_mmio_enabled, 840 .mmio_enabled = xen_pcibk_mmio_enabled,
879 .slot_reset = xen_pcibk_slot_reset, 841 .slot_reset = xen_pcibk_slot_reset,
@@ -898,35 +860,18 @@ static struct pci_driver xen_pcibk_pci_driver = {
898static inline int str_to_slot(const char *buf, int *domain, int *bus, 860static inline int str_to_slot(const char *buf, int *domain, int *bus,
899 int *slot, int *func) 861 int *slot, int *func)
900{ 862{
901 int parsed = 0; 863 int err;
902 864
903 switch (sscanf(buf, " %x:%x:%x.%x %n", domain, bus, slot, func, 865 err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
904 &parsed)) { 866 if (err == 4)
905 case 3:
906 *func = -1;
907 sscanf(buf, " %x:%x:%x.* %n", domain, bus, slot, &parsed);
908 break;
909 case 2:
910 *slot = *func = -1;
911 sscanf(buf, " %x:%x:*.* %n", domain, bus, &parsed);
912 break;
913 }
914 if (parsed && !buf[parsed])
915 return 0; 867 return 0;
868 else if (err < 0)
869 return -EINVAL;
916 870
917 /* try again without domain */ 871 /* try again without domain */
918 *domain = 0; 872 *domain = 0;
919 switch (sscanf(buf, " %x:%x.%x %n", bus, slot, func, &parsed)) { 873 err = sscanf(buf, " %x:%x.%x", bus, slot, func);
920 case 2: 874 if (err == 3)
921 *func = -1;
922 sscanf(buf, " %x:%x.* %n", bus, slot, &parsed);
923 break;
924 case 1:
925 *slot = *func = -1;
926 sscanf(buf, " %x:*.* %n", bus, &parsed);
927 break;
928 }
929 if (parsed && !buf[parsed])
930 return 0; 875 return 0;
931 876
932 return -EINVAL; 877 return -EINVAL;
@@ -935,20 +880,13 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus,
935static inline int str_to_quirk(const char *buf, int *domain, int *bus, int 880static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
936 *slot, int *func, int *reg, int *size, int *mask) 881 *slot, int *func, int *reg, int *size, int *mask)
937{ 882{
938 int parsed = 0; 883 int err;
939 884
940 sscanf(buf, " %x:%x:%x.%x-%x:%x:%x %n", domain, bus, slot, func, 885 err =
941 reg, size, mask, &parsed); 886 sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
942 if (parsed && !buf[parsed]) 887 func, reg, size, mask);
888 if (err == 7)
943 return 0; 889 return 0;
944
945 /* try again without domain */
946 *domain = 0;
947 sscanf(buf, " %x:%x.%x-%x:%x:%x %n", bus, slot, func, reg, size,
948 mask, &parsed);
949 if (parsed && !buf[parsed])
950 return 0;
951
952 return -EINVAL; 890 return -EINVAL;
953} 891}
954 892
@@ -956,30 +894,6 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
956{ 894{
957 struct pcistub_device_id *pci_dev_id; 895 struct pcistub_device_id *pci_dev_id;
958 unsigned long flags; 896 unsigned long flags;
959 int rc = 0, devfn = PCI_DEVFN(slot, func);
960
961 if (slot < 0) {
962 for (slot = 0; !rc && slot < 32; ++slot)
963 rc = pcistub_device_id_add(domain, bus, slot, func);
964 return rc;
965 }
966
967 if (func < 0) {
968 for (func = 0; !rc && func < 8; ++func)
969 rc = pcistub_device_id_add(domain, bus, slot, func);
970 return rc;
971 }
972
973 if ((
974#if !defined(MODULE) /* pci_domains_supported is not being exported */ \
975 || !defined(CONFIG_PCI_DOMAINS)
976 !pci_domains_supported ? domain :
977#endif
978 domain < 0 || domain > 0xffff)
979 || bus < 0 || bus > 0xff
980 || PCI_SLOT(devfn) != slot
981 || PCI_FUNC(devfn) != func)
982 return -EINVAL;
983 897
984 pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); 898 pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
985 if (!pci_dev_id) 899 if (!pci_dev_id)
@@ -987,9 +901,9 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
987 901
988 pci_dev_id->domain = domain; 902 pci_dev_id->domain = domain;
989 pci_dev_id->bus = bus; 903 pci_dev_id->bus = bus;
990 pci_dev_id->devfn = devfn; 904 pci_dev_id->devfn = PCI_DEVFN(slot, func);
991 905
992 pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%d\n", 906 pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n",
993 domain, bus, slot, func); 907 domain, bus, slot, func);
994 908
995 spin_lock_irqsave(&device_ids_lock, flags); 909 spin_lock_irqsave(&device_ids_lock, flags);
@@ -1002,15 +916,15 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
1002static int pcistub_device_id_remove(int domain, int bus, int slot, int func) 916static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
1003{ 917{
1004 struct pcistub_device_id *pci_dev_id, *t; 918 struct pcistub_device_id *pci_dev_id, *t;
919 int devfn = PCI_DEVFN(slot, func);
1005 int err = -ENOENT; 920 int err = -ENOENT;
1006 unsigned long flags; 921 unsigned long flags;
1007 922
1008 spin_lock_irqsave(&device_ids_lock, flags); 923 spin_lock_irqsave(&device_ids_lock, flags);
1009 list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, 924 list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
1010 slot_list) { 925 slot_list) {
1011 if (pci_dev_id->domain == domain && pci_dev_id->bus == bus 926 if (pci_dev_id->domain == domain
1012 && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot) 927 && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
1013 && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) {
1014 /* Don't break; here because it's possible the same 928 /* Don't break; here because it's possible the same
1015 * slot could be in the list more than once 929 * slot could be in the list more than once
1016 */ 930 */
@@ -1019,7 +933,7 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
1019 933
1020 err = 0; 934 err = 0;
1021 935
1022 pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%d from " 936 pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from "
1023 "seize list\n", domain, bus, slot, func); 937 "seize list\n", domain, bus, slot, func);
1024 } 938 }
1025 } 939 }
@@ -1028,20 +942,16 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
1028 return err; 942 return err;
1029} 943}
1030 944
1031static int pcistub_reg_add(int domain, int bus, int slot, int func, 945static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
1032 unsigned int reg, unsigned int size, 946 int size, int mask)
1033 unsigned int mask)
1034{ 947{
1035 int err = 0; 948 int err = 0;
1036 struct pcistub_device *psdev; 949 struct pcistub_device *psdev;
1037 struct pci_dev *dev; 950 struct pci_dev *dev;
1038 struct config_field *field; 951 struct config_field *field;
1039 952
1040 if (reg > 0xfff || (size < 4 && (mask >> (size * 8))))
1041 return -EINVAL;
1042
1043 psdev = pcistub_device_find(domain, bus, slot, func); 953 psdev = pcistub_device_find(domain, bus, slot, func);
1044 if (!psdev) { 954 if (!psdev || !psdev->dev) {
1045 err = -ENODEV; 955 err = -ENODEV;
1046 goto out; 956 goto out;
1047 } 957 }
@@ -1065,8 +975,6 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func,
1065 if (err) 975 if (err)
1066 kfree(field); 976 kfree(field);
1067out: 977out:
1068 if (psdev)
1069 pcistub_device_put(psdev);
1070 return err; 978 return err;
1071} 979}
1072 980
@@ -1087,7 +995,8 @@ out:
1087 err = count; 995 err = count;
1088 return err; 996 return err;
1089} 997}
1090static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); 998
999DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
1091 1000
1092static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, 1001static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
1093 size_t count) 1002 size_t count)
@@ -1106,7 +1015,8 @@ out:
1106 err = count; 1015 err = count;
1107 return err; 1016 return err;
1108} 1017}
1109static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); 1018
1019DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
1110 1020
1111static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) 1021static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
1112{ 1022{
@@ -1120,7 +1030,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
1120 break; 1030 break;
1121 1031
1122 count += scnprintf(buf + count, PAGE_SIZE - count, 1032 count += scnprintf(buf + count, PAGE_SIZE - count,
1123 "%04x:%02x:%02x.%d\n", 1033 "%04x:%02x:%02x.%01x\n",
1124 pci_dev_id->domain, pci_dev_id->bus, 1034 pci_dev_id->domain, pci_dev_id->bus,
1125 PCI_SLOT(pci_dev_id->devfn), 1035 PCI_SLOT(pci_dev_id->devfn),
1126 PCI_FUNC(pci_dev_id->devfn)); 1036 PCI_FUNC(pci_dev_id->devfn));
@@ -1129,7 +1039,8 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
1129 1039
1130 return count; 1040 return count;
1131} 1041}
1132static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); 1042
1043DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
1133 1044
1134static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) 1045static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
1135{ 1046{
@@ -1158,7 +1069,8 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
1158 spin_unlock_irqrestore(&pcistub_devices_lock, flags); 1069 spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1159 return count; 1070 return count;
1160} 1071}
1161static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); 1072
1073DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
1162 1074
1163static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, 1075static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
1164 const char *buf, 1076 const char *buf,
@@ -1171,9 +1083,10 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
1171 1083
1172 err = str_to_slot(buf, &domain, &bus, &slot, &func); 1084 err = str_to_slot(buf, &domain, &bus, &slot, &func);
1173 if (err) 1085 if (err)
1174 return err; 1086 goto out;
1175 1087
1176 psdev = pcistub_device_find(domain, bus, slot, func); 1088 psdev = pcistub_device_find(domain, bus, slot, func);
1089
1177 if (!psdev) 1090 if (!psdev)
1178 goto out; 1091 goto out;
1179 1092
@@ -1189,14 +1102,11 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
1189 if (dev_data->isr_on) 1102 if (dev_data->isr_on)
1190 dev_data->ack_intr = 1; 1103 dev_data->ack_intr = 1;
1191out: 1104out:
1192 if (psdev)
1193 pcistub_device_put(psdev);
1194 if (!err) 1105 if (!err)
1195 err = count; 1106 err = count;
1196 return err; 1107 return err;
1197} 1108}
1198static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, 1109DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
1199 pcistub_irq_handler_switch);
1200 1110
1201static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, 1111static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
1202 size_t count) 1112 size_t count)
@@ -1260,8 +1170,8 @@ out:
1260 1170
1261 return count; 1171 return count;
1262} 1172}
1263static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, 1173
1264 pcistub_quirk_add); 1174DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
1265 1175
1266static ssize_t permissive_add(struct device_driver *drv, const char *buf, 1176static ssize_t permissive_add(struct device_driver *drv, const char *buf,
1267 size_t count) 1177 size_t count)
@@ -1270,17 +1180,18 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
1270 int err; 1180 int err;
1271 struct pcistub_device *psdev; 1181 struct pcistub_device *psdev;
1272 struct xen_pcibk_dev_data *dev_data; 1182 struct xen_pcibk_dev_data *dev_data;
1273
1274 err = str_to_slot(buf, &domain, &bus, &slot, &func); 1183 err = str_to_slot(buf, &domain, &bus, &slot, &func);
1275 if (err) 1184 if (err)
1276 goto out; 1185 goto out;
1277
1278 psdev = pcistub_device_find(domain, bus, slot, func); 1186 psdev = pcistub_device_find(domain, bus, slot, func);
1279 if (!psdev) { 1187 if (!psdev) {
1280 err = -ENODEV; 1188 err = -ENODEV;
1281 goto out; 1189 goto out;
1282 } 1190 }
1283 1191 if (!psdev->dev) {
1192 err = -ENODEV;
1193 goto release;
1194 }
1284 dev_data = pci_get_drvdata(psdev->dev); 1195 dev_data = pci_get_drvdata(psdev->dev);
1285 /* the driver data for a device should never be null at this point */ 1196 /* the driver data for a device should never be null at this point */
1286 if (!dev_data) { 1197 if (!dev_data) {
@@ -1325,8 +1236,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
1325 spin_unlock_irqrestore(&pcistub_devices_lock, flags); 1236 spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1326 return count; 1237 return count;
1327} 1238}
1328static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, 1239
1329 permissive_add); 1240DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
1330 1241
1331static void pcistub_exit(void) 1242static void pcistub_exit(void)
1332{ 1243{
@@ -1358,51 +1269,22 @@ static int __init pcistub_init(void)
1358 err = sscanf(pci_devs_to_hide + pos, 1269 err = sscanf(pci_devs_to_hide + pos,
1359 " (%x:%x:%x.%x) %n", 1270 " (%x:%x:%x.%x) %n",
1360 &domain, &bus, &slot, &func, &parsed); 1271 &domain, &bus, &slot, &func, &parsed);
1361 switch (err) { 1272 if (err != 4) {
1362 case 3:
1363 func = -1;
1364 sscanf(pci_devs_to_hide + pos,
1365 " (%x:%x:%x.*) %n",
1366 &domain, &bus, &slot, &parsed);
1367 break;
1368 case 2:
1369 slot = func = -1;
1370 sscanf(pci_devs_to_hide + pos,
1371 " (%x:%x:*.*) %n",
1372 &domain, &bus, &parsed);
1373 break;
1374 }
1375
1376 if (!parsed) {
1377 domain = 0; 1273 domain = 0;
1378 err = sscanf(pci_devs_to_hide + pos, 1274 err = sscanf(pci_devs_to_hide + pos,
1379 " (%x:%x.%x) %n", 1275 " (%x:%x.%x) %n",
1380 &bus, &slot, &func, &parsed); 1276 &bus, &slot, &func, &parsed);
1381 switch (err) { 1277 if (err != 3)
1382 case 2: 1278 goto parse_error;
1383 func = -1;
1384 sscanf(pci_devs_to_hide + pos,
1385 " (%x:%x.*) %n",
1386 &bus, &slot, &parsed);
1387 break;
1388 case 1:
1389 slot = func = -1;
1390 sscanf(pci_devs_to_hide + pos,
1391 " (%x:*.*) %n",
1392 &bus, &parsed);
1393 break;
1394 }
1395 } 1279 }
1396 1280
1397 if (parsed <= 0)
1398 goto parse_error;
1399
1400 err = pcistub_device_id_add(domain, bus, slot, func); 1281 err = pcistub_device_id_add(domain, bus, slot, func);
1401 if (err) 1282 if (err)
1402 goto out; 1283 goto out;
1403 1284
1285 /* if parsed<=0, we've reached the end of the string */
1404 pos += parsed; 1286 pos += parsed;
1405 } while (pci_devs_to_hide[pos]); 1287 } while (parsed > 0 && pci_devs_to_hide[pos]);
1406 } 1288 }
1407 1289
1408 /* If we're the first PCI Device Driver to register, we're the 1290 /* If we're the first PCI Device Driver to register, we're the
@@ -1492,4 +1374,3 @@ module_init(xen_pcibk_init);
1492module_exit(xen_pcibk_cleanup); 1374module_exit(xen_pcibk_cleanup);
1493 1375
1494MODULE_LICENSE("Dual BSD/GPL"); 1376MODULE_LICENSE("Dual BSD/GPL");
1495MODULE_ALIAS("xen-backend:pci");
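
[Editor's note] The str_to_slot()/str_to_quirk() hunks above swap between two sscanf validation idioms: counting successful conversions against a fixed-width format, versus recording the consumed position with %n and requiring the whole string to match. The following is a minimal userspace sketch of the two idioms, not kernel code; the sample strings and helper names are invented for illustration.

/* Editor's illustration (hypothetical userspace sketch, not part of the patch):
 * two ways of validating a "dddd:bb:ss.f" PCI address with sscanf, mirroring
 * the idioms seen in the str_to_slot() variants above. */
#include <stdio.h>

/* Style A: count conversions (the fixed-width "%04x:%02x:%02x.%1x" variant). */
static int parse_by_count(const char *buf, unsigned int *dom, unsigned int *bus,
			  unsigned int *slot, unsigned int *fn)
{
	/* sscanf returns the number of successful conversions. */
	return sscanf(buf, " %4x:%2x:%2x.%1x", dom, bus, slot, fn) == 4 ? 0 : -1;
}

/* Style B: record how far sscanf got with %n and insist the whole string matched. */
static int parse_by_position(const char *buf, unsigned int *dom, unsigned int *bus,
			     unsigned int *slot, unsigned int *fn)
{
	int parsed = 0;

	sscanf(buf, " %x:%x:%x.%x %n", dom, bus, slot, fn, &parsed);
	return (parsed && !buf[parsed]) ? 0 : -1;
}

int main(void)
{
	unsigned int d, b, s, f;
	const char *good = "0000:03:00.1";
	const char *trailing = "0000:03:00.1 junk";

	printf("count:    good=%d trailing=%d\n",
	       parse_by_count(good, &d, &b, &s, &f),
	       parse_by_count(trailing, &d, &b, &s, &f));
	printf("position: good=%d trailing=%d\n",
	       parse_by_position(good, &d, &b, &s, &f),
	       parse_by_position(trailing, &d, &b, &s, &f));
	return 0;
}

With the %n variant, trailing text after the address makes the parse fail, while the conversion-count variant silently accepts it as long as the first four fields convert.
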
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index a7def010eba..a0e131a8150 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -15,8 +15,6 @@
15#include <linux/atomic.h> 15#include <linux/atomic.h>
16#include <xen/interface/io/pciif.h> 16#include <xen/interface/io/pciif.h>
17 17
18#define DRV_NAME "xen-pciback"
19
20struct pci_dev_entry { 18struct pci_dev_entry {
21 struct list_head list; 19 struct list_head list;
22 struct pci_dev *dev; 20 struct pci_dev *dev;
@@ -29,7 +27,7 @@ struct pci_dev_entry {
29 27
30struct xen_pcibk_device { 28struct xen_pcibk_device {
31 void *pci_dev_data; 29 void *pci_dev_data;
32 struct mutex dev_lock; 30 spinlock_t dev_lock;
33 struct xenbus_device *xdev; 31 struct xenbus_device *xdev;
34 struct xenbus_watch be_watch; 32 struct xenbus_watch be_watch;
35 u8 be_watching; 33 u8 be_watching;
@@ -41,7 +39,6 @@ struct xen_pcibk_device {
41 39
42struct xen_pcibk_dev_data { 40struct xen_pcibk_dev_data {
43 struct list_head config_fields; 41 struct list_head config_fields;
44 struct pci_saved_state *pci_saved_state;
45 unsigned int permissive:1; 42 unsigned int permissive:1;
46 unsigned int warned_on_write:1; 43 unsigned int warned_on_write:1;
47 unsigned int enable_intx:1; 44 unsigned int enable_intx:1;
@@ -92,7 +89,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
92 * passthrough - BDFs are exactly like in the host. 89 * passthrough - BDFs are exactly like in the host.
93 */ 90 */
94struct xen_pcibk_backend { 91struct xen_pcibk_backend {
95 const char *name; 92 char *name;
96 int (*init)(struct xen_pcibk_device *pdev); 93 int (*init)(struct xen_pcibk_device *pdev);
97 void (*free)(struct xen_pcibk_device *pdev); 94 void (*free)(struct xen_pcibk_device *pdev);
98 int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev, 95 int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
@@ -107,9 +104,9 @@ struct xen_pcibk_backend {
107 unsigned int devfn); 104 unsigned int devfn);
108}; 105};
109 106
110extern const struct xen_pcibk_backend xen_pcibk_vpci_backend; 107extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
111extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend; 108extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
112extern const struct xen_pcibk_backend *xen_pcibk_backend; 109extern struct xen_pcibk_backend *xen_pcibk_backend;
113 110
114static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, 111static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
115 struct pci_dev *dev, 112 struct pci_dev *dev,
@@ -119,14 +116,13 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
119 if (xen_pcibk_backend && xen_pcibk_backend->add) 116 if (xen_pcibk_backend && xen_pcibk_backend->add)
120 return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); 117 return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
121 return -1; 118 return -1;
122} 119};
123
124static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, 120static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
125 struct pci_dev *dev) 121 struct pci_dev *dev)
126{ 122{
127 if (xen_pcibk_backend && xen_pcibk_backend->free) 123 if (xen_pcibk_backend && xen_pcibk_backend->free)
128 return xen_pcibk_backend->release(pdev, dev); 124 return xen_pcibk_backend->release(pdev, dev);
129} 125};
130 126
131static inline struct pci_dev * 127static inline struct pci_dev *
132xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, 128xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
@@ -135,8 +131,7 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
135 if (xen_pcibk_backend && xen_pcibk_backend->get) 131 if (xen_pcibk_backend && xen_pcibk_backend->get)
136 return xen_pcibk_backend->get(pdev, domain, bus, devfn); 132 return xen_pcibk_backend->get(pdev, domain, bus, devfn);
137 return NULL; 133 return NULL;
138} 134};
139
140/** 135/**
141* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk 136* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
142* before sending aer request to pcifront, so that guest could identify 137* before sending aer request to pcifront, so that guest could identify
@@ -153,29 +148,25 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
153 return xen_pcibk_backend->find(pcidev, pdev, domain, bus, 148 return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
154 devfn); 149 devfn);
155 return -1; 150 return -1;
156} 151};
157
158static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) 152static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
159{ 153{
160 if (xen_pcibk_backend && xen_pcibk_backend->init) 154 if (xen_pcibk_backend && xen_pcibk_backend->init)
161 return xen_pcibk_backend->init(pdev); 155 return xen_pcibk_backend->init(pdev);
162 return -1; 156 return -1;
163} 157};
164
165static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, 158static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
166 publish_pci_root_cb cb) 159 publish_pci_root_cb cb)
167{ 160{
168 if (xen_pcibk_backend && xen_pcibk_backend->publish) 161 if (xen_pcibk_backend && xen_pcibk_backend->publish)
169 return xen_pcibk_backend->publish(pdev, cb); 162 return xen_pcibk_backend->publish(pdev, cb);
170 return -1; 163 return -1;
171} 164};
172
173static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) 165static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
174{ 166{
175 if (xen_pcibk_backend && xen_pcibk_backend->free) 167 if (xen_pcibk_backend && xen_pcibk_backend->free)
176 return xen_pcibk_backend->free(pdev); 168 return xen_pcibk_backend->free(pdev);
177} 169};
178
179/* Handles events from front-end */ 170/* Handles events from front-end */
180irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); 171irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
181void xen_pcibk_do_op(struct work_struct *data); 172void xen_pcibk_do_op(struct work_struct *data);
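
[Editor's note] pciback.h dispatches every operation through a table of function pointers (struct xen_pcibk_backend), and each inline wrapper checks both that a backend has been selected and that the specific op exists before calling it. Below is a small self-contained userspace sketch of that pattern; the struct and function names are simplified stand-ins, not the driver's real API. The hunk above also toggles const-qualification of those tables, which does not change the dispatch shape shown here.

/* Editor's illustration (hypothetical, userspace-only): the dispatch pattern
 * behind struct xen_pcibk_backend -- a table of ops chosen at init time,
 * with wrappers that tolerate a missing backend or a missing op. */
#include <stdio.h>

struct backend {
	const char *name;
	int (*init)(void);
	int (*publish)(int root);
};

static int vpci_init(void)        { puts("vpci init"); return 0; }
static int vpci_publish(int root) { printf("vpci publish root %d\n", root); return 0; }

static const struct backend vpci_backend = {
	.name    = "vpci",
	.init    = vpci_init,
	.publish = vpci_publish,	/* could be left NULL; the wrapper copes */
};

static const struct backend *active_backend;

/* Mirrors the inline wrappers in pciback.h: check the table and the op. */
static int backend_init(void)
{
	if (active_backend && active_backend->init)
		return active_backend->init();
	return -1;
}

static int backend_publish(int root)
{
	if (active_backend && active_backend->publish)
		return active_backend->publish(root);
	return -1;
}

int main(void)
{
	/* In the driver the choice depends on the "passthrough" module parameter. */
	active_backend = &vpci_backend;
	printf("backend is %s\n", active_backend->name);
	backend_init();
	return backend_publish(0);
}
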
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 97f5d264c31..8c95c3415b7 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -10,6 +10,7 @@
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include "pciback.h" 11#include "pciback.h"
12 12
13#define DRV_NAME "xen-pciback"
13int verbose_request; 14int verbose_request;
14module_param(verbose_request, int, 0644); 15module_param(verbose_request, int, 0644);
15 16
@@ -234,7 +235,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
234 if (dev_data) 235 if (dev_data)
235 dev_data->ack_intr = 0; 236 dev_data->ack_intr = 0;
236 237
237 return result > 0 ? 0 : result; 238 return result;
238} 239}
239 240
240static 241static
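
[Editor's note] The xen_pcibk_enable_msix() hunk toggles between returning the allocator's raw result and folding any positive value into 0 (result > 0 ? 0 : result), so the caller only ever sees success or a negative error. The sketch below imitates that normalization in plain userspace C; enable_vectors() is a made-up stand-in and does not reproduce the real pci_enable_msix() semantics.

/* Editor's illustration (hypothetical userspace sketch): folding a
 * "positive means partial capacity" return value into plain 0/-errno. */
#include <stdio.h>
#include <errno.h>

/* Pretend allocator: 0 on success, -errno on error, or a positive count of
 * vectors that could be granted when the full request cannot be met. */
static int enable_vectors(int requested, int available)
{
	if (available <= 0)
		return -ENOSPC;
	if (requested > available)
		return available;	/* positive: partial capacity */
	return 0;
}

/* Wrapper that normalizes: any positive answer becomes 0, errors pass through. */
static int enable_vectors_normalized(int requested, int available)
{
	int result = enable_vectors(requested, available);

	return result > 0 ? 0 : result;
}

int main(void)
{
	printf("raw:        %d\n", enable_vectors(8, 4));		/* 4   */
	printf("normalized: %d\n", enable_vectors_normalized(8, 4));	/* 0   */
	printf("error:      %d\n", enable_vectors_normalized(8, 0));	/* -28 */
	return 0;
}
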
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 0f478ac483c..4a42cfb0959 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -8,15 +8,16 @@
8#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/pci.h> 10#include <linux/pci.h>
11#include <linux/mutex.h> 11#include <linux/spinlock.h>
12#include "pciback.h" 12#include "pciback.h"
13 13
14#define PCI_SLOT_MAX 32 14#define PCI_SLOT_MAX 32
15#define DRV_NAME "xen-pciback"
15 16
16struct vpci_dev_data { 17struct vpci_dev_data {
17 /* Access to dev_list must be protected by lock */ 18 /* Access to dev_list must be protected by lock */
18 struct list_head dev_list[PCI_SLOT_MAX]; 19 struct list_head dev_list[PCI_SLOT_MAX];
19 struct mutex lock; 20 spinlock_t lock;
20}; 21};
21 22
22static inline struct list_head *list_first(struct list_head *head) 23static inline struct list_head *list_first(struct list_head *head)
@@ -32,12 +33,13 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
32 struct pci_dev_entry *entry; 33 struct pci_dev_entry *entry;
33 struct pci_dev *dev = NULL; 34 struct pci_dev *dev = NULL;
34 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; 35 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
36 unsigned long flags;
35 37
36 if (domain != 0 || bus != 0) 38 if (domain != 0 || bus != 0)
37 return NULL; 39 return NULL;
38 40
39 if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { 41 if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
40 mutex_lock(&vpci_dev->lock); 42 spin_lock_irqsave(&vpci_dev->lock, flags);
41 43
42 list_for_each_entry(entry, 44 list_for_each_entry(entry,
43 &vpci_dev->dev_list[PCI_SLOT(devfn)], 45 &vpci_dev->dev_list[PCI_SLOT(devfn)],
@@ -48,7 +50,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
48 } 50 }
49 } 51 }
50 52
51 mutex_unlock(&vpci_dev->lock); 53 spin_unlock_irqrestore(&vpci_dev->lock, flags);
52 } 54 }
53 return dev; 55 return dev;
54} 56}
@@ -69,6 +71,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
69 int err = 0, slot, func = -1; 71 int err = 0, slot, func = -1;
70 struct pci_dev_entry *t, *dev_entry; 72 struct pci_dev_entry *t, *dev_entry;
71 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; 73 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
74 unsigned long flags;
72 75
73 if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { 76 if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
74 err = -EFAULT; 77 err = -EFAULT;
@@ -87,17 +90,11 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
87 90
88 dev_entry->dev = dev; 91 dev_entry->dev = dev;
89 92
90 mutex_lock(&vpci_dev->lock); 93 spin_lock_irqsave(&vpci_dev->lock, flags);
91
92 /*
93 * Keep multi-function devices together on the virtual PCI bus, except
94 * virtual functions.
95 */
96 if (!dev->is_virtfn) {
97 for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
98 if (list_empty(&vpci_dev->dev_list[slot]))
99 continue;
100 94
95 /* Keep multi-function devices together on the virtual PCI bus */
96 for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
97 if (!list_empty(&vpci_dev->dev_list[slot])) {
101 t = list_entry(list_first(&vpci_dev->dev_list[slot]), 98 t = list_entry(list_first(&vpci_dev->dev_list[slot]),
102 struct pci_dev_entry, list); 99 struct pci_dev_entry, list);
103 100
@@ -122,7 +119,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
122 pci_name(dev), slot); 119 pci_name(dev), slot);
123 list_add_tail(&dev_entry->list, 120 list_add_tail(&dev_entry->list,
124 &vpci_dev->dev_list[slot]); 121 &vpci_dev->dev_list[slot]);
125 func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn); 122 func = PCI_FUNC(dev->devfn);
126 goto unlock; 123 goto unlock;
127 } 124 }
128 } 125 }
@@ -132,7 +129,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
132 "No more space on root virtual PCI bus"); 129 "No more space on root virtual PCI bus");
133 130
134unlock: 131unlock:
135 mutex_unlock(&vpci_dev->lock); 132 spin_unlock_irqrestore(&vpci_dev->lock, flags);
136 133
137 /* Publish this device. */ 134 /* Publish this device. */
138 if (!err) 135 if (!err)
@@ -148,13 +145,14 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
148 int slot; 145 int slot;
149 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; 146 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
150 struct pci_dev *found_dev = NULL; 147 struct pci_dev *found_dev = NULL;
148 unsigned long flags;
151 149
152 mutex_lock(&vpci_dev->lock); 150 spin_lock_irqsave(&vpci_dev->lock, flags);
153 151
154 for (slot = 0; slot < PCI_SLOT_MAX; slot++) { 152 for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
155 struct pci_dev_entry *e; 153 struct pci_dev_entry *e, *tmp;
156 154 list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
157 list_for_each_entry(e, &vpci_dev->dev_list[slot], list) { 155 list) {
158 if (e->dev == dev) { 156 if (e->dev == dev) {
159 list_del(&e->list); 157 list_del(&e->list);
160 found_dev = e->dev; 158 found_dev = e->dev;
@@ -165,7 +163,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
165 } 163 }
166 164
167out: 165out:
168 mutex_unlock(&vpci_dev->lock); 166 spin_unlock_irqrestore(&vpci_dev->lock, flags);
169 167
170 if (found_dev) 168 if (found_dev)
171 pcistub_put_pci_dev(found_dev); 169 pcistub_put_pci_dev(found_dev);
@@ -180,7 +178,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
180 if (!vpci_dev) 178 if (!vpci_dev)
181 return -ENOMEM; 179 return -ENOMEM;
182 180
183 mutex_init(&vpci_dev->lock); 181 spin_lock_init(&vpci_dev->lock);
184 182
185 for (slot = 0; slot < PCI_SLOT_MAX; slot++) 183 for (slot = 0; slot < PCI_SLOT_MAX; slot++)
186 INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); 184 INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
@@ -224,9 +222,10 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
224 struct pci_dev_entry *entry; 222 struct pci_dev_entry *entry;
225 struct pci_dev *dev = NULL; 223 struct pci_dev *dev = NULL;
226 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; 224 struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
225 unsigned long flags;
227 int found = 0, slot; 226 int found = 0, slot;
228 227
229 mutex_lock(&vpci_dev->lock); 228 spin_lock_irqsave(&vpci_dev->lock, flags);
230 for (slot = 0; slot < PCI_SLOT_MAX; slot++) { 229 for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
231 list_for_each_entry(entry, 230 list_for_each_entry(entry,
232 &vpci_dev->dev_list[slot], 231 &vpci_dev->dev_list[slot],
@@ -244,11 +243,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
244 } 243 }
245 } 244 }
246 } 245 }
247 mutex_unlock(&vpci_dev->lock); 246 spin_unlock_irqrestore(&vpci_dev->lock, flags);
248 return found; 247 return found;
249} 248}
250 249
251const struct xen_pcibk_backend xen_pcibk_vpci_backend = { 250struct xen_pcibk_backend xen_pcibk_vpci_backend = {
252 .name = "vpci", 251 .name = "vpci",
253 .init = __xen_pcibk_init_devices, 252 .init = __xen_pcibk_init_devices,
254 .free = __xen_pcibk_release_devices, 253 .free = __xen_pcibk_release_devices,
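
[Editor's note] vpci.c places devices on a virtual bus by slot and function, using the devfn encoding that PCI_DEVFN()/PCI_SLOT()/PCI_FUNC() pack and unpack, and the same %04x:%02x:%02x.%x address formatting seen elsewhere in the diff. The userspace sketch below reproduces those macro bodies (as conventionally defined in linux/pci.h) only so the example builds outside the kernel; the address values are arbitrary.

/* Editor's illustration (userspace sketch): the devfn packing used throughout
 * vpci.c and pci_stub.c above.  Macro bodies follow the familiar linux/pci.h
 * definitions and are reproduced here solely to make the sample self-contained. */
#include <stdio.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	unsigned int domain = 0, bus = 3, slot = 0, func = 1;
	unsigned int devfn = PCI_DEVFN(slot, func);

	/* Same shape as the pr_debug()/scnprintf() output in the diff:
	 * 16-bit domain, 8-bit bus, 5-bit slot, 3-bit function. */
	printf("%04x:%02x:%02x.%x -> devfn %#x (slot %u, func %u)\n",
	       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
	       devfn, PCI_SLOT(devfn), PCI_FUNC(devfn));
	return 0;
}
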
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f99eac..978d2c6f5dc 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -13,10 +13,11 @@
13#include <asm/xen/pci.h> 13#include <asm/xen/pci.h>
14#include "pciback.h" 14#include "pciback.h"
15 15
16#define DRV_NAME "xen-pciback"
16#define INVALID_EVTCHN_IRQ (-1) 17#define INVALID_EVTCHN_IRQ (-1)
17struct workqueue_struct *xen_pcibk_wq; 18struct workqueue_struct *xen_pcibk_wq;
18 19
19static bool __read_mostly passthrough; 20static int __read_mostly passthrough;
20module_param(passthrough, bool, S_IRUGO); 21module_param(passthrough, bool, S_IRUGO);
21MODULE_PARM_DESC(passthrough, 22MODULE_PARM_DESC(passthrough,
22 "Option to specify how to export PCI topology to guest:\n"\ 23 "Option to specify how to export PCI topology to guest:\n"\
@@ -43,7 +44,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
43 pdev->xdev = xdev; 44 pdev->xdev = xdev;
44 dev_set_drvdata(&xdev->dev, pdev); 45 dev_set_drvdata(&xdev->dev, pdev);
45 46
46 mutex_init(&pdev->dev_lock); 47 spin_lock_init(&pdev->dev_lock);
47 48
48 pdev->sh_info = NULL; 49 pdev->sh_info = NULL;
49 pdev->evtchn_irq = INVALID_EVTCHN_IRQ; 50 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
@@ -61,12 +62,14 @@ out:
61 62
62static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) 63static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
63{ 64{
64 mutex_lock(&pdev->dev_lock); 65 spin_lock(&pdev->dev_lock);
66
65 /* Ensure the guest can't trigger our handler before removing devices */ 67 /* Ensure the guest can't trigger our handler before removing devices */
66 if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { 68 if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
67 unbind_from_irqhandler(pdev->evtchn_irq, pdev); 69 unbind_from_irqhandler(pdev->evtchn_irq, pdev);
68 pdev->evtchn_irq = INVALID_EVTCHN_IRQ; 70 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
69 } 71 }
72 spin_unlock(&pdev->dev_lock);
70 73
71 /* If the driver domain started an op, make sure we complete it 74 /* If the driver domain started an op, make sure we complete it
72 * before releasing the shared memory */ 75 * before releasing the shared memory */
@@ -74,11 +77,13 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
74 /* Note, the workqueue does not use spinlocks at all.*/ 77 /* Note, the workqueue does not use spinlocks at all.*/
75 flush_workqueue(xen_pcibk_wq); 78 flush_workqueue(xen_pcibk_wq);
76 79
80 spin_lock(&pdev->dev_lock);
77 if (pdev->sh_info != NULL) { 81 if (pdev->sh_info != NULL) {
78 xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); 82 xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
79 pdev->sh_info = NULL; 83 pdev->sh_info = NULL;
80 } 84 }
81 mutex_unlock(&pdev->dev_lock); 85 spin_unlock(&pdev->dev_lock);
86
82} 87}
83 88
84static void free_pdev(struct xen_pcibk_device *pdev) 89static void free_pdev(struct xen_pcibk_device *pdev)
@@ -115,7 +120,9 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
115 goto out; 120 goto out;
116 } 121 }
117 122
123 spin_lock(&pdev->dev_lock);
118 pdev->sh_info = vaddr; 124 pdev->sh_info = vaddr;
125 spin_unlock(&pdev->dev_lock);
119 126
120 err = bind_interdomain_evtchn_to_irqhandler( 127 err = bind_interdomain_evtchn_to_irqhandler(
121 pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, 128 pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
@@ -125,7 +132,10 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
125 "Error binding event channel to IRQ"); 132 "Error binding event channel to IRQ");
126 goto out; 133 goto out;
127 } 134 }
135
136 spin_lock(&pdev->dev_lock);
128 pdev->evtchn_irq = err; 137 pdev->evtchn_irq = err;
138 spin_unlock(&pdev->dev_lock);
129 err = 0; 139 err = 0;
130 140
131 dev_dbg(&pdev->xdev->dev, "Attached!\n"); 141 dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -140,7 +150,6 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
140 char *magic = NULL; 150 char *magic = NULL;
141 151
142 152
143 mutex_lock(&pdev->dev_lock);
144 /* Make sure we only do this setup once */ 153 /* Make sure we only do this setup once */
145 if (xenbus_read_driver_state(pdev->xdev->nodename) != 154 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
146 XenbusStateInitialised) 155 XenbusStateInitialised)
@@ -167,7 +176,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
167 if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { 176 if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
168 xenbus_dev_fatal(pdev->xdev, -EFAULT, 177 xenbus_dev_fatal(pdev->xdev, -EFAULT,
169 "version mismatch (%s/%s) with pcifront - " 178 "version mismatch (%s/%s) with pcifront - "
170 "halting " DRV_NAME, 179 "halting xen_pcibk",
171 magic, XEN_PCI_MAGIC); 180 magic, XEN_PCI_MAGIC);
172 goto out; 181 goto out;
173 } 182 }
@@ -185,7 +194,6 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
185 194
186 dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); 195 dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
187out: 196out:
188 mutex_unlock(&pdev->dev_lock);
189 197
190 kfree(magic); 198 kfree(magic);
191 199
@@ -206,7 +214,6 @@ static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
206 goto out; 214 goto out;
207 } 215 }
208 216
209 /* Note: The PV protocol uses %02x, don't change it */
210 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, 217 err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
211 "%04x:%02x:%02x.%02x", domain, bus, 218 "%04x:%02x:%02x.%02x", domain, bus,
212 PCI_SLOT(devfn), PCI_FUNC(devfn)); 219 PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -230,7 +237,7 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
230 err = -EINVAL; 237 err = -EINVAL;
231 xenbus_dev_fatal(pdev->xdev, err, 238 xenbus_dev_fatal(pdev->xdev, err,
232 "Couldn't locate PCI device " 239 "Couldn't locate PCI device "
233 "(%04x:%02x:%02x.%d)! " 240 "(%04x:%02x:%02x.%01x)! "
234 "perhaps already in-use?", 241 "perhaps already in-use?",
235 domain, bus, slot, func); 242 domain, bus, slot, func);
236 goto out; 243 goto out;
@@ -244,8 +251,8 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
244 dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); 251 dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
245 if (xen_register_device_domain_owner(dev, 252 if (xen_register_device_domain_owner(dev,
246 pdev->xdev->otherend_id) != 0) { 253 pdev->xdev->otherend_id) != 0) {
247 dev_err(&dev->dev, "Stealing ownership from dom%d.\n", 254 dev_err(&dev->dev, "device has been assigned to another " \
248 xen_find_device_domain_owner(dev)); 255 "domain! Over-writting the ownership, but beware.\n");
249 xen_unregister_device_domain_owner(dev); 256 xen_unregister_device_domain_owner(dev);
250 xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); 257 xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
251 } 258 }
@@ -275,7 +282,7 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
275 if (!dev) { 282 if (!dev) {
276 err = -EINVAL; 283 err = -EINVAL;
277 dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " 284 dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
278 "(%04x:%02x:%02x.%d)! not owned by this domain\n", 285 "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
279 domain, bus, slot, func); 286 domain, bus, slot, func);
280 goto out; 287 goto out;
281 } 288 }
@@ -362,7 +369,6 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
362 369
363 dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); 370 dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
364 371
365 mutex_lock(&pdev->dev_lock);
366 /* Make sure we only reconfigure once */ 372 /* Make sure we only reconfigure once */
367 if (xenbus_read_driver_state(pdev->xdev->nodename) != 373 if (xenbus_read_driver_state(pdev->xdev->nodename) !=
368 XenbusStateReconfiguring) 374 XenbusStateReconfiguring)
@@ -500,7 +506,6 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
500 } 506 }
501 507
502out: 508out:
503 mutex_unlock(&pdev->dev_lock);
504 return 0; 509 return 0;
505} 510}
506 511
@@ -557,7 +562,6 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
557 char dev_str[64]; 562 char dev_str[64];
558 char state_str[64]; 563 char state_str[64];
559 564
560 mutex_lock(&pdev->dev_lock);
561 /* It's possible we could get the call to setup twice, so make sure 565 /* It's possible we could get the call to setup twice, so make sure
562 * we're not already connected. 566 * we're not already connected.
563 */ 567 */
@@ -638,10 +642,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
638 "Error switching to initialised state!"); 642 "Error switching to initialised state!");
639 643
640out: 644out:
641 mutex_unlock(&pdev->dev_lock);
642 if (!err) 645 if (!err)
643 /* see if pcifront is already configured (if not, we'll wait) */ 646 /* see if pcifront is already configured (if not, we'll wait) */
644 xen_pcibk_attach(pdev); 647 xen_pcibk_attach(pdev);
648
645 return err; 649 return err;
646} 650}
647 651
@@ -706,18 +710,21 @@ static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
706 return 0; 710 return 0;
707} 711}
708 712
709static const struct xenbus_device_id xen_pcibk_ids[] = { 713static const struct xenbus_device_id xenpci_ids[] = {
710 {"pci"}, 714 {"pci"},
711 {""}, 715 {""},
712}; 716};
713 717
714static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME, 718static struct xenbus_driver xenbus_xen_pcibk_driver = {
719 .name = DRV_NAME,
720 .owner = THIS_MODULE,
721 .ids = xenpci_ids,
715 .probe = xen_pcibk_xenbus_probe, 722 .probe = xen_pcibk_xenbus_probe,
716 .remove = xen_pcibk_xenbus_remove, 723 .remove = xen_pcibk_xenbus_remove,
717 .otherend_changed = xen_pcibk_frontend_changed, 724 .otherend_changed = xen_pcibk_frontend_changed,
718); 725};
719 726
720const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; 727struct xen_pcibk_backend *xen_pcibk_backend;
721 728
722int __init xen_pcibk_xenbus_register(void) 729int __init xen_pcibk_xenbus_register(void)
723{ 730{
@@ -731,11 +738,11 @@ int __init xen_pcibk_xenbus_register(void)
731 if (passthrough) 738 if (passthrough)
732 xen_pcibk_backend = &xen_pcibk_passthrough_backend; 739 xen_pcibk_backend = &xen_pcibk_passthrough_backend;
733 pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); 740 pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
734 return xenbus_register_backend(&xen_pcibk_driver); 741 return xenbus_register_backend(&xenbus_xen_pcibk_driver);
735} 742}
736 743
737void __exit xen_pcibk_xenbus_unregister(void) 744void __exit xen_pcibk_xenbus_unregister(void)
738{ 745{
739 destroy_workqueue(xen_pcibk_wq); 746 destroy_workqueue(xen_pcibk_wq);
740 xenbus_unregister_driver(&xen_pcibk_driver); 747 xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
741} 748}
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 2552d3e0a70..6ea852e2516 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,13 +68,10 @@
68 */ 68 */
69 69
70#include <linux/kernel.h> 70#include <linux/kernel.h>
71#include <linux/bootmem.h>
72#include <linux/swap.h>
73#include <linux/mm.h> 71#include <linux/mm.h>
74#include <linux/mman.h> 72#include <linux/mman.h>
75#include <linux/module.h> 73#include <linux/module.h>
76#include <linux/workqueue.h> 74#include <linux/workqueue.h>
77#include <linux/device.h>
78#include <xen/balloon.h> 75#include <xen/balloon.h>
79#include <xen/tmem.h> 76#include <xen/tmem.h>
80#include <xen/xen.h> 77#include <xen/xen.h>
@@ -96,21 +93,6 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
96/* In HZ, controls frequency of worker invocation. */ 93/* In HZ, controls frequency of worker invocation. */
97static unsigned int selfballoon_interval __read_mostly = 5; 94static unsigned int selfballoon_interval __read_mostly = 5;
98 95
99/*
100 * Minimum usable RAM in MB for selfballooning target for balloon.
101 * If non-zero, it is added to totalreserve_pages and self-ballooning
102 * will not balloon below the sum. If zero, a piecewise linear function
103 * is calculated as a minimum and added to totalreserve_pages. Note that
104 * setting this value indiscriminately may cause OOMs and crashes.
105 */
106static unsigned int selfballoon_min_usable_mb;
107
108/*
109 * Amount of RAM in MB to add to the target number of pages.
110 * Can be used to reserve some more room for caches and the like.
111 */
112static unsigned int selfballoon_reserved_mb;
113
114static void selfballoon_process(struct work_struct *work); 96static void selfballoon_process(struct work_struct *work);
115static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); 97static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
116 98
@@ -207,24 +189,20 @@ static int __init xen_selfballooning_setup(char *s)
207__setup("selfballooning", xen_selfballooning_setup); 189__setup("selfballooning", xen_selfballooning_setup);
208#endif /* CONFIG_FRONTSWAP */ 190#endif /* CONFIG_FRONTSWAP */
209 191
210#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
211
212/* 192/*
213 * Use current balloon size, the goal (vm_committed_as), and hysteresis 193 * Use current balloon size, the goal (vm_committed_as), and hysteresis
214 * parameters to set a new target balloon size 194 * parameters to set a new target balloon size
215 */ 195 */
216static void selfballoon_process(struct work_struct *work) 196static void selfballoon_process(struct work_struct *work)
217{ 197{
218 unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; 198 unsigned long cur_pages, goal_pages, tgt_pages;
219 unsigned long useful_pages;
220 bool reset_timer = false; 199 bool reset_timer = false;
221 200
222 if (xen_selfballooning_enabled) { 201 if (xen_selfballooning_enabled) {
223 cur_pages = totalram_pages; 202 cur_pages = balloon_stats.current_pages;
224 tgt_pages = cur_pages; /* default is no change */ 203 tgt_pages = cur_pages; /* default is no change */
225 goal_pages = vm_memory_committed() + 204 goal_pages = percpu_counter_read_positive(&vm_committed_as) +
226 totalreserve_pages + 205 balloon_stats.current_pages - totalram_pages;
227 MB2PAGES(selfballoon_reserved_mb);
228#ifdef CONFIG_FRONTSWAP 206#ifdef CONFIG_FRONTSWAP
229 /* allow space for frontswap pages to be repatriated */ 207 /* allow space for frontswap pages to be repatriated */
230 if (frontswap_selfshrinking && frontswap_enabled) 208 if (frontswap_selfshrinking && frontswap_enabled)
@@ -239,26 +217,7 @@ static void selfballoon_process(struct work_struct *work)
239 ((goal_pages - cur_pages) / 217 ((goal_pages - cur_pages) /
240 selfballoon_uphysteresis); 218 selfballoon_uphysteresis);
241 /* else if cur_pages == goal_pages, no change */ 219 /* else if cur_pages == goal_pages, no change */
242 useful_pages = max_pfn - totalreserve_pages; 220 balloon_set_new_target(tgt_pages);
243 if (selfballoon_min_usable_mb != 0)
244 floor_pages = totalreserve_pages +
245 MB2PAGES(selfballoon_min_usable_mb);
246 /* piecewise linear function ending in ~3% slope */
247 else if (useful_pages < MB2PAGES(16))
248 floor_pages = max_pfn; /* not worth ballooning */
249 else if (useful_pages < MB2PAGES(64))
250 floor_pages = totalreserve_pages + MB2PAGES(16) +
251 ((useful_pages - MB2PAGES(16)) >> 1);
252 else if (useful_pages < MB2PAGES(512))
253 floor_pages = totalreserve_pages + MB2PAGES(40) +
254 ((useful_pages - MB2PAGES(40)) >> 3);
255 else /* useful_pages >= MB2PAGES(512) */
256 floor_pages = totalreserve_pages + MB2PAGES(99) +
257 ((useful_pages - MB2PAGES(99)) >> 5);
258 if (tgt_pages < floor_pages)
259 tgt_pages = floor_pages;
260 balloon_set_new_target(tgt_pages +
261 balloon_stats.current_pages - totalram_pages);
262 reset_timer = true; 221 reset_timer = true;
263 } 222 }
264#ifdef CONFIG_FRONTSWAP 223#ifdef CONFIG_FRONTSWAP
@@ -274,20 +233,21 @@ static void selfballoon_process(struct work_struct *work)
274 233
275#ifdef CONFIG_SYSFS 234#ifdef CONFIG_SYSFS
276 235
236#include <linux/sysdev.h>
277#include <linux/capability.h> 237#include <linux/capability.h>
278 238
279#define SELFBALLOON_SHOW(name, format, args...) \ 239#define SELFBALLOON_SHOW(name, format, args...) \
280 static ssize_t show_##name(struct device *dev, \ 240 static ssize_t show_##name(struct sys_device *dev, \
281 struct device_attribute *attr, \ 241 struct sysdev_attribute *attr, \
282 char *buf) \ 242 char *buf) \
283 { \ 243 { \
284 return sprintf(buf, format, ##args); \ 244 return sprintf(buf, format, ##args); \
285 } 245 }
286 246
287SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); 247SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled);
288 248
289static ssize_t store_selfballooning(struct device *dev, 249static ssize_t store_selfballooning(struct sys_device *dev,
290 struct device_attribute *attr, 250 struct sysdev_attribute *attr,
291 const char *buf, 251 const char *buf,
292 size_t count) 252 size_t count)
293{ 253{
@@ -310,13 +270,13 @@ static ssize_t store_selfballooning(struct device *dev,
310 return count; 270 return count;
311} 271}
312 272
313static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR, 273static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
314 show_selfballooning, store_selfballooning); 274 show_selfballooning, store_selfballooning);
315 275
316SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); 276SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval);
317 277
318static ssize_t store_selfballoon_interval(struct device *dev, 278static ssize_t store_selfballoon_interval(struct sys_device *dev,
319 struct device_attribute *attr, 279 struct sysdev_attribute *attr,
320 const char *buf, 280 const char *buf,
321 size_t count) 281 size_t count)
322{ 282{
@@ -332,13 +292,13 @@ static ssize_t store_selfballoon_interval(struct device *dev,
332 return count; 292 return count;
333} 293}
334 294
335static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, 295static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
336 show_selfballoon_interval, store_selfballoon_interval); 296 show_selfballoon_interval, store_selfballoon_interval);
337 297
338SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); 298SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis);
339 299
340static ssize_t store_selfballoon_downhys(struct device *dev, 300static ssize_t store_selfballoon_downhys(struct sys_device *dev,
341 struct device_attribute *attr, 301 struct sysdev_attribute *attr,
342 const char *buf, 302 const char *buf,
343 size_t count) 303 size_t count)
344{ 304{
@@ -354,14 +314,14 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
354 return count; 314 return count;
355} 315}
356 316
357static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, 317static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
358 show_selfballoon_downhys, store_selfballoon_downhys); 318 show_selfballoon_downhys, store_selfballoon_downhys);
359 319
360 320
361SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); 321SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis);
362 322
363static ssize_t store_selfballoon_uphys(struct device *dev, 323static ssize_t store_selfballoon_uphys(struct sys_device *dev,
364 struct device_attribute *attr, 324 struct sysdev_attribute *attr,
365 const char *buf, 325 const char *buf,
366 size_t count) 326 size_t count)
367{ 327{
@@ -377,63 +337,14 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
377 return count; 337 return count;
378} 338}
379 339
380static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, 340static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
381 show_selfballoon_uphys, store_selfballoon_uphys); 341 show_selfballoon_uphys, store_selfballoon_uphys);
382 342
383SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
384 selfballoon_min_usable_mb);
385
386static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
387 struct device_attribute *attr,
388 const char *buf,
389 size_t count)
390{
391 unsigned long val;
392 int err;
393
394 if (!capable(CAP_SYS_ADMIN))
395 return -EPERM;
396 err = strict_strtoul(buf, 10, &val);
397 if (err || val == 0)
398 return -EINVAL;
399 selfballoon_min_usable_mb = val;
400 return count;
401}
402
403static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
404 show_selfballoon_min_usable_mb,
405 store_selfballoon_min_usable_mb);
406
407SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n",
408 selfballoon_reserved_mb);
409
410static ssize_t store_selfballoon_reserved_mb(struct device *dev,
411 struct device_attribute *attr,
412 const char *buf,
413 size_t count)
414{
415 unsigned long val;
416 int err;
417
418 if (!capable(CAP_SYS_ADMIN))
419 return -EPERM;
420 err = strict_strtoul(buf, 10, &val);
421 if (err || val == 0)
422 return -EINVAL;
423 selfballoon_reserved_mb = val;
424 return count;
425}
426
427static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR,
428 show_selfballoon_reserved_mb,
429 store_selfballoon_reserved_mb);
430
431
432#ifdef CONFIG_FRONTSWAP 343#ifdef CONFIG_FRONTSWAP
433SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); 344SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
434 345
435static ssize_t store_frontswap_selfshrinking(struct device *dev, 346static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
436 struct device_attribute *attr, 347 struct sysdev_attribute *attr,
437 const char *buf, 348 const char *buf,
438 size_t count) 349 size_t count)
439{ 350{
@@ -455,13 +366,13 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
455 return count; 366 return count;
456} 367}
457 368
458static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, 369static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
459 show_frontswap_selfshrinking, store_frontswap_selfshrinking); 370 show_frontswap_selfshrinking, store_frontswap_selfshrinking);
460 371
461SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); 372SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia);
462 373
463static ssize_t store_frontswap_inertia(struct device *dev, 374static ssize_t store_frontswap_inertia(struct sys_device *dev,
464 struct device_attribute *attr, 375 struct sysdev_attribute *attr,
465 const char *buf, 376 const char *buf,
466 size_t count) 377 size_t count)
467{ 378{
@@ -478,13 +389,13 @@ static ssize_t store_frontswap_inertia(struct device *dev,
478 return count; 389 return count;
479} 390}
480 391
481static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, 392static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
482 show_frontswap_inertia, store_frontswap_inertia); 393 show_frontswap_inertia, store_frontswap_inertia);
483 394
484SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); 395SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis);
485 396
486static ssize_t store_frontswap_hysteresis(struct device *dev, 397static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
487 struct device_attribute *attr, 398 struct sysdev_attribute *attr,
488 const char *buf, 399 const char *buf,
489 size_t count) 400 size_t count)
490{ 401{
@@ -500,38 +411,36 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
500 return count; 411 return count;
501} 412}
502 413
503static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, 414static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
504 show_frontswap_hysteresis, store_frontswap_hysteresis); 415 show_frontswap_hysteresis, store_frontswap_hysteresis);
505 416
506#endif /* CONFIG_FRONTSWAP */ 417#endif /* CONFIG_FRONTSWAP */
507 418
508static struct attribute *selfballoon_attrs[] = { 419static struct attribute *selfballoon_attrs[] = {
509 &dev_attr_selfballooning.attr, 420 &attr_selfballooning.attr,
510 &dev_attr_selfballoon_interval.attr, 421 &attr_selfballoon_interval.attr,
511 &dev_attr_selfballoon_downhysteresis.attr, 422 &attr_selfballoon_downhysteresis.attr,
512 &dev_attr_selfballoon_uphysteresis.attr, 423 &attr_selfballoon_uphysteresis.attr,
513 &dev_attr_selfballoon_min_usable_mb.attr,
514 &dev_attr_selfballoon_reserved_mb.attr,
515#ifdef CONFIG_FRONTSWAP 424#ifdef CONFIG_FRONTSWAP
516 &dev_attr_frontswap_selfshrinking.attr, 425 &attr_frontswap_selfshrinking.attr,
517 &dev_attr_frontswap_hysteresis.attr, 426 &attr_frontswap_hysteresis.attr,
518 &dev_attr_frontswap_inertia.attr, 427 &attr_frontswap_inertia.attr,
519#endif 428#endif
520 NULL 429 NULL
521}; 430};
522 431
523static const struct attribute_group selfballoon_group = { 432static struct attribute_group selfballoon_group = {
524 .name = "selfballoon", 433 .name = "selfballoon",
525 .attrs = selfballoon_attrs 434 .attrs = selfballoon_attrs
526}; 435};
527#endif 436#endif
528 437
529int register_xen_selfballooning(struct device *dev) 438int register_xen_selfballooning(struct sys_device *sysdev)
530{ 439{
531 int error = -1; 440 int error = -1;
532 441
533#ifdef CONFIG_SYSFS 442#ifdef CONFIG_SYSFS
534 error = sysfs_create_group(&dev->kobj, &selfballoon_group); 443 error = sysfs_create_group(&sysdev->kobj, &selfballoon_group);
535#endif 444#endif
536 return error; 445 return error;
537} 446}
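
[Editor's note] Both versions of selfballoon_process() keep the same hysteresis arithmetic: each interval the balloon target moves a fraction of the distance toward the goal, divided by selfballoon_downhysteresis when shrinking and selfballoon_uphysteresis when growing. Below is a minimal userspace sketch of just that step; the page counts and divisor values are invented for illustration and the goal/floor bookkeeping around it is omitted.

/* Editor's illustration (hypothetical userspace sketch): the hysteresis step
 * from selfballoon_process() -- step part of the way toward the goal each
 * interval, with separate divisors for shrinking and growing. */
#include <stdio.h>

static unsigned long next_target(unsigned long cur_pages,
				 unsigned long goal_pages,
				 unsigned int downhysteresis,
				 unsigned int uphysteresis)
{
	if (cur_pages > goal_pages)
		return cur_pages - (cur_pages - goal_pages) / downhysteresis;
	if (cur_pages < goal_pages)
		return cur_pages + (goal_pages - cur_pages) / uphysteresis;
	return cur_pages;	/* already at the goal */
}

int main(void)
{
	unsigned long cur = 262144;	/* 1 GiB worth of 4 KiB pages */
	unsigned long goal = 131072;	/* goal: 512 MiB */
	int i;

	/* Example divisors: shrink gently (8), grow aggressively (1). */
	for (i = 0; i < 5; i++) {
		cur = next_target(cur, goal, 8, 1);
		printf("interval %d: target %lu pages\n", i, cur);
	}
	return 0;
}
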
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
index 31e2e9050c7..8dca685358b 100644
--- a/drivers/xen/xenbus/Makefile
+++ b/drivers/xen/xenbus/Makefile
@@ -1,5 +1,4 @@
1obj-y += xenbus.o 1obj-y += xenbus.o
2obj-y += xenbus_dev_frontend.o
3 2
4xenbus-objs = 3xenbus-objs =
5xenbus-objs += xenbus_client.o 4xenbus-objs += xenbus_client.o
@@ -10,5 +9,4 @@ xenbus-objs += xenbus_probe.o
10xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o 9xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
11xenbus-objs += $(xenbus-be-objs-y) 10xenbus-objs += $(xenbus-be-objs-y)
12 11
13obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o
14obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o 12obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index bcf3ba4a6ec..cdacf923e07 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -32,39 +32,13 @@
32 32
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/types.h> 34#include <linux/types.h>
35#include <linux/spinlock.h>
36#include <linux/vmalloc.h> 35#include <linux/vmalloc.h>
37#include <linux/export.h>
38#include <asm/xen/hypervisor.h> 36#include <asm/xen/hypervisor.h>
39#include <asm/xen/page.h>
40#include <xen/interface/xen.h> 37#include <xen/interface/xen.h>
41#include <xen/interface/event_channel.h> 38#include <xen/interface/event_channel.h>
42#include <xen/balloon.h>
43#include <xen/events.h> 39#include <xen/events.h>
44#include <xen/grant_table.h> 40#include <xen/grant_table.h>
45#include <xen/xenbus.h> 41#include <xen/xenbus.h>
46#include <xen/xen.h>
47
48#include "xenbus_probe.h"
49
50struct xenbus_map_node {
51 struct list_head next;
52 union {
53 struct vm_struct *area; /* PV */
54 struct page *page; /* HVM */
55 };
56 grant_handle_t handle;
57};
58
59static DEFINE_SPINLOCK(xenbus_valloc_lock);
60static LIST_HEAD(xenbus_valloc_pages);
61
62struct xenbus_ring_ops {
63 int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
64 int (*unmap)(struct xenbus_device *dev, void *vaddr);
65};
66
67static const struct xenbus_ring_ops *ring_ops __read_mostly;
68 42
69const char *xenbus_strstate(enum xenbus_state state) 43const char *xenbus_strstate(enum xenbus_state state)
70{ 44{
@@ -460,93 +434,39 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
460 */ 434 */
461int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) 435int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
462{ 436{
463 return ring_ops->map(dev, gnt_ref, vaddr);
464}
465EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
466
467static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
468 int gnt_ref, void **vaddr)
469{
470 struct gnttab_map_grant_ref op = { 437 struct gnttab_map_grant_ref op = {
471 .flags = GNTMAP_host_map | GNTMAP_contains_pte, 438 .flags = GNTMAP_host_map,
472 .ref = gnt_ref, 439 .ref = gnt_ref,
473 .dom = dev->otherend_id, 440 .dom = dev->otherend_id,
474 }; 441 };
475 struct xenbus_map_node *node;
476 struct vm_struct *area; 442 struct vm_struct *area;
477 pte_t *pte;
478 443
479 *vaddr = NULL; 444 *vaddr = NULL;
480 445
481 node = kzalloc(sizeof(*node), GFP_KERNEL); 446 area = xen_alloc_vm_area(PAGE_SIZE);
482 if (!node) 447 if (!area)
483 return -ENOMEM; 448 return -ENOMEM;
484 449
485 area = alloc_vm_area(PAGE_SIZE, &pte); 450 op.host_addr = (unsigned long)area->addr;
486 if (!area) {
487 kfree(node);
488 return -ENOMEM;
489 }
490
491 op.host_addr = arbitrary_virt_to_machine(pte).maddr;
492 451
493 gnttab_batch_map(&op, 1); 452 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
453 BUG();
494 454
495 if (op.status != GNTST_okay) { 455 if (op.status != GNTST_okay) {
496 free_vm_area(area); 456 xen_free_vm_area(area);
497 kfree(node);
498 xenbus_dev_fatal(dev, op.status, 457 xenbus_dev_fatal(dev, op.status,
499 "mapping in shared page %d from domain %d", 458 "mapping in shared page %d from domain %d",
500 gnt_ref, dev->otherend_id); 459 gnt_ref, dev->otherend_id);
501 return op.status; 460 return op.status;
502 } 461 }
503 462
504 node->handle = op.handle; 463 /* Stuff the handle in an unused field */
505 node->area = area; 464 area->phys_addr = (unsigned long)op.handle;
506
507 spin_lock(&xenbus_valloc_lock);
508 list_add(&node->next, &xenbus_valloc_pages);
509 spin_unlock(&xenbus_valloc_lock);
510 465
511 *vaddr = area->addr; 466 *vaddr = area->addr;
512 return 0; 467 return 0;
513} 468}
514 469EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
515static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
516 int gnt_ref, void **vaddr)
517{
518 struct xenbus_map_node *node;
519 int err;
520 void *addr;
521
522 *vaddr = NULL;
523
524 node = kzalloc(sizeof(*node), GFP_KERNEL);
525 if (!node)
526 return -ENOMEM;
527
528 err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
529 if (err)
530 goto out_err;
531
532 addr = pfn_to_kaddr(page_to_pfn(node->page));
533
534 err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
535 if (err)
536 goto out_err;
537
538 spin_lock(&xenbus_valloc_lock);
539 list_add(&node->next, &xenbus_valloc_pages);
540 spin_unlock(&xenbus_valloc_lock);
541
542 *vaddr = addr;
543 return 0;
544
545 out_err:
546 free_xenballooned_pages(1, &node->page);
547 kfree(node);
548 return err;
549}
550 470
551 471
552/** 472/**
@@ -566,12 +486,15 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
566int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, 486int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
567 grant_handle_t *handle, void *vaddr) 487 grant_handle_t *handle, void *vaddr)
568{ 488{
569 struct gnttab_map_grant_ref op; 489 struct gnttab_map_grant_ref op = {
570 490 .host_addr = (unsigned long)vaddr,
571 gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, 491 .flags = GNTMAP_host_map,
572 dev->otherend_id); 492 .ref = gnt_ref,
493 .dom = dev->otherend_id,
494 };
573 495
574 gnttab_batch_map(&op, 1); 496 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
497 BUG();
575 498
576 if (op.status != GNTST_okay) { 499 if (op.status != GNTST_okay) {
577 xenbus_dev_fatal(dev, op.status, 500 xenbus_dev_fatal(dev, op.status,
@@ -599,87 +522,46 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
599 */ 522 */
600int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) 523int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
601{ 524{
602 return ring_ops->unmap(dev, vaddr); 525 struct vm_struct *area;
603}
604EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
605
606static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
607{
608 struct xenbus_map_node *node;
609 struct gnttab_unmap_grant_ref op = { 526 struct gnttab_unmap_grant_ref op = {
610 .host_addr = (unsigned long)vaddr, 527 .host_addr = (unsigned long)vaddr,
611 }; 528 };
612 unsigned int level; 529
613 530 /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
614 spin_lock(&xenbus_valloc_lock); 531 * method so that we don't have to muck with vmalloc internals here.
615 list_for_each_entry(node, &xenbus_valloc_pages, next) { 532 * We could force the user to hang on to their struct vm_struct from
616 if (node->area->addr == vaddr) { 533 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
617 list_del(&node->next); 534 * this API.
618 goto found; 535 */
619 } 536 read_lock(&vmlist_lock);
537 for (area = vmlist; area != NULL; area = area->next) {
538 if (area->addr == vaddr)
539 break;
620 } 540 }
621 node = NULL; 541 read_unlock(&vmlist_lock);
622 found:
623 spin_unlock(&xenbus_valloc_lock);
624 542
625 if (!node) { 543 if (!area) {
626 xenbus_dev_error(dev, -ENOENT, 544 xenbus_dev_error(dev, -ENOENT,
627 "can't find mapped virtual address %p", vaddr); 545 "can't find mapped virtual address %p", vaddr);
628 return GNTST_bad_virt_addr; 546 return GNTST_bad_virt_addr;
629 } 547 }
630 548
631 op.handle = node->handle; 549 op.handle = (grant_handle_t)area->phys_addr;
632 op.host_addr = arbitrary_virt_to_machine(
633 lookup_address((unsigned long)vaddr, &level)).maddr;
634 550
635 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) 551 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
636 BUG(); 552 BUG();
637 553
638 if (op.status == GNTST_okay) 554 if (op.status == GNTST_okay)
639 free_vm_area(node->area); 555 xen_free_vm_area(area);
640 else 556 else
641 xenbus_dev_error(dev, op.status, 557 xenbus_dev_error(dev, op.status,
642 "unmapping page at handle %d error %d", 558 "unmapping page at handle %d error %d",
643 node->handle, op.status); 559 (int16_t)area->phys_addr, op.status);
644 560
645 kfree(node);
646 return op.status; 561 return op.status;
647} 562}
563EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
648 564
649static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
650{
651 int rv;
652 struct xenbus_map_node *node;
653 void *addr;
654
655 spin_lock(&xenbus_valloc_lock);
656 list_for_each_entry(node, &xenbus_valloc_pages, next) {
657 addr = pfn_to_kaddr(page_to_pfn(node->page));
658 if (addr == vaddr) {
659 list_del(&node->next);
660 goto found;
661 }
662 }
663 node = addr = NULL;
664 found:
665 spin_unlock(&xenbus_valloc_lock);
666
667 if (!node) {
668 xenbus_dev_error(dev, -ENOENT,
669 "can't find mapped virtual address %p", vaddr);
670 return GNTST_bad_virt_addr;
671 }
672
673 rv = xenbus_unmap_ring(dev, node->handle, addr);
674
675 if (!rv)
676 free_xenballooned_pages(1, &node->page);
677 else
678 WARN(1, "Leaking %p\n", vaddr);
679
680 kfree(node);
681 return rv;
682}
683 565
684/** 566/**
685 * xenbus_unmap_ring 567 * xenbus_unmap_ring
@@ -694,9 +576,10 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
694int xenbus_unmap_ring(struct xenbus_device *dev, 576int xenbus_unmap_ring(struct xenbus_device *dev,
695 grant_handle_t handle, void *vaddr) 577 grant_handle_t handle, void *vaddr)
696{ 578{
697 struct gnttab_unmap_grant_ref op; 579 struct gnttab_unmap_grant_ref op = {
698 580 .host_addr = (unsigned long)vaddr,
699 gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle); 581 .handle = handle,
582 };
700 583
701 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) 584 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
702 BUG(); 585 BUG();
@@ -728,21 +611,3 @@ enum xenbus_state xenbus_read_driver_state(const char *path)
728 return result; 611 return result;
729} 612}
730EXPORT_SYMBOL_GPL(xenbus_read_driver_state); 613EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
731
732static const struct xenbus_ring_ops ring_ops_pv = {
733 .map = xenbus_map_ring_valloc_pv,
734 .unmap = xenbus_unmap_ring_vfree_pv,
735};
736
737static const struct xenbus_ring_ops ring_ops_hvm = {
738 .map = xenbus_map_ring_valloc_hvm,
739 .unmap = xenbus_unmap_ring_vfree_hvm,
740};
741
742void __init xenbus_ring_ops_init(void)
743{
744 if (xen_pv_domain())
745 ring_ops = &ring_ops_pv;
746 else
747 ring_ops = &ring_ops_hvm;
748}
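
The map/unmap helpers reworked in this file are the API a backend driver uses to attach to a ring page its peer has granted. A minimal usage sketch, assuming a hypothetical example_info structure and a "ring-ref" xenstore key (neither is part of this patch):

	#include <xen/xenbus.h>

	/* Hypothetical per-device state; only the mapped ring pointer matters here. */
	struct example_info {
		void *ring;
	};

	static int example_connect_ring(struct xenbus_device *dev,
					struct example_info *info)
	{
		int gnt_ref, err;
		void *ring;

		/* Read the grant reference the other end published in xenstore. */
		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%d", &gnt_ref);
		if (err != 1)
			return -EINVAL;

		/* Map the granted page into our own address space. */
		err = xenbus_map_ring_valloc(dev, gnt_ref, &ring);
		if (err)
			return err;

		info->ring = ring;
		return 0;
	}

	static void example_disconnect_ring(struct xenbus_device *dev,
					    struct example_info *info)
	{
		/* Must be balanced with the map above. */
		xenbus_unmap_ring_vfree(dev, info->ring);
		info->ring = NULL;
	}

The valloc/vfree pair must stay balanced: on the left-hand (PV/HVM ring_ops) side of this diff the grant handle is tracked in a xenbus_map_node on xenbus_valloc_pages, whereas the right-hand side stashes it in vm_struct::phys_addr.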
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index c5aa55c5d37..090c61ee8fd 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,9 +212,7 @@ int xb_init_comms(void)
212 printk(KERN_WARNING "XENBUS response ring is not quiescent " 212 printk(KERN_WARNING "XENBUS response ring is not quiescent "
213 "(%08x:%08x): fixing up\n", 213 "(%08x:%08x): fixing up\n",
214 intf->rsp_cons, intf->rsp_prod); 214 intf->rsp_cons, intf->rsp_prod);
215 /* breaks kdump */ 215 intf->rsp_cons = intf->rsp_prod;
216 if (!reset_devices)
217 intf->rsp_cons = intf->rsp_prod;
218 } 216 }
219 217
220 if (xenbus_irq) { 218 if (xenbus_irq) {
@@ -224,7 +222,7 @@ int xb_init_comms(void)
224 int err; 222 int err;
225 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 223 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
226 0, "xenbus", &xb_waitq); 224 0, "xenbus", &xb_waitq);
227 if (err < 0) { 225 if (err <= 0) {
228 printk(KERN_ERR "XENBUS request irq failed %i\n", err); 226 printk(KERN_ERR "XENBUS request irq failed %i\n", err);
229 return err; 227 return err;
230 } 228 }
@@ -234,9 +232,3 @@ int xb_init_comms(void)
234 232
235 return 0; 233 return 0;
236} 234}
237
238void xb_deinit_comms(void)
239{
240 unbind_from_irqhandler(xenbus_irq, &xb_waitq);
241 xenbus_irq = 0;
242}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
index c8abd3b8a6c..c21db751373 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -31,11 +31,8 @@
31#ifndef _XENBUS_COMMS_H 31#ifndef _XENBUS_COMMS_H
32#define _XENBUS_COMMS_H 32#define _XENBUS_COMMS_H
33 33
34#include <linux/fs.h>
35
36int xs_init(void); 34int xs_init(void);
37int xb_init_comms(void); 35int xb_init_comms(void);
38void xb_deinit_comms(void);
39 36
40/* Low level routines. */ 37/* Low level routines. */
41int xb_write(const void *data, unsigned len); 38int xb_write(const void *data, unsigned len);
@@ -46,6 +43,4 @@ int xs_input_avail(void);
46extern struct xenstore_domain_interface *xen_store_interface; 43extern struct xenstore_domain_interface *xen_store_interface;
47extern int xen_store_evtchn; 44extern int xen_store_evtchn;
48 45
49extern const struct file_operations xen_xenbus_fops;
50
51#endif /* _XENBUS_COMMS_H */ 46#endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
deleted file mode 100644
index d7300080076..00000000000
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ /dev/null
@@ -1,141 +0,0 @@
1#include <linux/slab.h>
2#include <linux/types.h>
3#include <linux/mm.h>
4#include <linux/fs.h>
5#include <linux/miscdevice.h>
6#include <linux/module.h>
7#include <linux/capability.h>
8
9#include <xen/xen.h>
10#include <xen/page.h>
11#include <xen/xenbus.h>
12#include <xen/xenbus_dev.h>
13#include <xen/grant_table.h>
14#include <xen/events.h>
15#include <asm/xen/hypervisor.h>
16
17#include "xenbus_comms.h"
18
19MODULE_LICENSE("GPL");
20
21static int xenbus_backend_open(struct inode *inode, struct file *filp)
22{
23 if (!capable(CAP_SYS_ADMIN))
24 return -EPERM;
25
26 return nonseekable_open(inode, filp);
27}
28
29static long xenbus_alloc(domid_t domid)
30{
31 struct evtchn_alloc_unbound arg;
32 int err = -EEXIST;
33
34 xs_suspend();
35
36 /* If xenstored_ready is nonzero, that means we have already talked to
37 * xenstore and set up watches. These watches will be restored by
38 * xs_resume, but that requires communication over the port established
39 * below that is not visible to anyone until the ioctl returns.
40 *
41 * This can be resolved by splitting the ioctl into two parts
42 * (postponing the resume until xenstored is active) but this is
43 * unnecessarily complex for the intended use where xenstored is only
44 * started once - so return -EEXIST if it's already running.
45 */
46 if (xenstored_ready)
47 goto out_err;
48
49 gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid,
50 virt_to_mfn(xen_store_interface), 0 /* writable */);
51
52 arg.dom = DOMID_SELF;
53 arg.remote_dom = domid;
54
55 err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg);
56 if (err)
57 goto out_err;
58
59 if (xen_store_evtchn > 0)
60 xb_deinit_comms();
61
62 xen_store_evtchn = arg.port;
63
64 xs_resume();
65
66 return arg.port;
67
68 out_err:
69 xs_suspend_cancel();
70 return err;
71}
72
73static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data)
74{
75 if (!capable(CAP_SYS_ADMIN))
76 return -EPERM;
77
78 switch (cmd) {
79 case IOCTL_XENBUS_BACKEND_EVTCHN:
80 if (xen_store_evtchn > 0)
81 return xen_store_evtchn;
82 return -ENODEV;
83
84 case IOCTL_XENBUS_BACKEND_SETUP:
85 return xenbus_alloc(data);
86
87 default:
88 return -ENOTTY;
89 }
90}
91
92static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma)
93{
94 size_t size = vma->vm_end - vma->vm_start;
95
96 if (!capable(CAP_SYS_ADMIN))
97 return -EPERM;
98
99 if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
100 return -EINVAL;
101
102 if (remap_pfn_range(vma, vma->vm_start,
103 virt_to_pfn(xen_store_interface),
104 size, vma->vm_page_prot))
105 return -EAGAIN;
106
107 return 0;
108}
109
110static const struct file_operations xenbus_backend_fops = {
111 .open = xenbus_backend_open,
112 .mmap = xenbus_backend_mmap,
113 .unlocked_ioctl = xenbus_backend_ioctl,
114};
115
116static struct miscdevice xenbus_backend_dev = {
117 .minor = MISC_DYNAMIC_MINOR,
118 .name = "xen/xenbus_backend",
119 .fops = &xenbus_backend_fops,
120};
121
122static int __init xenbus_backend_init(void)
123{
124 int err;
125
126 if (!xen_initial_domain())
127 return -ENODEV;
128
129 err = misc_register(&xenbus_backend_dev);
130 if (err)
131 printk(KERN_ERR "Could not register xenbus backend device\n");
132 return err;
133}
134
135static void __exit xenbus_backend_exit(void)
136{
137 misc_deregister(&xenbus_backend_dev);
138}
139
140module_init(xenbus_backend_init);
141module_exit(xenbus_backend_exit);
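
The file deleted above provided the xen/xenbus_backend character device that a userspace xenstore daemon uses to bootstrap itself in dom0. A rough sketch of the intended call sequence, assuming the usual /dev/xen/xenbus_backend node and a userspace-visible copy of xen/xenbus_dev.h (both assumptions, not part of this patch):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <xen/xenbus_dev.h>	/* IOCTL_XENBUS_BACKEND_*; include path is an assumption */

	int main(void)
	{
		unsigned long domid = 0;	/* illustrative: domain that will run xenstored */
		void *iface;
		int fd, port;

		/* The misc device registered as "xen/xenbus_backend". */
		fd = open("/dev/xen/xenbus_backend", O_RDWR);
		if (fd < 0)
			return 1;

		/* Reuse an existing xenstore event channel, or set one up. */
		port = ioctl(fd, IOCTL_XENBUS_BACKEND_EVTCHN);
		if (port < 0)
			port = ioctl(fd, IOCTL_XENBUS_BACKEND_SETUP, domid);

		/* Map the shared xenstore interface page (one page, offset 0). */
		iface = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (iface == MAP_FAILED)
			return 1;

		printf("xenstore event channel %d, interface at %p\n", port, iface);
		munmap(iface, 4096);
		close(fd);
		return 0;
	}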
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
deleted file mode 100644
index ac727028e65..00000000000
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ /dev/null
@@ -1,629 +0,0 @@
1/*
2 * Driver giving user-space access to the kernel's xenbus connection
3 * to xenstore.
4 *
5 * Copyright (c) 2005, Christian Limpach
6 * Copyright (c) 2005, Rusty Russell, IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 *
32 * Changes:
33 * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem
34 * and /proc/xen compatibility mount point.
35 * Turned xenfs into a loadable module.
36 */
37
38#include <linux/kernel.h>
39#include <linux/errno.h>
40#include <linux/uio.h>
41#include <linux/notifier.h>
42#include <linux/wait.h>
43#include <linux/fs.h>
44#include <linux/poll.h>
45#include <linux/mutex.h>
46#include <linux/sched.h>
47#include <linux/spinlock.h>
48#include <linux/mount.h>
49#include <linux/pagemap.h>
50#include <linux/uaccess.h>
51#include <linux/init.h>
52#include <linux/namei.h>
53#include <linux/string.h>
54#include <linux/slab.h>
55#include <linux/miscdevice.h>
56#include <linux/module.h>
57
58#include "xenbus_comms.h"
59
60#include <xen/xenbus.h>
61#include <xen/xen.h>
62#include <asm/xen/hypervisor.h>
63
64MODULE_LICENSE("GPL");
65
66/*
67 * An element of a list of outstanding transactions, for which we're
68 * still waiting a reply.
69 */
70struct xenbus_transaction_holder {
71 struct list_head list;
72 struct xenbus_transaction handle;
73};
74
75/*
76 * A buffer of data on the queue.
77 */
78struct read_buffer {
79 struct list_head list;
80 unsigned int cons;
81 unsigned int len;
82 char msg[];
83};
84
85struct xenbus_file_priv {
86 /*
87 * msgbuffer_mutex is held while partial requests are built up
88 * and complete requests are acted on. It therefore protects
89 * the "transactions" and "watches" lists, and the partial
90 * request length and buffer.
91 *
92 * reply_mutex protects the reply being built up to return to
93 * usermode. It nests inside msgbuffer_mutex but may be held
94 * alone during a watch callback.
95 */
96 struct mutex msgbuffer_mutex;
97
98 /* In-progress transactions */
99 struct list_head transactions;
100
101 /* Active watches. */
102 struct list_head watches;
103
104 /* Partial request. */
105 unsigned int len;
106 union {
107 struct xsd_sockmsg msg;
108 char buffer[XENSTORE_PAYLOAD_MAX];
109 } u;
110
111 /* Response queue. */
112 struct mutex reply_mutex;
113 struct list_head read_buffers;
114 wait_queue_head_t read_waitq;
115
116};
117
118/* Read out any raw xenbus messages queued up. */
119static ssize_t xenbus_file_read(struct file *filp,
120 char __user *ubuf,
121 size_t len, loff_t *ppos)
122{
123 struct xenbus_file_priv *u = filp->private_data;
124 struct read_buffer *rb;
125 unsigned i;
126 int ret;
127
128 mutex_lock(&u->reply_mutex);
129again:
130 while (list_empty(&u->read_buffers)) {
131 mutex_unlock(&u->reply_mutex);
132 if (filp->f_flags & O_NONBLOCK)
133 return -EAGAIN;
134
135 ret = wait_event_interruptible(u->read_waitq,
136 !list_empty(&u->read_buffers));
137 if (ret)
138 return ret;
139 mutex_lock(&u->reply_mutex);
140 }
141
142 rb = list_entry(u->read_buffers.next, struct read_buffer, list);
143 i = 0;
144 while (i < len) {
145 unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
146
147 ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
148
149 i += sz - ret;
150 rb->cons += sz - ret;
151
152 if (ret != 0) {
153 if (i == 0)
154 i = -EFAULT;
155 goto out;
156 }
157
158 /* Clear out buffer if it has been consumed */
159 if (rb->cons == rb->len) {
160 list_del(&rb->list);
161 kfree(rb);
162 if (list_empty(&u->read_buffers))
163 break;
164 rb = list_entry(u->read_buffers.next,
165 struct read_buffer, list);
166 }
167 }
168 if (i == 0)
169 goto again;
170
171out:
172 mutex_unlock(&u->reply_mutex);
173 return i;
174}
175
176/*
177 * Add a buffer to the queue. Caller must hold the appropriate lock
178 * if the queue is not local. (Commonly the caller will build up
179 * multiple queued buffers on a temporary local list, and then add it
180 * to the appropriate list under lock once all the buffers have een
181 * successfully allocated.)
182 */
183static int queue_reply(struct list_head *queue, const void *data, size_t len)
184{
185 struct read_buffer *rb;
186
187 if (len == 0)
188 return 0;
189
190 rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
191 if (rb == NULL)
192 return -ENOMEM;
193
194 rb->cons = 0;
195 rb->len = len;
196
197 memcpy(rb->msg, data, len);
198
199 list_add_tail(&rb->list, queue);
200 return 0;
201}
202
203/*
204 * Free all the read_buffer s on a list.
205 * Caller must have sole reference to list.
206 */
207static void queue_cleanup(struct list_head *list)
208{
209 struct read_buffer *rb;
210
211 while (!list_empty(list)) {
212 rb = list_entry(list->next, struct read_buffer, list);
213 list_del(list->next);
214 kfree(rb);
215 }
216}
217
218struct watch_adapter {
219 struct list_head list;
220 struct xenbus_watch watch;
221 struct xenbus_file_priv *dev_data;
222 char *token;
223};
224
225static void free_watch_adapter(struct watch_adapter *watch)
226{
227 kfree(watch->watch.node);
228 kfree(watch->token);
229 kfree(watch);
230}
231
232static struct watch_adapter *alloc_watch_adapter(const char *path,
233 const char *token)
234{
235 struct watch_adapter *watch;
236
237 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
238 if (watch == NULL)
239 goto out_fail;
240
241 watch->watch.node = kstrdup(path, GFP_KERNEL);
242 if (watch->watch.node == NULL)
243 goto out_free;
244
245 watch->token = kstrdup(token, GFP_KERNEL);
246 if (watch->token == NULL)
247 goto out_free;
248
249 return watch;
250
251out_free:
252 free_watch_adapter(watch);
253
254out_fail:
255 return NULL;
256}
257
258static void watch_fired(struct xenbus_watch *watch,
259 const char **vec,
260 unsigned int len)
261{
262 struct watch_adapter *adap;
263 struct xsd_sockmsg hdr;
264 const char *path, *token;
265 int path_len, tok_len, body_len, data_len = 0;
266 int ret;
267 LIST_HEAD(staging_q);
268
269 adap = container_of(watch, struct watch_adapter, watch);
270
271 path = vec[XS_WATCH_PATH];
272 token = adap->token;
273
274 path_len = strlen(path) + 1;
275 tok_len = strlen(token) + 1;
276 if (len > 2)
277 data_len = vec[len] - vec[2] + 1;
278 body_len = path_len + tok_len + data_len;
279
280 hdr.type = XS_WATCH_EVENT;
281 hdr.len = body_len;
282
283 mutex_lock(&adap->dev_data->reply_mutex);
284
285 ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
286 if (!ret)
287 ret = queue_reply(&staging_q, path, path_len);
288 if (!ret)
289 ret = queue_reply(&staging_q, token, tok_len);
290 if (!ret && len > 2)
291 ret = queue_reply(&staging_q, vec[2], data_len);
292
293 if (!ret) {
294 /* success: pass reply list onto watcher */
295 list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
296 wake_up(&adap->dev_data->read_waitq);
297 } else
298 queue_cleanup(&staging_q);
299
300 mutex_unlock(&adap->dev_data->reply_mutex);
301}
302
303static int xenbus_write_transaction(unsigned msg_type,
304 struct xenbus_file_priv *u)
305{
306 int rc;
307 void *reply;
308 struct xenbus_transaction_holder *trans = NULL;
309 LIST_HEAD(staging_q);
310
311 if (msg_type == XS_TRANSACTION_START) {
312 trans = kmalloc(sizeof(*trans), GFP_KERNEL);
313 if (!trans) {
314 rc = -ENOMEM;
315 goto out;
316 }
317 }
318
319 reply = xenbus_dev_request_and_reply(&u->u.msg);
320 if (IS_ERR(reply)) {
321 kfree(trans);
322 rc = PTR_ERR(reply);
323 goto out;
324 }
325
326 if (msg_type == XS_TRANSACTION_START) {
327 trans->handle.id = simple_strtoul(reply, NULL, 0);
328
329 list_add(&trans->list, &u->transactions);
330 } else if (msg_type == XS_TRANSACTION_END) {
331 list_for_each_entry(trans, &u->transactions, list)
332 if (trans->handle.id == u->u.msg.tx_id)
333 break;
334 BUG_ON(&trans->list == &u->transactions);
335 list_del(&trans->list);
336
337 kfree(trans);
338 }
339
340 mutex_lock(&u->reply_mutex);
341 rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
342 if (!rc)
343 rc = queue_reply(&staging_q, reply, u->u.msg.len);
344 if (!rc) {
345 list_splice_tail(&staging_q, &u->read_buffers);
346 wake_up(&u->read_waitq);
347 } else {
348 queue_cleanup(&staging_q);
349 }
350 mutex_unlock(&u->reply_mutex);
351
352 kfree(reply);
353
354out:
355 return rc;
356}
357
358static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
359{
360 struct watch_adapter *watch, *tmp_watch;
361 char *path, *token;
362 int err, rc;
363 LIST_HEAD(staging_q);
364
365 path = u->u.buffer + sizeof(u->u.msg);
366 token = memchr(path, 0, u->u.msg.len);
367 if (token == NULL) {
368 rc = -EILSEQ;
369 goto out;
370 }
371 token++;
372 if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
373 rc = -EILSEQ;
374 goto out;
375 }
376
377 if (msg_type == XS_WATCH) {
378 watch = alloc_watch_adapter(path, token);
379 if (watch == NULL) {
380 rc = -ENOMEM;
381 goto out;
382 }
383
384 watch->watch.callback = watch_fired;
385 watch->dev_data = u;
386
387 err = register_xenbus_watch(&watch->watch);
388 if (err) {
389 free_watch_adapter(watch);
390 rc = err;
391 goto out;
392 }
393 list_add(&watch->list, &u->watches);
394 } else {
395 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
396 if (!strcmp(watch->token, token) &&
397 !strcmp(watch->watch.node, path)) {
398 unregister_xenbus_watch(&watch->watch);
399 list_del(&watch->list);
400 free_watch_adapter(watch);
401 break;
402 }
403 }
404 }
405
406 /* Success. Synthesize a reply to say all is OK. */
407 {
408 struct {
409 struct xsd_sockmsg hdr;
410 char body[3];
411 } __packed reply = {
412 {
413 .type = msg_type,
414 .len = sizeof(reply.body)
415 },
416 "OK"
417 };
418
419 mutex_lock(&u->reply_mutex);
420 rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
421 wake_up(&u->read_waitq);
422 mutex_unlock(&u->reply_mutex);
423 }
424
425out:
426 return rc;
427}
428
429static ssize_t xenbus_file_write(struct file *filp,
430 const char __user *ubuf,
431 size_t len, loff_t *ppos)
432{
433 struct xenbus_file_priv *u = filp->private_data;
434 uint32_t msg_type;
435 int rc = len;
436 int ret;
437 LIST_HEAD(staging_q);
438
439 /*
440 * We're expecting usermode to be writing properly formed
441 * xenbus messages. If they write an incomplete message we
442 * buffer it up. Once it is complete, we act on it.
443 */
444
445 /*
446 * Make sure concurrent writers can't stomp all over each
447 * other's messages and make a mess of our partial message
448 * buffer. We don't make any attemppt to stop multiple
449 * writers from making a mess of each other's incomplete
450 * messages; we're just trying to guarantee our own internal
451 * consistency and make sure that single writes are handled
452 * atomically.
453 */
454 mutex_lock(&u->msgbuffer_mutex);
455
456 /* Get this out of the way early to avoid confusion */
457 if (len == 0)
458 goto out;
459
460 /* Can't write a xenbus message larger we can buffer */
461 if (len > sizeof(u->u.buffer) - u->len) {
462 /* On error, dump existing buffer */
463 u->len = 0;
464 rc = -EINVAL;
465 goto out;
466 }
467
468 ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
469
470 if (ret != 0) {
471 rc = -EFAULT;
472 goto out;
473 }
474
475 /* Deal with a partial copy. */
476 len -= ret;
477 rc = len;
478
479 u->len += len;
480
481 /* Return if we haven't got a full message yet */
482 if (u->len < sizeof(u->u.msg))
483 goto out; /* not even the header yet */
484
485 /* If we're expecting a message that's larger than we can
486 possibly send, dump what we have and return an error. */
487 if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
488 rc = -E2BIG;
489 u->len = 0;
490 goto out;
491 }
492
493 if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
494 goto out; /* incomplete data portion */
495
496 /*
497 * OK, now we have a complete message. Do something with it.
498 */
499
500 msg_type = u->u.msg.type;
501
502 switch (msg_type) {
503 case XS_WATCH:
504 case XS_UNWATCH:
505 /* (Un)Ask for some path to be watched for changes */
506 ret = xenbus_write_watch(msg_type, u);
507 break;
508
509 default:
510 /* Send out a transaction */
511 ret = xenbus_write_transaction(msg_type, u);
512 break;
513 }
514 if (ret != 0)
515 rc = ret;
516
517 /* Buffered message consumed */
518 u->len = 0;
519
520 out:
521 mutex_unlock(&u->msgbuffer_mutex);
522 return rc;
523}
524
525static int xenbus_file_open(struct inode *inode, struct file *filp)
526{
527 struct xenbus_file_priv *u;
528
529 if (xen_store_evtchn == 0)
530 return -ENOENT;
531
532 nonseekable_open(inode, filp);
533
534 u = kzalloc(sizeof(*u), GFP_KERNEL);
535 if (u == NULL)
536 return -ENOMEM;
537
538 INIT_LIST_HEAD(&u->transactions);
539 INIT_LIST_HEAD(&u->watches);
540 INIT_LIST_HEAD(&u->read_buffers);
541 init_waitqueue_head(&u->read_waitq);
542
543 mutex_init(&u->reply_mutex);
544 mutex_init(&u->msgbuffer_mutex);
545
546 filp->private_data = u;
547
548 return 0;
549}
550
551static int xenbus_file_release(struct inode *inode, struct file *filp)
552{
553 struct xenbus_file_priv *u = filp->private_data;
554 struct xenbus_transaction_holder *trans, *tmp;
555 struct watch_adapter *watch, *tmp_watch;
556 struct read_buffer *rb, *tmp_rb;
557
558 /*
559 * No need for locking here because there are no other users,
560 * by definition.
561 */
562
563 list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
564 xenbus_transaction_end(trans->handle, 1);
565 list_del(&trans->list);
566 kfree(trans);
567 }
568
569 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
570 unregister_xenbus_watch(&watch->watch);
571 list_del(&watch->list);
572 free_watch_adapter(watch);
573 }
574
575 list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
576 list_del(&rb->list);
577 kfree(rb);
578 }
579 kfree(u);
580
581 return 0;
582}
583
584static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
585{
586 struct xenbus_file_priv *u = file->private_data;
587
588 poll_wait(file, &u->read_waitq, wait);
589 if (!list_empty(&u->read_buffers))
590 return POLLIN | POLLRDNORM;
591 return 0;
592}
593
594const struct file_operations xen_xenbus_fops = {
595 .read = xenbus_file_read,
596 .write = xenbus_file_write,
597 .open = xenbus_file_open,
598 .release = xenbus_file_release,
599 .poll = xenbus_file_poll,
600 .llseek = no_llseek,
601};
602EXPORT_SYMBOL_GPL(xen_xenbus_fops);
603
604static struct miscdevice xenbus_dev = {
605 .minor = MISC_DYNAMIC_MINOR,
606 .name = "xen/xenbus",
607 .fops = &xen_xenbus_fops,
608};
609
610static int __init xenbus_init(void)
611{
612 int err;
613
614 if (!xen_domain())
615 return -ENODEV;
616
617 err = misc_register(&xenbus_dev);
618 if (err)
619 printk(KERN_ERR "Could not register xenbus frontend device\n");
620 return err;
621}
622
623static void __exit xenbus_exit(void)
624{
625 misc_deregister(&xenbus_dev);
626}
627
628module_init(xenbus_init);
629module_exit(xenbus_exit);
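
The frontend device deleted here (its older incarnation lives under xenfs on the right-hand side) speaks the raw xenstore wire protocol: each write is a struct xsd_sockmsg header followed by its payload, and replies come back with the same framing on read(). A hedged userspace sketch of a single XS_READ request, assuming the /dev/xen/xenbus node and the xen/io/xs_wire.h header are available to userspace:

	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <xen/io/xs_wire.h>	/* struct xsd_sockmsg, XS_READ */

	int main(void)
	{
		const char *path = "domid";	/* key to read, NUL-terminated */
		char buf[sizeof(struct xsd_sockmsg) + 64];
		struct xsd_sockmsg *msg = (struct xsd_sockmsg *)buf;
		int fd = open("/dev/xen/xenbus", O_RDWR);

		if (fd < 0)
			return 1;

		msg->type   = XS_READ;
		msg->req_id = 1;
		msg->tx_id  = 0;		/* no transaction */
		msg->len    = strlen(path) + 1;
		memcpy(buf + sizeof(*msg), path, msg->len);

		/* The driver buffers partial writes until header + payload arrive. */
		if (write(fd, buf, sizeof(*msg) + msg->len) < 0)
			return 1;

		/* Reply: an xsd_sockmsg header followed by the value of "domid". */
		if (read(fd, buf, sizeof(buf)) > 0)
			printf("reply type %u, %u payload bytes\n", msg->type, msg->len);

		close(fd);
		return 0;
	}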
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 038b71dbf03..bd2f90c9ac8 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -46,7 +46,6 @@
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/io.h> 47#include <linux/io.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/module.h>
50 49
51#include <asm/page.h> 50#include <asm/page.h>
52#include <asm/pgtable.h> 51#include <asm/pgtable.h>
@@ -257,12 +256,11 @@ int xenbus_dev_remove(struct device *_dev)
257 DPRINTK("%s", dev->nodename); 256 DPRINTK("%s", dev->nodename);
258 257
259 free_otherend_watch(dev); 258 free_otherend_watch(dev);
259 free_otherend_details(dev);
260 260
261 if (drv->remove) 261 if (drv->remove)
262 drv->remove(dev); 262 drv->remove(dev);
263 263
264 free_otherend_details(dev);
265
266 xenbus_switch_state(dev, XenbusStateClosed); 264 xenbus_switch_state(dev, XenbusStateClosed);
267 return 0; 265 return 0;
268} 266}
@@ -292,9 +290,14 @@ void xenbus_dev_shutdown(struct device *_dev)
292EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); 290EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
293 291
294int xenbus_register_driver_common(struct xenbus_driver *drv, 292int xenbus_register_driver_common(struct xenbus_driver *drv,
295 struct xen_bus_type *bus) 293 struct xen_bus_type *bus,
294 struct module *owner,
295 const char *mod_name)
296{ 296{
297 drv->driver.name = drv->name;
297 drv->driver.bus = &bus->bus; 298 drv->driver.bus = &bus->bus;
299 drv->driver.owner = owner;
300 drv->driver.mod_name = mod_name;
298 301
299 return driver_register(&drv->driver); 302 return driver_register(&drv->driver);
300} 303}
@@ -306,7 +309,8 @@ void xenbus_unregister_driver(struct xenbus_driver *drv)
306} 309}
307EXPORT_SYMBOL_GPL(xenbus_unregister_driver); 310EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
308 311
309struct xb_find_info { 312struct xb_find_info
313{
310 struct xenbus_device *dev; 314 struct xenbus_device *dev;
311 const char *nodename; 315 const char *nodename;
312}; 316};
@@ -324,8 +328,8 @@ static int cmp_dev(struct device *dev, void *data)
324 return 0; 328 return 0;
325} 329}
326 330
327static struct xenbus_device *xenbus_device_find(const char *nodename, 331struct xenbus_device *xenbus_device_find(const char *nodename,
328 struct bus_type *bus) 332 struct bus_type *bus)
329{ 333{
330 struct xb_find_info info = { .dev = NULL, .nodename = nodename }; 334 struct xb_find_info info = { .dev = NULL, .nodename = nodename };
331 335
@@ -635,7 +639,7 @@ int xenbus_dev_cancel(struct device *dev)
635EXPORT_SYMBOL_GPL(xenbus_dev_cancel); 639EXPORT_SYMBOL_GPL(xenbus_dev_cancel);
636 640
637/* A flag to determine if xenstored is 'ready' (i.e. has started) */ 641/* A flag to determine if xenstored is 'ready' (i.e. has started) */
638int xenstored_ready; 642int xenstored_ready = 0;
639 643
640 644
641int register_xenstore_notifier(struct notifier_block *nb) 645int register_xenstore_notifier(struct notifier_block *nb)
@@ -680,100 +684,64 @@ static int __init xenbus_probe_initcall(void)
680 684
681device_initcall(xenbus_probe_initcall); 685device_initcall(xenbus_probe_initcall);
682 686
683/* Set up event channel for xenstored which is run as a local process 687static int __init xenbus_init(void)
684 * (this is normally used only in dom0)
685 */
686static int __init xenstored_local_init(void)
687{ 688{
688 int err = 0; 689 int err = 0;
689 unsigned long page = 0; 690 unsigned long page = 0;
690 struct evtchn_alloc_unbound alloc_unbound;
691
692 /* Allocate Xenstore page */
693 page = get_zeroed_page(GFP_KERNEL);
694 if (!page)
695 goto out_err;
696 691
697 xen_store_mfn = xen_start_info->store_mfn = 692 DPRINTK("");
698 pfn_to_mfn(virt_to_phys((void *)page) >>
699 PAGE_SHIFT);
700 693
701 /* Next allocate a local port which xenstored can bind to */ 694 err = -ENODEV;
702 alloc_unbound.dom = DOMID_SELF; 695 if (!xen_domain())
703 alloc_unbound.remote_dom = DOMID_SELF; 696 return err;
704 697
705 err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, 698 /*
706 &alloc_unbound); 699 * Domain0 doesn't have a store_evtchn or store_mfn yet.
707 if (err == -ENOSYS) 700 */
708 goto out_err; 701 if (xen_initial_domain()) {
702 struct evtchn_alloc_unbound alloc_unbound;
709 703
710 BUG_ON(err); 704 /* Allocate Xenstore page */
711 xen_store_evtchn = xen_start_info->store_evtchn = 705 page = get_zeroed_page(GFP_KERNEL);
712 alloc_unbound.port; 706 if (!page)
707 goto out_error;
713 708
714 return 0; 709 xen_store_mfn = xen_start_info->store_mfn =
710 pfn_to_mfn(virt_to_phys((void *)page) >>
711 PAGE_SHIFT);
715 712
716 out_err: 713 /* Next allocate a local port which xenstored can bind to */
717 if (page != 0) 714 alloc_unbound.dom = DOMID_SELF;
718 free_page(page); 715 alloc_unbound.remote_dom = 0;
719 return err;
720}
721 716
722enum xenstore_init { 717 err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
723 UNKNOWN, 718 &alloc_unbound);
724 PV, 719 if (err == -ENOSYS)
725 HVM, 720 goto out_error;
726 LOCAL,
727};
728static int __init xenbus_init(void)
729{
730 int err = 0;
731 enum xenstore_init usage = UNKNOWN;
732 uint64_t v = 0;
733 721
734 if (!xen_domain()) 722 BUG_ON(err);
735 return -ENODEV; 723 xen_store_evtchn = xen_start_info->store_evtchn =
724 alloc_unbound.port;
736 725
737 xenbus_ring_ops_init();
738
739 if (xen_pv_domain())
740 usage = PV;
741 if (xen_hvm_domain())
742 usage = HVM;
743 if (xen_hvm_domain() && xen_initial_domain())
744 usage = LOCAL;
745 if (xen_pv_domain() && !xen_start_info->store_evtchn)
746 usage = LOCAL;
747 if (xen_pv_domain() && xen_start_info->store_evtchn)
748 xenstored_ready = 1;
749
750 switch (usage) {
751 case LOCAL:
752 err = xenstored_local_init();
753 if (err)
754 goto out_error;
755 xen_store_interface = mfn_to_virt(xen_store_mfn); 726 xen_store_interface = mfn_to_virt(xen_store_mfn);
756 break; 727 } else {
757 case PV: 728 if (xen_hvm_domain()) {
758 xen_store_evtchn = xen_start_info->store_evtchn; 729 uint64_t v = 0;
759 xen_store_mfn = xen_start_info->store_mfn; 730 err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
760 xen_store_interface = mfn_to_virt(xen_store_mfn); 731 if (err)
761 break; 732 goto out_error;
762 case HVM: 733 xen_store_evtchn = (int)v;
763 err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); 734 err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
764 if (err) 735 if (err)
765 goto out_error; 736 goto out_error;
766 xen_store_evtchn = (int)v; 737 xen_store_mfn = (unsigned long)v;
767 err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); 738 xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
768 if (err) 739 } else {
769 goto out_error; 740 xen_store_evtchn = xen_start_info->store_evtchn;
770 xen_store_mfn = (unsigned long)v; 741 xen_store_mfn = xen_start_info->store_mfn;
771 xen_store_interface = 742 xen_store_interface = mfn_to_virt(xen_store_mfn);
772 ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); 743 xenstored_ready = 1;
773 break; 744 }
774 default:
775 pr_warn("Xenstore state unknown\n");
776 break;
777 } 745 }
778 746
779 /* Initialize the interface to xenstore. */ 747 /* Initialize the interface to xenstore. */
@@ -792,7 +760,12 @@ static int __init xenbus_init(void)
792 proc_mkdir("xen", NULL); 760 proc_mkdir("xen", NULL);
793#endif 761#endif
794 762
795out_error: 763 return 0;
764
765 out_error:
766 if (page != 0)
767 free_page(page);
768
796 return err; 769 return err;
797} 770}
798 771
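
register_xenstore_notifier(), visible in the context above, is how other kernel code defers work until the store is usable; its callback has the same shape as frontend_probe_and_watch() further down. A minimal sketch with an invented callback body:

	#include <linux/kernel.h>
	#include <linux/init.h>
	#include <linux/notifier.h>
	#include <xen/xenbus.h>

	static int example_xenstore_ready(struct notifier_block *nb,
					  unsigned long event, void *data)
	{
		/* Runs once xenstore is up; e.g. start enumerating our devices. */
		printk(KERN_INFO "example: xenstore is ready\n");
		return NOTIFY_DONE;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_xenstore_ready,
	};

	static int __init example_init(void)
	{
		register_xenstore_notifier(&example_nb);
		return 0;
	}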
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index bb4f92ed873..b814935378c 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -36,7 +36,8 @@
36 36
37#define XEN_BUS_ID_SIZE 20 37#define XEN_BUS_ID_SIZE 20
38 38
39struct xen_bus_type { 39struct xen_bus_type
40{
40 char *root; 41 char *root;
41 unsigned int levels; 42 unsigned int levels;
42 int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); 43 int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
@@ -53,7 +54,9 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
53extern int xenbus_dev_probe(struct device *_dev); 54extern int xenbus_dev_probe(struct device *_dev);
54extern int xenbus_dev_remove(struct device *_dev); 55extern int xenbus_dev_remove(struct device *_dev);
55extern int xenbus_register_driver_common(struct xenbus_driver *drv, 56extern int xenbus_register_driver_common(struct xenbus_driver *drv,
56 struct xen_bus_type *bus); 57 struct xen_bus_type *bus,
58 struct module *owner,
59 const char *mod_name);
57extern int xenbus_probe_node(struct xen_bus_type *bus, 60extern int xenbus_probe_node(struct xen_bus_type *bus,
58 const char *type, 61 const char *type,
59 const char *nodename); 62 const char *nodename);
@@ -74,6 +77,4 @@ extern void xenbus_otherend_changed(struct xenbus_watch *watch,
74extern int xenbus_read_otherend_details(struct xenbus_device *xendev, 77extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
75 char *id_node, char *path_node); 78 char *id_node, char *path_node);
76 79
77void xenbus_ring_ops_init(void);
78
79#endif 80#endif
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 257be37d909..60adf919d78 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -42,7 +42,6 @@
42#include <linux/fcntl.h> 42#include <linux/fcntl.h>
43#include <linux/mm.h> 43#include <linux/mm.h>
44#include <linux/notifier.h> 44#include <linux/notifier.h>
45#include <linux/export.h>
46 45
47#include <asm/page.h> 46#include <asm/page.h>
48#include <asm/pgtable.h> 47#include <asm/pgtable.h>
@@ -105,6 +104,8 @@ static int xenbus_uevent_backend(struct device *dev,
105 104
106 xdev = to_xenbus_device(dev); 105 xdev = to_xenbus_device(dev);
107 bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); 106 bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
107 if (xdev == NULL)
108 return -ENODEV;
108 109
109 if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) 110 if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
110 return -ENOMEM; 111 return -ENOMEM;
@@ -232,13 +233,15 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
232} 233}
233EXPORT_SYMBOL_GPL(xenbus_dev_is_online); 234EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
234 235
235int xenbus_register_backend(struct xenbus_driver *drv) 236int __xenbus_register_backend(struct xenbus_driver *drv,
237 struct module *owner, const char *mod_name)
236{ 238{
237 drv->read_otherend_details = read_frontend_details; 239 drv->read_otherend_details = read_frontend_details;
238 240
239 return xenbus_register_driver_common(drv, &xenbus_backend); 241 return xenbus_register_driver_common(drv, &xenbus_backend,
242 owner, mod_name);
240} 243}
241EXPORT_SYMBOL_GPL(xenbus_register_backend); 244EXPORT_SYMBOL_GPL(__xenbus_register_backend);
242 245
243static int backend_probe_and_watch(struct notifier_block *notifier, 246static int backend_probe_and_watch(struct notifier_block *notifier,
244 unsigned long event, 247 unsigned long event,
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 3159a37d966..ed2ba474a56 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -13,7 +13,6 @@
13#include <linux/kthread.h> 13#include <linux/kthread.h>
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/io.h> 15#include <linux/io.h>
16#include <linux/module.h>
17 16
18#include <asm/page.h> 17#include <asm/page.h>
19#include <asm/pgtable.h> 18#include <asm/pgtable.h>
@@ -21,7 +20,6 @@
21#include <xen/xenbus.h> 20#include <xen/xenbus.h>
22#include <xen/events.h> 21#include <xen/events.h>
23#include <xen/page.h> 22#include <xen/page.h>
24#include <xen/xen.h>
25 23
26#include <xen/platform_pci.h> 24#include <xen/platform_pci.h>
27 25
@@ -54,12 +52,6 @@ static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type,
54 char *nodename; 52 char *nodename;
55 int err; 53 int err;
56 54
57 /* ignore console/0 */
58 if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) {
59 DPRINTK("Ignoring buggy device entry console/0");
60 return 0;
61 }
62
63 nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); 55 nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name);
64 if (!nodename) 56 if (!nodename)
65 return -ENOMEM; 57 return -ENOMEM;
@@ -136,7 +128,7 @@ static int read_backend_details(struct xenbus_device *xendev)
136 return xenbus_read_otherend_details(xendev, "backend-id", "backend"); 128 return xenbus_read_otherend_details(xendev, "backend-id", "backend");
137} 129}
138 130
139static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential) 131static int is_device_connecting(struct device *dev, void *data)
140{ 132{
141 struct xenbus_device *xendev = to_xenbus_device(dev); 133 struct xenbus_device *xendev = to_xenbus_device(dev);
142 struct device_driver *drv = data; 134 struct device_driver *drv = data;
@@ -153,41 +145,16 @@ static int is_device_connecting(struct device *dev, void *data, bool ignore_none
153 if (drv && (dev->driver != drv)) 145 if (drv && (dev->driver != drv))
154 return 0; 146 return 0;
155 147
156 if (ignore_nonessential) {
157 /* With older QEMU, for PVonHVM guests the guest config files
158 * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0']
159 * which is nonsensical as there is no PV FB (there can be
160 * a PVKB) running as HVM guest. */
161
162 if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0))
163 return 0;
164
165 if ((strncmp(xendev->nodename, "device/vfb", 10) == 0))
166 return 0;
167 }
168 xendrv = to_xenbus_driver(dev->driver); 148 xendrv = to_xenbus_driver(dev->driver);
169 return (xendev->state < XenbusStateConnected || 149 return (xendev->state < XenbusStateConnected ||
170 (xendev->state == XenbusStateConnected && 150 (xendev->state == XenbusStateConnected &&
171 xendrv->is_ready && !xendrv->is_ready(xendev))); 151 xendrv->is_ready && !xendrv->is_ready(xendev)));
172} 152}
173static int essential_device_connecting(struct device *dev, void *data)
174{
175 return is_device_connecting(dev, data, true /* ignore PV[KBB+FB] */);
176}
177static int non_essential_device_connecting(struct device *dev, void *data)
178{
179 return is_device_connecting(dev, data, false);
180}
181 153
182static int exists_essential_connecting_device(struct device_driver *drv) 154static int exists_connecting_device(struct device_driver *drv)
183{ 155{
184 return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, 156 return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
185 essential_device_connecting); 157 is_device_connecting);
186}
187static int exists_non_essential_connecting_device(struct device_driver *drv)
188{
189 return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
190 non_essential_device_connecting);
191} 158}
192 159
193static int print_device_status(struct device *dev, void *data) 160static int print_device_status(struct device *dev, void *data)
@@ -218,23 +185,6 @@ static int print_device_status(struct device *dev, void *data)
218/* We only wait for device setup after most initcalls have run. */ 185/* We only wait for device setup after most initcalls have run. */
219static int ready_to_wait_for_devices; 186static int ready_to_wait_for_devices;
220 187
221static bool wait_loop(unsigned long start, unsigned int max_delay,
222 unsigned int *seconds_waited)
223{
224 if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) {
225 if (!*seconds_waited)
226 printk(KERN_WARNING "XENBUS: Waiting for "
227 "devices to initialise: ");
228 *seconds_waited += 5;
229 printk("%us...", max_delay - *seconds_waited);
230 if (*seconds_waited == max_delay)
231 return true;
232 }
233
234 schedule_timeout_interruptible(HZ/10);
235
236 return false;
237}
238/* 188/*
239 * On a 5-minute timeout, wait for all devices currently configured. We need 189 * On a 5-minute timeout, wait for all devices currently configured. We need
240 * to do this to guarantee that the filesystems and / or network devices 190 * to do this to guarantee that the filesystems and / or network devices
@@ -258,14 +208,19 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
258 if (!ready_to_wait_for_devices || !xen_domain()) 208 if (!ready_to_wait_for_devices || !xen_domain())
259 return; 209 return;
260 210
261 while (exists_non_essential_connecting_device(drv)) 211 while (exists_connecting_device(drv)) {
262 if (wait_loop(start, 30, &seconds_waited)) 212 if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
263 break; 213 if (!seconds_waited)
264 214 printk(KERN_WARNING "XENBUS: Waiting for "
265 /* Skips PVKB and PVFB check.*/ 215 "devices to initialise: ");
266 while (exists_essential_connecting_device(drv)) 216 seconds_waited += 5;
267 if (wait_loop(start, 270, &seconds_waited)) 217 printk("%us...", 300 - seconds_waited);
268 break; 218 if (seconds_waited == 300)
219 break;
220 }
221
222 schedule_timeout_interruptible(HZ/10);
223 }
269 224
270 if (seconds_waited) 225 if (seconds_waited)
271 printk("\n"); 226 printk("\n");
@@ -274,13 +229,15 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
274 print_device_status); 229 print_device_status);
275} 230}
276 231
277int xenbus_register_frontend(struct xenbus_driver *drv) 232int __xenbus_register_frontend(struct xenbus_driver *drv,
233 struct module *owner, const char *mod_name)
278{ 234{
279 int ret; 235 int ret;
280 236
281 drv->read_otherend_details = read_backend_details; 237 drv->read_otherend_details = read_backend_details;
282 238
283 ret = xenbus_register_driver_common(drv, &xenbus_frontend); 239 ret = xenbus_register_driver_common(drv, &xenbus_frontend,
240 owner, mod_name);
284 if (ret) 241 if (ret)
285 return ret; 242 return ret;
286 243
@@ -289,133 +246,12 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
289 246
290 return 0; 247 return 0;
291} 248}
292EXPORT_SYMBOL_GPL(xenbus_register_frontend); 249EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
293
294static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
295static int backend_state;
296
297static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
298 const char **v, unsigned int l)
299{
300 xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
301 printk(KERN_DEBUG "XENBUS: backend %s %s\n",
302 v[XS_WATCH_PATH], xenbus_strstate(backend_state));
303 wake_up(&backend_state_wq);
304}
305
306static void xenbus_reset_wait_for_backend(char *be, int expected)
307{
308 long timeout;
309 timeout = wait_event_interruptible_timeout(backend_state_wq,
310 backend_state == expected, 5 * HZ);
311 if (timeout <= 0)
312 printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
313}
314
315/*
316 * Reset frontend if it is in Connected or Closed state.
317 * Wait for backend to catch up.
318 * State Connected happens during kdump, Closed after kexec.
319 */
320static void xenbus_reset_frontend(char *fe, char *be, int be_state)
321{
322 struct xenbus_watch be_watch;
323
324 printk(KERN_DEBUG "XENBUS: backend %s %s\n",
325 be, xenbus_strstate(be_state));
326
327 memset(&be_watch, 0, sizeof(be_watch));
328 be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
329 if (!be_watch.node)
330 return;
331
332 be_watch.callback = xenbus_reset_backend_state_changed;
333 backend_state = XenbusStateUnknown;
334
335 printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
336 register_xenbus_watch(&be_watch);
337
338 /* fall through to forward backend to state XenbusStateInitialising */
339 switch (be_state) {
340 case XenbusStateConnected:
341 xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
342 xenbus_reset_wait_for_backend(be, XenbusStateClosing);
343
344 case XenbusStateClosing:
345 xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
346 xenbus_reset_wait_for_backend(be, XenbusStateClosed);
347
348 case XenbusStateClosed:
349 xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
350 xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
351 }
352
353 unregister_xenbus_watch(&be_watch);
354 printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
355 kfree(be_watch.node);
356}
357
358static void xenbus_check_frontend(char *class, char *dev)
359{
360 int be_state, fe_state, err;
361 char *backend, *frontend;
362
363 frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
364 if (!frontend)
365 return;
366
367 err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
368 if (err != 1)
369 goto out;
370
371 switch (fe_state) {
372 case XenbusStateConnected:
373 case XenbusStateClosed:
374 printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
375 frontend, xenbus_strstate(fe_state));
376 backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
377 if (!backend || IS_ERR(backend))
378 goto out;
379 err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
380 if (err == 1)
381 xenbus_reset_frontend(frontend, backend, be_state);
382 kfree(backend);
383 break;
384 default:
385 break;
386 }
387out:
388 kfree(frontend);
389}
390
391static void xenbus_reset_state(void)
392{
393 char **devclass, **dev;
394 int devclass_n, dev_n;
395 int i, j;
396
397 devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
398 if (IS_ERR(devclass))
399 return;
400
401 for (i = 0; i < devclass_n; i++) {
402 dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
403 if (IS_ERR(dev))
404 continue;
405 for (j = 0; j < dev_n; j++)
406 xenbus_check_frontend(devclass[i], dev[j]);
407 kfree(dev);
408 }
409 kfree(devclass);
410}
411 250
412static int frontend_probe_and_watch(struct notifier_block *notifier, 251static int frontend_probe_and_watch(struct notifier_block *notifier,
413 unsigned long event, 252 unsigned long event,
414 void *data) 253 void *data)
415{ 254{
416 /* reset devices in Connected or Closed state */
417 if (xen_hvm_domain())
418 xenbus_reset_state();
419 /* Enumerate devices in xenstore and watch for changes. */ 255 /* Enumerate devices in xenstore and watch for changes. */
420 xenbus_probe_devices(&xenbus_frontend); 256 xenbus_probe_devices(&xenbus_frontend);
421 register_xenbus_watch(&fe_watch); 257 register_xenbus_watch(&fe_watch);
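
The registration path changed in this file is what a PV frontend driver calls at module init. A sketch of a hypothetical "examplefront" driver; the exact xenbus_driver fields differ slightly between the two sides of this diff, so the initializer below is illustrative rather than definitive:

	#include <linux/init.h>
	#include <xen/xenbus.h>

	/* Hypothetical frontend; the "examplefront" device type is invented. */
	static const struct xenbus_device_id examplefront_ids[] = {
		{ "examplefront" },
		{ "" }
	};

	static int examplefront_probe(struct xenbus_device *dev,
				      const struct xenbus_device_id *id)
	{
		/* Allocate state, publish ring-ref/event-channel, switch state... */
		return 0;
	}

	static void examplefront_otherend_changed(struct xenbus_device *dev,
						  enum xenbus_state backend_state)
	{
		/* React to the backend moving through the XenbusState* machine. */
	}

	static struct xenbus_driver examplefront_driver = {
		.name             = "examplefront",
		.ids              = examplefront_ids,
		.probe            = examplefront_probe,
		.otherend_changed = examplefront_otherend_changed,
	};

	static int __init examplefront_init(void)
	{
		/* A plain function on one side of this diff, a THIS_MODULE-passing
		 * macro wrapping __xenbus_register_frontend() on the other. */
		return xenbus_register_frontend(&examplefront_driver);
	}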
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 88e677b0de7..daee5db4bef 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -44,9 +44,7 @@
44#include <linux/rwsem.h> 44#include <linux/rwsem.h>
45#include <linux/module.h> 45#include <linux/module.h>
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <asm/xen/hypervisor.h>
48#include <xen/xenbus.h> 47#include <xen/xenbus.h>
49#include <xen/xen.h>
50#include "xenbus_comms.h" 48#include "xenbus_comms.h"
51 49
52struct xs_stored_msg { 50struct xs_stored_msg {
@@ -533,18 +531,21 @@ int xenbus_printf(struct xenbus_transaction t,
533{ 531{
534 va_list ap; 532 va_list ap;
535 int ret; 533 int ret;
536 char *buf; 534#define PRINTF_BUFFER_SIZE 4096
535 char *printf_buffer;
536
537 printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
538 if (printf_buffer == NULL)
539 return -ENOMEM;
537 540
538 va_start(ap, fmt); 541 va_start(ap, fmt);
539 buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap); 542 ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
540 va_end(ap); 543 va_end(ap);
541 544
542 if (!buf) 545 BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
543 return -ENOMEM; 546 ret = xenbus_write(t, dir, node, printf_buffer);
544 547
545 ret = xenbus_write(t, dir, node, buf); 548 kfree(printf_buffer);
546
547 kfree(buf);
548 549
549 return ret; 550 return ret;
550} 551}
@@ -618,45 +619,6 @@ static struct xenbus_watch *find_watch(const char *token)
618 619
619 return NULL; 620 return NULL;
620} 621}
621/*
622 * Certain older XenBus toolstack cannot handle reading values that are
623 * not populated. Some Xen 3.4 installation are incapable of doing this
624 * so if we are running on anything older than 4 do not attempt to read
625 * control/platform-feature-xs_reset_watches.
626 */
627static bool xen_strict_xenbus_quirk(void)
628{
629#ifdef CONFIG_X86
630 uint32_t eax, ebx, ecx, edx, base;
631
632 base = xen_cpuid_base();
633 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
634
635 if ((eax >> 16) < 4)
636 return true;
637#endif
638 return false;
639
640}
641static void xs_reset_watches(void)
642{
643 int err, supported = 0;
644
645 if (!xen_hvm_domain() || xen_initial_domain())
646 return;
647
648 if (xen_strict_xenbus_quirk())
649 return;
650
651 err = xenbus_scanf(XBT_NIL, "control",
652 "platform-feature-xs_reset_watches", "%d", &supported);
653 if (err != 1 || !supported)
654 return;
655
656 err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
657 if (err && err != -EEXIST)
658 printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
659}
660 622
661/* Register callback to watch this node. */ 623/* Register callback to watch this node. */
662int register_xenbus_watch(struct xenbus_watch *watch) 624int register_xenbus_watch(struct xenbus_watch *watch)
@@ -676,7 +638,8 @@ int register_xenbus_watch(struct xenbus_watch *watch)
676 638
677 err = xs_watch(watch->node, token); 639 err = xs_watch(watch->node, token);
678 640
679 if (err) { 641 /* Ignore errors due to multiple registration. */
642 if ((err != 0) && (err != -EEXIST)) {
680 spin_lock(&watches_lock); 643 spin_lock(&watches_lock);
681 list_del(&watch->list); 644 list_del(&watch->list);
682 spin_unlock(&watches_lock); 645 spin_unlock(&watches_lock);
@@ -940,8 +903,5 @@ int xs_init(void)
940 if (IS_ERR(task)) 903 if (IS_ERR(task))
941 return PTR_ERR(task); 904 return PTR_ERR(task);
942 905
943 /* shutdown watches for kexec boot */
944 xs_reset_watches();
945
946 return 0; 906 return 0;
947} 907}
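
xenbus_printf(), whose buffer handling changes above, is usually paired with the transaction helpers when a driver publishes several keys atomically; xenbus_transaction_end() already appears in the xenbus_dev_frontend.c hunks earlier in this diff. A short sketch with illustrative key names:

	#include <xen/xenbus.h>

	/* Hedged sketch: write two keys under dev->nodename in one transaction. */
	static int example_publish(struct xenbus_device *dev, int ring_ref, int evtchn)
	{
		struct xenbus_transaction xbt;
		int err;

	again:
		err = xenbus_transaction_start(&xbt);
		if (err)
			return err;

		err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ring_ref);
		if (!err)
			err = xenbus_printf(xbt, dev->nodename, "event-channel", "%d", evtchn);

		if (err) {
			xenbus_transaction_end(xbt, 1);	/* abort */
			return err;
		}

		err = xenbus_transaction_end(xbt, 0);	/* commit */
		if (err == -EAGAIN)
			goto again;			/* store was busy; retry */
		return err;
	}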
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index b019865fcc5..4fde9440fe1 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_XENFS) += xenfs.o 1obj-$(CONFIG_XENFS) += xenfs.o
2 2
3xenfs-y = super.o 3xenfs-y = super.o xenbus.o privcmd.o
4xenfs-$(CONFIG_XEN_DOM0) += xenstored.o 4xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 459b9ac45cf..1aa38971984 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -16,8 +16,6 @@
16#include <xen/xen.h> 16#include <xen/xen.h>
17 17
18#include "xenfs.h" 18#include "xenfs.h"
19#include "../privcmd.h"
20#include "../xenbus/xenbus_comms.h"
21 19
22#include <asm/xen/hypervisor.h> 20#include <asm/xen/hypervisor.h>
23 21
@@ -30,8 +28,7 @@ static struct inode *xenfs_make_inode(struct super_block *sb, int mode)
30 28
31 if (ret) { 29 if (ret) {
32 ret->i_mode = mode; 30 ret->i_mode = mode;
33 ret->i_uid = GLOBAL_ROOT_UID; 31 ret->i_uid = ret->i_gid = 0;
34 ret->i_gid = GLOBAL_ROOT_GID;
35 ret->i_blocks = 0; 32 ret->i_blocks = 0;
36 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; 33 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
37 } 34 }
@@ -85,9 +82,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
85{ 82{
86 static struct tree_descr xenfs_files[] = { 83 static struct tree_descr xenfs_files[] = {
87 [1] = {}, 84 [1] = {},
88 { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, 85 { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
89 { "capabilities", &capabilities_file_ops, S_IRUGO }, 86 { "capabilities", &capabilities_file_ops, S_IRUGO },
90 { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, 87 { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
91 {""}, 88 {""},
92 }; 89 };
93 int rc; 90 int rc;
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index 6b80c7779c0..b68aa620000 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -1,6 +1,8 @@
1#ifndef _XENFS_XENBUS_H 1#ifndef _XENFS_XENBUS_H
2#define _XENFS_XENBUS_H 2#define _XENFS_XENBUS_H
3 3
4extern const struct file_operations xenbus_file_ops;
5extern const struct file_operations privcmd_file_ops;
4extern const struct file_operations xsd_kva_file_ops; 6extern const struct file_operations xsd_kva_file_ops;
5extern const struct file_operations xsd_port_file_ops; 7extern const struct file_operations xsd_port_file_ops;
6 8