aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-24 19:02:08 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-24 19:02:08 -0400
commit: b5f4035adfffbcc6b478de5b8c44b618b3124aff (patch)
tree: e7a5f011d8aaf5c95edf933f98f25dfc8fa46837 /drivers
parent: ce004178be1bbaa292e9e6497939e2970300095a (diff)
parent: 68c2c39a76b094e9b2773e5846424ea674bf2c46 (diff)
Merge tag 'stable/for-linus-3.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
Pull Xen updates from Konrad Rzeszutek Wilk:
"Features:
 * Extend the APIC ops implementation and add IRQ_WORKER vector support so that 'perf' can work properly.
 * Fix self-ballooning code, and balloon logic when booting as initial domain.
 * Move array printing code to generic debugfs
 * Support XenBus domains.
 * Lazily free grants when a domain is dead/non-existent.
 * In M2P code use batching calls
Bug-fixes:
 * Fix NULL dereference in allocation failure path (hvc_xen)
 * Fix unbinding of IRQ_WORKER vector during vCPU hot-unplug
 * Fix HVM guest resume - we would leak a PIRQ value instead of reusing the existing one."
Fix up add-add conflicts in arch/x86/xen/enlighten.c due to addition of apic ipi interface next to the new apic_id functions.
* tag 'stable/for-linus-3.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen: do not map the same GSI twice in PVHVM guests.
  hvc_xen: NULL dereference on allocation failure
  xen: Add selfballoning memory reservation tunable.
  xenbus: Add support for xenbus backend in stub domain
  xen/smp: unbind irqworkX when unplugging vCPUs.
  xen: enter/exit lazy_mmu_mode around m2p_override calls
  xen/acpi/sleep: Enable ACPI sleep via the __acpi_os_prepare_sleep
  xen: implement IRQ_WORK_VECTOR handler
  xen: implement apic ipi interface
  xen/setup: update VA mapping when releasing memory during setup
  xen/setup: Combine the two hypercall functions - since they are quite similar.
  xen/setup: Populate freed MFNs from non-RAM E820 entries and gaps to E820 RAM
  xen/setup: Only print "Freeing XXX-YYY pfn range: Z pages freed" if Z > 0
  xen/gnttab: add deferred freeing logic
  debugfs: Add support to print u32 array in debugfs
  xen/p2m: An early bootup variant of set_phys_to_machine
  xen/p2m: Collapse early_alloc_p2m_middle redundant checks.
  xen/p2m: Allow alloc_p2m_middle to call reserve_brk depending on argument
  xen/p2m: Move code around to allow for better re-usage.
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/xen/Makefile 2
-rw-r--r-- drivers/xen/acpi.c 62
-rw-r--r-- drivers/xen/events.c 5
-rw-r--r-- drivers/xen/grant-table.c 125
-rw-r--r-- drivers/xen/xen-selfballoon.c 34
-rw-r--r-- drivers/xen/xenbus/xenbus_comms.c 6
-rw-r--r-- drivers/xen/xenbus/xenbus_comms.h 1
-rw-r--r-- drivers/xen/xenbus/xenbus_dev_backend.c 51
8 files changed, 272 insertions, 14 deletions
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 9adc5be57b13..fc3488631136 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
17obj-$(CONFIG_XEN_PVHVM) += platform-pci.o 17obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
18obj-$(CONFIG_XEN_TMEM) += tmem.o 18obj-$(CONFIG_XEN_TMEM) += tmem.o
19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o 19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
20obj-$(CONFIG_XEN_DOM0) += pci.o 20obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
21obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ 21obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
22obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o 22obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
23obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o 23obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
new file mode 100644
index 000000000000..119d42a2bf57
--- /dev/null
+++ b/drivers/xen/acpi.c
@@ -0,0 +1,62 @@
1/******************************************************************************
2 * acpi.c
3 * acpi file for domain 0 kernel
4 *
5 * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
6 * Copyright (c) 2011 Yu Ke ke.yu@intel.com
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#include <xen/acpi.h>
34#include <xen/interface/platform.h>
35#include <asm/xen/hypercall.h>
36#include <asm/xen/hypervisor.h>
37
38int xen_acpi_notify_hypervisor_state(u8 sleep_state,
39 u32 pm1a_cnt, u32 pm1b_cnt)
40{
41 struct xen_platform_op op = {
42 .cmd = XENPF_enter_acpi_sleep,
43 .interface_version = XENPF_INTERFACE_VERSION,
44 .u = {
45 .enter_acpi_sleep = {
46 .pm1a_cnt_val = (u16)pm1a_cnt,
47 .pm1b_cnt_val = (u16)pm1b_cnt,
48 .sleep_state = sleep_state,
49 },
50 },
51 };
52
53 if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
54 WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
55 "Email xen-devel@lists.xensource.com Thank you.\n", \
56 pm1a_cnt, pm1b_cnt);
57 return -1;
58 }
59
60 HYPERVISOR_dom0_op(&op);
61 return 1;
62}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0a8a17cd80be..6908e4ce2a0d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -611,7 +611,7 @@ static void disable_pirq(struct irq_data *data)
611 disable_dynirq(data); 611 disable_dynirq(data);
612} 612}
613 613
614static int find_irq_by_gsi(unsigned gsi) 614int xen_irq_from_gsi(unsigned gsi)
615{ 615{
616 struct irq_info *info; 616 struct irq_info *info;
617 617
@@ -625,6 +625,7 @@ static int find_irq_by_gsi(unsigned gsi)
625 625
626 return -1; 626 return -1;
627} 627}
628EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
628 629
629/* 630/*
630 * Do not make any assumptions regarding the relationship between the 631 * Do not make any assumptions regarding the relationship between the
@@ -644,7 +645,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
644 645
645 mutex_lock(&irq_mapping_update_lock); 646 mutex_lock(&irq_mapping_update_lock);
646 647
647 irq = find_irq_by_gsi(gsi); 648 irq = xen_irq_from_gsi(gsi);
648 if (irq != -1) { 649 if (irq != -1) {
649 printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", 650 printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
650 irq, gsi); 651 irq, gsi);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index f100ce20b16b..0bfc1ef11259 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -38,6 +38,7 @@
38#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
39#include <linux/uaccess.h> 39#include <linux/uaccess.h>
40#include <linux/io.h> 40#include <linux/io.h>
41#include <linux/hardirq.h>
41 42
42#include <xen/xen.h> 43#include <xen/xen.h>
43#include <xen/interface/xen.h> 44#include <xen/interface/xen.h>
@@ -426,10 +427,8 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
426 nflags = *pflags; 427 nflags = *pflags;
427 do { 428 do {
428 flags = nflags; 429 flags = nflags;
429 if (flags & (GTF_reading|GTF_writing)) { 430 if (flags & (GTF_reading|GTF_writing))
430 printk(KERN_ALERT "WARNING: g.e. still in use!\n");
431 return 0; 431 return 0;
432 }
433 } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); 432 } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
434 433
435 return 1; 434 return 1;
@@ -458,12 +457,103 @@ static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
458 return 1; 457 return 1;
459} 458}
460 459
461int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) 460static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
462{ 461{
463 return gnttab_interface->end_foreign_access_ref(ref, readonly); 462 return gnttab_interface->end_foreign_access_ref(ref, readonly);
464} 463}
464
465int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
466{
467 if (_gnttab_end_foreign_access_ref(ref, readonly))
468 return 1;
469 pr_warn("WARNING: g.e. %#x still in use!\n", ref);
470 return 0;
471}
465EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); 472EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
466 473
474struct deferred_entry {
475 struct list_head list;
476 grant_ref_t ref;
477 bool ro;
478 uint16_t warn_delay;
479 struct page *page;
480};
481static LIST_HEAD(deferred_list);
482static void gnttab_handle_deferred(unsigned long);
483static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
484
485static void gnttab_handle_deferred(unsigned long unused)
486{
487 unsigned int nr = 10;
488 struct deferred_entry *first = NULL;
489 unsigned long flags;
490
491 spin_lock_irqsave(&gnttab_list_lock, flags);
492 while (nr--) {
493 struct deferred_entry *entry
494 = list_first_entry(&deferred_list,
495 struct deferred_entry, list);
496
497 if (entry == first)
498 break;
499 list_del(&entry->list);
500 spin_unlock_irqrestore(&gnttab_list_lock, flags);
501 if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
502 put_free_entry(entry->ref);
503 if (entry->page) {
504 pr_debug("freeing g.e. %#x (pfn %#lx)\n",
505 entry->ref, page_to_pfn(entry->page));
506 __free_page(entry->page);
507 } else
508 pr_info("freeing g.e. %#x\n", entry->ref);
509 kfree(entry);
510 entry = NULL;
511 } else {
512 if (!--entry->warn_delay)
513 pr_info("g.e. %#x still pending\n",
514 entry->ref);
515 if (!first)
516 first = entry;
517 }
518 spin_lock_irqsave(&gnttab_list_lock, flags);
519 if (entry)
520 list_add_tail(&entry->list, &deferred_list);
521 else if (list_empty(&deferred_list))
522 break;
523 }
524 if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
525 deferred_timer.expires = jiffies + HZ;
526 add_timer(&deferred_timer);
527 }
528 spin_unlock_irqrestore(&gnttab_list_lock, flags);
529}
530
531static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
532 struct page *page)
533{
534 struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
535 const char *what = KERN_WARNING "leaking";
536
537 if (entry) {
538 unsigned long flags;
539
540 entry->ref = ref;
541 entry->ro = readonly;
542 entry->page = page;
543 entry->warn_delay = 60;
544 spin_lock_irqsave(&gnttab_list_lock, flags);
545 list_add_tail(&entry->list, &deferred_list);
546 if (!timer_pending(&deferred_timer)) {
547 deferred_timer.expires = jiffies + HZ;
548 add_timer(&deferred_timer);
549 }
550 spin_unlock_irqrestore(&gnttab_list_lock, flags);
551 what = KERN_DEBUG "deferring";
552 }
553 printk("%s g.e. %#x (pfn %#lx)\n",
554 what, ref, page ? page_to_pfn(page) : -1);
555}
556
467void gnttab_end_foreign_access(grant_ref_t ref, int readonly, 557void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
468 unsigned long page) 558 unsigned long page)
469{ 559{
@@ -471,12 +561,9 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
471 put_free_entry(ref); 561 put_free_entry(ref);
472 if (page != 0) 562 if (page != 0)
473 free_page(page); 563 free_page(page);
474 } else { 564 } else
475 /* XXX This needs to be fixed so that the ref and page are 565 gnttab_add_deferred(ref, readonly,
476 placed on a list to be freed up later. */ 566 page ? virt_to_page(page) : NULL);
477 printk(KERN_WARNING
478 "WARNING: leaking g.e. and page still in use!\n");
479 }
480} 567}
481EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); 568EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
482 569
@@ -741,6 +828,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
741 struct page **pages, unsigned int count) 828 struct page **pages, unsigned int count)
742{ 829{
743 int i, ret; 830 int i, ret;
831 bool lazy = false;
744 pte_t *pte; 832 pte_t *pte;
745 unsigned long mfn; 833 unsigned long mfn;
746 834
@@ -751,6 +839,11 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
751 if (xen_feature(XENFEAT_auto_translated_physmap)) 839 if (xen_feature(XENFEAT_auto_translated_physmap))
752 return ret; 840 return ret;
753 841
842 if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
843 arch_enter_lazy_mmu_mode();
844 lazy = true;
845 }
846
754 for (i = 0; i < count; i++) { 847 for (i = 0; i < count; i++) {
755 /* Do not add to override if the map failed. */ 848 /* Do not add to override if the map failed. */
756 if (map_ops[i].status) 849 if (map_ops[i].status)
@@ -769,6 +862,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
769 return ret; 862 return ret;
770 } 863 }
771 864
865 if (lazy)
866 arch_leave_lazy_mmu_mode();
867
772 return ret; 868 return ret;
773} 869}
774EXPORT_SYMBOL_GPL(gnttab_map_refs); 870EXPORT_SYMBOL_GPL(gnttab_map_refs);
@@ -777,6 +873,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
777 struct page **pages, unsigned int count, bool clear_pte) 873 struct page **pages, unsigned int count, bool clear_pte)
778{ 874{
779 int i, ret; 875 int i, ret;
876 bool lazy = false;
780 877
781 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); 878 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
782 if (ret) 879 if (ret)
@@ -785,12 +882,20 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
785 if (xen_feature(XENFEAT_auto_translated_physmap)) 882 if (xen_feature(XENFEAT_auto_translated_physmap))
786 return ret; 883 return ret;
787 884
885 if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
886 arch_enter_lazy_mmu_mode();
887 lazy = true;
888 }
889
788 for (i = 0; i < count; i++) { 890 for (i = 0; i < count; i++) {
789 ret = m2p_remove_override(pages[i], clear_pte); 891 ret = m2p_remove_override(pages[i], clear_pte);
790 if (ret) 892 if (ret)
791 return ret; 893 return ret;
792 } 894 }
793 895
896 if (lazy)
897 arch_leave_lazy_mmu_mode();
898
794 return ret; 899 return ret;
795} 900}
796EXPORT_SYMBOL_GPL(gnttab_unmap_refs); 901EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 146c94897016..7d041cb6da26 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -105,6 +105,12 @@ static unsigned int selfballoon_interval __read_mostly = 5;
105 */ 105 */
106static unsigned int selfballoon_min_usable_mb; 106static unsigned int selfballoon_min_usable_mb;
107 107
108/*
109 * Amount of RAM in MB to add to the target number of pages.
110 * Can be used to reserve some more room for caches and the like.
111 */
112static unsigned int selfballoon_reserved_mb;
113
108static void selfballoon_process(struct work_struct *work); 114static void selfballoon_process(struct work_struct *work);
109static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); 115static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
110 116
@@ -217,7 +223,8 @@ static void selfballoon_process(struct work_struct *work)
217 cur_pages = totalram_pages; 223 cur_pages = totalram_pages;
218 tgt_pages = cur_pages; /* default is no change */ 224 tgt_pages = cur_pages; /* default is no change */
219 goal_pages = percpu_counter_read_positive(&vm_committed_as) + 225 goal_pages = percpu_counter_read_positive(&vm_committed_as) +
220 totalreserve_pages; 226 totalreserve_pages +
227 MB2PAGES(selfballoon_reserved_mb);
221#ifdef CONFIG_FRONTSWAP 228#ifdef CONFIG_FRONTSWAP
222 /* allow space for frontswap pages to be repatriated */ 229 /* allow space for frontswap pages to be repatriated */
223 if (frontswap_selfshrinking && frontswap_enabled) 230 if (frontswap_selfshrinking && frontswap_enabled)
@@ -397,6 +404,30 @@ static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
397 show_selfballoon_min_usable_mb, 404 show_selfballoon_min_usable_mb,
398 store_selfballoon_min_usable_mb); 405 store_selfballoon_min_usable_mb);
399 406
407SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n",
408 selfballoon_reserved_mb);
409
410static ssize_t store_selfballoon_reserved_mb(struct device *dev,
411 struct device_attribute *attr,
412 const char *buf,
413 size_t count)
414{
415 unsigned long val;
416 int err;
417
418 if (!capable(CAP_SYS_ADMIN))
419 return -EPERM;
420 err = strict_strtoul(buf, 10, &val);
421 if (err || val == 0)
422 return -EINVAL;
423 selfballoon_reserved_mb = val;
424 return count;
425}
426
427static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR,
428 show_selfballoon_reserved_mb,
429 store_selfballoon_reserved_mb);
430
400 431
401#ifdef CONFIG_FRONTSWAP 432#ifdef CONFIG_FRONTSWAP
402SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); 433SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
@@ -480,6 +511,7 @@ static struct attribute *selfballoon_attrs[] = {
480 &dev_attr_selfballoon_downhysteresis.attr, 511 &dev_attr_selfballoon_downhysteresis.attr,
481 &dev_attr_selfballoon_uphysteresis.attr, 512 &dev_attr_selfballoon_uphysteresis.attr,
482 &dev_attr_selfballoon_min_usable_mb.attr, 513 &dev_attr_selfballoon_min_usable_mb.attr,
514 &dev_attr_selfballoon_reserved_mb.attr,
483#ifdef CONFIG_FRONTSWAP 515#ifdef CONFIG_FRONTSWAP
484 &dev_attr_frontswap_selfshrinking.attr, 516 &dev_attr_frontswap_selfshrinking.attr,
485 &dev_attr_frontswap_hysteresis.attr, 517 &dev_attr_frontswap_hysteresis.attr,
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 2eff7a6aaa20..52fe7ad07666 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -234,3 +234,9 @@ int xb_init_comms(void)
234 234
235 return 0; 235 return 0;
236} 236}
237
238void xb_deinit_comms(void)
239{
240 unbind_from_irqhandler(xenbus_irq, &xb_waitq);
241 xenbus_irq = 0;
242}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
index 6e42800fa499..c8abd3b8a6c4 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -35,6 +35,7 @@
35 35
36int xs_init(void); 36int xs_init(void);
37int xb_init_comms(void); 37int xb_init_comms(void);
38void xb_deinit_comms(void);
38 39
39/* Low level routines. */ 40/* Low level routines. */
40int xb_write(const void *data, unsigned len); 41int xb_write(const void *data, unsigned len);
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index 3d3be78c1093..be738c43104b 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -8,7 +8,11 @@
8 8
9#include <xen/xen.h> 9#include <xen/xen.h>
10#include <xen/page.h> 10#include <xen/page.h>
11#include <xen/xenbus.h>
11#include <xen/xenbus_dev.h> 12#include <xen/xenbus_dev.h>
13#include <xen/grant_table.h>
14#include <xen/events.h>
15#include <asm/xen/hypervisor.h>
12 16
13#include "xenbus_comms.h" 17#include "xenbus_comms.h"
14 18
@@ -22,6 +26,50 @@ static int xenbus_backend_open(struct inode *inode, struct file *filp)
22 return nonseekable_open(inode, filp); 26 return nonseekable_open(inode, filp);
23} 27}
24 28
29static long xenbus_alloc(domid_t domid)
30{
31 struct evtchn_alloc_unbound arg;
32 int err = -EEXIST;
33
34 xs_suspend();
35
36 /* If xenstored_ready is nonzero, that means we have already talked to
37 * xenstore and set up watches. These watches will be restored by
38 * xs_resume, but that requires communication over the port established
39 * below that is not visible to anyone until the ioctl returns.
40 *
41 * This can be resolved by splitting the ioctl into two parts
42 * (postponing the resume until xenstored is active) but this is
43 * unnecessarily complex for the intended use where xenstored is only
44 * started once - so return -EEXIST if it's already running.
45 */
46 if (xenstored_ready)
47 goto out_err;
48
49 gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid,
50 virt_to_mfn(xen_store_interface), 0 /* writable */);
51
52 arg.dom = DOMID_SELF;
53 arg.remote_dom = domid;
54
55 err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg);
56 if (err)
57 goto out_err;
58
59 if (xen_store_evtchn > 0)
60 xb_deinit_comms();
61
62 xen_store_evtchn = arg.port;
63
64 xs_resume();
65
66 return arg.port;
67
68 out_err:
69 xs_suspend_cancel();
70 return err;
71}
72
25static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data) 73static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data)
26{ 74{
27 if (!capable(CAP_SYS_ADMIN)) 75 if (!capable(CAP_SYS_ADMIN))
@@ -33,6 +81,9 @@ static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned l
33 return xen_store_evtchn; 81 return xen_store_evtchn;
34 return -ENODEV; 82 return -ENODEV;
35 83
84 case IOCTL_XENBUS_BACKEND_SETUP:
85 return xenbus_alloc(data);
86
36 default: 87 default:
37 return -ENOTTY; 88 return -ENOTTY;
38 } 89 }