path: root/arch/x86/xen
author    Ingo Molnar <mingo@elte.hu>  2010-08-31 03:45:21 -0400
committer Ingo Molnar <mingo@elte.hu>  2010-08-31 03:45:46 -0400
commit    daab7fc734a53fdeaf844b7c03053118ad1769da (patch)
tree      575deb3cdcc6dda562acaed6f7c29bc81ae01cf2 /arch/x86/xen
parent    774ea0bcb27f57b6fd521b3b6c43237782fed4b9 (diff)
parent    2bfc96a127bc1cc94d26bfaa40159966064f9c8c (diff)
Merge commit 'v2.6.36-rc3' into x86/memblock
Conflicts:
	arch/x86/kernel/trampoline.c
	mm/memblock.c

Merge reason: Resolve the conflicts, update to latest upstream.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/Kconfig                  5
-rw-r--r--  arch/x86/xen/Makefile                 3
-rw-r--r--  arch/x86/xen/enlighten.c            201
-rw-r--r--  arch/x86/xen/mmu.c                  328
-rw-r--r--  arch/x86/xen/mmu.h                    1
-rw-r--r--  arch/x86/xen/pci-swiotlb-xen.c       58
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c  143
-rw-r--r--  arch/x86/xen/setup.c                 72
-rw-r--r--  arch/x86/xen/smp.c                    2
-rw-r--r--  arch/x86/xen/suspend.c               12
-rw-r--r--  arch/x86/xen/time.c                  96
-rw-r--r--  arch/x86/xen/xen-ops.h               13
12 files changed, 863 insertions(+), 71 deletions(-)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index b83e119fbeb0..68128a1b401a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -13,6 +13,11 @@ config XEN
 	  kernel to boot in a paravirtualized environment under the
 	  Xen hypervisor.
 
+config XEN_PVHVM
+	def_bool y
+	depends on XEN
+	depends on X86_LOCAL_APIC
+
 config XEN_MAX_DOMAIN_MEMORY
 	int "Maximum allowed size of a domain in gigabytes"
 	default 8 if X86_32
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3bb4fc21f4f2..779385158915 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -12,9 +12,10 @@ CFLAGS_mmu.o := $(nostackp)
 
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o
+			grant-table.o suspend.o platform-pci-unplug.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
 
+obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 65d8d79b46a8..7d46c8441418 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -11,6 +11,7 @@
  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  */
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/smp.h>
@@ -35,8 +36,10 @@
 #include <xen/interface/version.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
+#include <xen/interface/memory.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/hvm.h>
 #include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
@@ -55,7 +58,9 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/setup.h>
 #include <asm/stackprotector.h>
+#include <asm/hypervisor.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -76,6 +81,10 @@ struct shared_info xen_dummy_shared_info;
 
 void *xen_initial_gdt;
 
+RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
+__read_mostly int xen_have_vector_callback;
+EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
@@ -97,6 +106,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
+static void clamp_max_cpus(void)
+{
+#ifdef CONFIG_SMP
+	if (setup_max_cpus > MAX_VIRT_CPUS)
+		setup_max_cpus = MAX_VIRT_CPUS;
+#endif
+}
+
 static void xen_vcpu_setup(int cpu)
 {
 	struct vcpu_register_vcpu_info info;
@@ -104,13 +121,17 @@ static void xen_vcpu_setup(int cpu)
 	struct vcpu_info *vcpup;
 
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
-	if (!have_vcpu_info_placement)
-		return;		/* already tested, not available */
+	if (cpu < MAX_VIRT_CPUS)
+		per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
-	vcpup = &per_cpu(xen_vcpu_info, cpu);
+	if (!have_vcpu_info_placement) {
+		if (cpu >= MAX_VIRT_CPUS)
+			clamp_max_cpus();
+		return;
+	}
 
+	vcpup = &per_cpu(xen_vcpu_info, cpu);
 	info.mfn = arbitrary_virt_to_mfn(vcpup);
 	info.offset = offset_in_page(vcpup);
 
@@ -125,6 +146,7 @@ static void xen_vcpu_setup(int cpu)
 	if (err) {
 		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
 		have_vcpu_info_placement = 0;
+		clamp_max_cpus();
 	} else {
 		/* This cpu is using the registered vcpu info, even if
 		   later ones fail to. */
@@ -731,7 +753,6 @@ static void set_xen_basic_apic_ops(void)
 
 #endif
 
-
 static void xen_clts(void)
 {
 	struct multicall_space mcs;
@@ -926,10 +947,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
 	.patch = xen_patch,
 };
 
-static const struct pv_time_ops xen_time_ops __initdata = {
-	.sched_clock = xen_sched_clock,
-};
-
 static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.cpuid = xen_cpuid,
 
@@ -1028,6 +1045,23 @@ static void xen_crash_shutdown(struct pt_regs *regs)
 	xen_reboot(SHUTDOWN_crash);
 }
 
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	xen_reboot(SHUTDOWN_crash);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xen_panic_block = {
+	.notifier_call= xen_panic_event,
+};
+
+int xen_panic_handler_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+	return 0;
+}
+
 static const struct machine_ops __initdata xen_machine_ops = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
@@ -1067,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void)
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
-	pv_time_ops = xen_time_ops;
 	pv_cpu_ops = xen_cpu_ops;
 	pv_apic_ops = xen_apic_ops;
 
@@ -1075,13 +1108,7 @@ asmlinkage void __init xen_start_kernel(void)
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
-	x86_init.timers.timer_init = xen_time_init;
-	x86_init.timers.setup_percpu_clockev = x86_init_noop;
-	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
-
-	x86_platform.calibrate_tsc = xen_tsc_khz;
-	x86_platform.get_wallclock = xen_get_wallclock;
-	x86_platform.set_wallclock = xen_set_wallclock;
+	xen_init_time_ops();
 
 	/*
 	 * Set up some pagetable state before starting to set any ptes.
@@ -1145,6 +1172,10 @@ asmlinkage void __init xen_start_kernel(void)
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
+	if (!xen_initial_domain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
+	__supported_pte_mask |= _PAGE_IOMAP;
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1206,3 +1237,139 @@ asmlinkage void __init xen_start_kernel(void)
 	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
 #endif
 }
+
+static uint32_t xen_cpuid_base(void)
+{
+	uint32_t base, eax, ebx, ecx, edx;
+	char signature[13];
+
+	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+		cpuid(base, &eax, &ebx, &ecx, &edx);
+		*(uint32_t *)(signature + 0) = ebx;
+		*(uint32_t *)(signature + 4) = ecx;
+		*(uint32_t *)(signature + 8) = edx;
+		signature[12] = 0;
+
+		if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
+			return base;
+	}
+
+	return 0;
+}
+
+static int init_hvm_pv_info(int *major, int *minor)
+{
+	uint32_t eax, ebx, ecx, edx, pages, msr, base;
+	u64 pfn;
+
+	base = xen_cpuid_base();
+	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+	*major = eax >> 16;
+	*minor = eax & 0xffff;
+	printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
+
+	cpuid(base + 2, &pages, &msr, &ecx, &edx);
+
+	pfn = __pa(hypercall_page);
+	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+	xen_setup_features();
+
+	pv_info = xen_info;
+	pv_info.kernel_rpl = 0;
+
+	xen_domain_type = XEN_HVM_DOMAIN;
+
+	return 0;
+}
+
+void xen_hvm_init_shared_info(void)
+{
+	int cpu;
+	struct xen_add_to_physmap xatp;
+	static struct shared_info *shared_info_page = 0;
+
+	if (!shared_info_page)
+		shared_info_page = (struct shared_info *)
+			extend_brk(PAGE_SIZE, PAGE_SIZE);
+	xatp.domid = DOMID_SELF;
+	xatp.idx = 0;
+	xatp.space = XENMAPSPACE_shared_info;
+	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+		BUG();
+
+	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+
+	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+	 * page, we use it in the event channel upcall and in some pvclock
+	 * related functions. We don't need the vcpu_info placement
+	 * optimizations because we don't use any pv_mmu or pv_irq op on
+	 * HVM.
+	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+	 * online but xen_hvm_init_shared_info is run at resume time too and
+	 * in that case multiple vcpus might be online. */
+	for_each_online_cpu(cpu) {
+		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+	}
+}
+
+#ifdef CONFIG_XEN_PVHVM
+static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+	.notifier_call	= xen_hvm_cpu_notify,
+};
+
+static void __init xen_hvm_guest_init(void)
+{
+	int r;
+	int major, minor;
+
+	r = init_hvm_pv_info(&major, &minor);
+	if (r < 0)
+		return;
+
+	xen_hvm_init_shared_info();
+
+	if (xen_feature(XENFEAT_hvm_callback_vector))
+		xen_have_vector_callback = 1;
+	register_cpu_notifier(&xen_hvm_cpu_notifier);
+	xen_unplug_emulated_devices();
+	have_vcpu_info_placement = 0;
+	x86_init.irqs.intr_init = xen_init_IRQ;
+	xen_hvm_init_time_ops();
+	xen_hvm_init_mmu_ops();
+}
+
+static bool __init xen_hvm_platform(void)
+{
+	if (xen_pv_domain())
+		return false;
+
+	if (!xen_cpuid_base())
+		return false;
+
+	return true;
+}
+
+const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+	.name			= "Xen HVM",
+	.detect			= xen_hvm_platform,
+	.init_platform		= xen_hvm_guest_init,
+};
+EXPORT_SYMBOL(x86_hyper_xen_hvm);
+#endif
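
An aside on the detection logic added above: the signature scan in xen_cpuid_base() can be reproduced outside the kernel. The following is a minimal user-space sketch, not part of this patch; it assumes GCC/clang's <cpuid.h> __cpuid macro in place of the kernel's cpuid() helper.

	/* Hypothetical user-space probe for the Xen CPUID signature,
	 * mirroring the scan in xen_cpuid_base() above. */
	#include <cpuid.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint32_t base, eax, ebx, ecx, edx;
		char signature[13];

		/* Hypervisor CPUID leaves live at 0x100 intervals from 0x40000000. */
		for (base = 0x40000000; base < 0x40010000; base += 0x100) {
			__cpuid(base, eax, ebx, ecx, edx);
			memcpy(signature + 0, &ebx, 4);
			memcpy(signature + 4, &ecx, 4);
			memcpy(signature + 8, &edx, 4);
			signature[12] = 0;

			/* eax reports the highest leaf; the kernel code requires at
			 * least base + 2 so the hypercall-page MSR leaf exists. */
			if (!strcmp("XenVMMXenVMM", signature) && (eax - base) >= 2) {
				printf("Xen CPUID leaves found at 0x%x\n", base);
				return 0;
			}
		}
		printf("no Xen signature found\n");
		return 1;
	}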
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b511f1986911..4fe04ac0bae0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
 #include <linux/highmem.h>
 #include <linux/debugfs.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
@@ -52,14 +53,19 @@
 #include <asm/mmu_context.h>
 #include <asm/setup.h>
 #include <asm/paravirt.h>
+#include <asm/e820.h>
 #include <asm/linkage.h>
+#include <asm/page.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/version.h>
+#include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 
 #include "multicalls.h"
@@ -68,6 +74,13 @@
 
 #define MMU_UPDATE_HISTO	30
 
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+DEFINE_SPINLOCK(xen_reservation_lock);
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct {
@@ -378,6 +391,28 @@ static bool xen_page_pinned(void *ptr)
 	return PagePinned(page);
 }
 
+static bool xen_iomap_pte(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_IOMAP;
+}
+
+static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+{
+	struct multicall_space mcs;
+	struct mmu_update *u;
+
+	mcs = xen_mc_entry(sizeof(*u));
+	u = mcs.args;
+
+	/* ptep might be kmapped when using 32-bit HIGHPTE */
+	u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+	u->val = pte_val_ma(pteval);
+
+	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
@@ -454,6 +489,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
+	if (xen_iomap_pte(pteval)) {
+		xen_set_iomap_pte(ptep, pteval);
+		goto out;
+	}
+
 	ADD_STATS(set_pte_at, 1);
 //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
 	ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -524,8 +564,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 	return val;
 }
 
+static pteval_t iomap_pte(pteval_t val)
+{
+	if (val & _PAGE_PRESENT) {
+		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & PTE_FLAGS_MASK;
+
+		/* We assume the pte frame number is a MFN, so
+		   just use it as-is. */
+		val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
+	}
+
+	return val;
+}
+
 pteval_t xen_pte_val(pte_t pte)
 {
+	if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
+		return pte.pte;
+
 	return pte_mfn_to_pfn(pte.pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -538,7 +595,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
 
 pte_t xen_make_pte(pteval_t pte)
 {
-	pte = pte_pfn_to_mfn(pte);
+	phys_addr_t addr = (pte & PTE_PFN_MASK);
+
+	/*
+	 * Unprivileged domains are allowed to do IOMAPpings for
+	 * PCI passthrough, but not map ISA space.  The ISA
+	 * mappings are just dummy local mappings to keep other
+	 * parts of the kernel happy.
+	 */
+	if (unlikely(pte & _PAGE_IOMAP) &&
+	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+		pte = iomap_pte(pte);
+	} else {
+		pte &= ~_PAGE_IOMAP;
+		pte = pte_pfn_to_mfn(pte);
+	}
+
 	return native_make_pte(pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
@@ -594,6 +666,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+	if (xen_iomap_pte(pte)) {
+		xen_set_iomap_pte(ptep, pte);
+		return;
+	}
+
 	ADD_STATS(pte_update, 1);
 //	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
 	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@ -610,6 +687,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+	if (xen_iomap_pte(pte)) {
+		xen_set_iomap_pte(ptep, pte);
+		return;
+	}
+
 	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
@@ -936,8 +1018,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
-	vm_unmap_aliases();
-
 	xen_mc_batch();
 
 	if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -1501,7 +1581,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
 	if (PagePinned(virt_to_page(mm->pgd))) {
 		SetPagePinned(page);
 
-		vm_unmap_aliases();
 		if (!PageHighMem(page)) {
 			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
 			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
@@ -1812,9 +1891,16 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 		pte = pfn_pte(phys, prot);
 		break;
 
-	default:
+	case FIX_PARAVIRT_BOOTMAP:
+		/* This is an MFN, but it isn't an IO mapping from the
+		   IO domain */
 		pte = mfn_pte(phys, prot);
 		break;
+
+	default:
+		/* By default, set_fixmap is used for hardware mappings */
+		pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+		break;
 	}
 
 	__native_set_fixmap(idx, pte);
@@ -1940,8 +2026,240 @@ void __init xen_init_mmu_ops(void)
 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
+
+	vmap_lazy_unmap = false;
+}
+
+/* Protected by xen_reservation_lock. */
+#define MAX_CONTIG_ORDER 9 /* 2MB */
+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+
+#define VOID_PTE (mfn_pte(0, __pgprot(0)))
+static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
+				unsigned long *in_frames,
+				unsigned long *out_frames)
+{
+	int i;
+	struct multicall_space mcs;
+
+	xen_mc_batch();
+	for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
+		mcs = __xen_mc_entry(0);
+
+		if (in_frames)
+			in_frames[i] = virt_to_mfn(vaddr);
+
+		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
+		set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+
+		if (out_frames)
+			out_frames[i] = virt_to_pfn(vaddr);
+	}
+	xen_mc_issue(0);
+}
+
+/*
+ * Update the pfn-to-mfn mappings for a virtual address range, either to
+ * point to an array of mfns, or contiguously from a single starting
+ * mfn.
+ */
+static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
+				     unsigned long *mfns,
+				     unsigned long first_mfn)
+{
+	unsigned i, limit;
+	unsigned long mfn;
+
+	xen_mc_batch();
+
+	limit = 1u << order;
+	for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
+		struct multicall_space mcs;
+		unsigned flags;
+
+		mcs = __xen_mc_entry(0);
+		if (mfns)
+			mfn = mfns[i];
+		else
+			mfn = first_mfn + i;
+
+		if (i < (limit - 1))
+			flags = 0;
+		else {
+			if (order == 0)
+				flags = UVMF_INVLPG | UVMF_ALL;
+			else
+				flags = UVMF_TLB_FLUSH | UVMF_ALL;
+		}
+
+		MULTI_update_va_mapping(mcs.mc, vaddr,
+				mfn_pte(mfn, PAGE_KERNEL), flags);
+
+		set_phys_to_machine(virt_to_pfn(vaddr), mfn);
+	}
+
+	xen_mc_issue(0);
+}
+
+/*
+ * Perform the hypercall to exchange a region of our pfns to point to
+ * memory with the required contiguous alignment.  Takes the pfns as
+ * input, and populates mfns as output.
+ *
+ * Returns a success code indicating whether the hypervisor was able to
+ * satisfy the request or not.
+ */
+static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
+			       unsigned long *pfns_in,
+			       unsigned long extents_out,
+			       unsigned int order_out,
+			       unsigned long *mfns_out,
+			       unsigned int address_bits)
+{
+	long rc;
+	int success;
+
+	struct xen_memory_exchange exchange = {
+		.in = {
+			.nr_extents = extents_in,
+			.extent_order = order_in,
+			.extent_start = pfns_in,
+			.domid = DOMID_SELF
+		},
+		.out = {
+			.nr_extents = extents_out,
+			.extent_order = order_out,
+			.extent_start = mfns_out,
+			.address_bits = address_bits,
+			.domid = DOMID_SELF
+		}
+	};
+
+	BUG_ON(extents_in << order_in != extents_out << order_out);
+
+	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+	success = (exchange.nr_exchanged == extents_in);
+
+	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+	BUG_ON(success && (rc != 0));
+
+	return success;
 }
 
+int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
+				 unsigned int address_bits)
+{
+	unsigned long *in_frames = discontig_frames, out_frame;
+	unsigned long flags;
+	int success;
+
+	/*
+	 * Currently an auto-translated guest will not perform I/O, nor will
+	 * it require PAE page directories below 4GB. Therefore any calls to
+	 * this function are redundant and can be ignored.
+	 */
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	if (unlikely(order > MAX_CONTIG_ORDER))
+		return -ENOMEM;
+
+	memset((void *) vstart, 0, PAGE_SIZE << order);
+
+	spin_lock_irqsave(&xen_reservation_lock, flags);
+
+	/* 1. Zap current PTEs, remembering MFNs. */
+	xen_zap_pfn_range(vstart, order, in_frames, NULL);
+
+	/* 2. Get a new contiguous memory extent. */
+	out_frame = virt_to_pfn(vstart);
+	success = xen_exchange_memory(1UL << order, 0, in_frames,
+				      1, order, &out_frame,
+				      address_bits);
+
+	/* 3. Map the new extent in place of old pages. */
+	if (success)
+		xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
+	else
+		xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
+
+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
+
+	return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+	unsigned long *out_frames = discontig_frames, in_frame;
+	unsigned long flags;
+	int success;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	if (unlikely(order > MAX_CONTIG_ORDER))
+		return;
+
+	memset((void *) vstart, 0, PAGE_SIZE << order);
+
+	spin_lock_irqsave(&xen_reservation_lock, flags);
+
+	/* 1. Find start MFN of contiguous extent. */
+	in_frame = virt_to_mfn(vstart);
+
+	/* 2. Zap current PTEs. */
+	xen_zap_pfn_range(vstart, order, NULL, out_frames);
+
+	/* 3. Do the exchange for non-contiguous MFNs. */
+	success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
+				      0, out_frames, 0);
+
+	/* 4. Map new pages in place of old pages. */
+	if (success)
+		xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
+	else
+		xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
+
+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
+}
+EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
+
+#ifdef CONFIG_XEN_PVHVM
+static void xen_hvm_exit_mmap(struct mm_struct *mm)
+{
+	struct xen_hvm_pagetable_dying a;
+	int rc;
+
+	a.domid = DOMID_SELF;
+	a.gpa = __pa(mm->pgd);
+	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
+	WARN_ON_ONCE(rc < 0);
+}
+
+static int is_pagetable_dying_supported(void)
+{
+	struct xen_hvm_pagetable_dying a;
+	int rc = 0;
+
+	a.domid = DOMID_SELF;
+	a.gpa = 0x00;
+	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
+	if (rc < 0) {
+		printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
+		return 0;
+	}
+	return 1;
+}
+
+void __init xen_hvm_init_mmu_ops(void)
+{
+	if (is_pagetable_dying_supported())
+		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+}
+#endif
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct dentry *d_mmu_debug;
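
The exchange helpers exported above (xen_create_contiguous_region() and its inverse) are what let a PV kernel hand machine-contiguous, address-limited buffers to devices; the Xen SWIOTLB below is their first consumer. A rough sketch of the intended calling pattern from a Xen-aware driver follows; alloc_dma_contig() and free_dma_contig() are hypothetical names for illustration, and the declarations are assumed to live in include/xen/xen-ops.h as elsewhere in this series.

	/* Hypothetical driver helper: allocate an order-sized buffer and
	 * exchange its backing frames for a machine-contiguous extent
	 * below 2^address_bits, suitable for device DMA. */
	#include <linux/gfp.h>
	#include <xen/xen-ops.h>	/* assumed home of the declarations */

	static void *alloc_dma_contig(unsigned int order, unsigned int address_bits)
	{
		unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

		if (!vstart)
			return NULL;

		/* Returns 0 on success, -ENOMEM if the hypervisor refused. */
		if (xen_create_contiguous_region(vstart, order, address_bits)) {
			free_pages(vstart, order);
			return NULL;
		}
		return (void *)vstart;
	}

	static void free_dma_contig(void *vaddr, unsigned int order)
	{
		/* Give the contiguous extent back before freeing the pages. */
		xen_destroy_contiguous_region((unsigned long)vaddr, order);
		free_pages((unsigned long)vaddr, order);
	}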
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe6bc7f5ecf..fa938c4aa2f7 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -60,4 +60,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 unsigned long xen_read_cr2_direct(void);
 
 extern void xen_init_mmu_ops(void);
+extern void xen_hvm_init_mmu_ops(void);
 #endif	/* _XEN_MMU_H */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
new file mode 100644
index 000000000000..a013ec9d0c54
--- /dev/null
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -0,0 +1,58 @@
+/* Glue code to lib/swiotlb-xen.c */
+
+#include <linux/dma-mapping.h>
+#include <xen/swiotlb-xen.h>
+
+#include <asm/xen/hypervisor.h>
+#include <xen/xen.h>
+
+int xen_swiotlb __read_mostly;
+
+static struct dma_map_ops xen_swiotlb_dma_ops = {
+	.mapping_error = xen_swiotlb_dma_mapping_error,
+	.alloc_coherent = xen_swiotlb_alloc_coherent,
+	.free_coherent = xen_swiotlb_free_coherent,
+	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
+	.map_sg = xen_swiotlb_map_sg_attrs,
+	.unmap_sg = xen_swiotlb_unmap_sg_attrs,
+	.map_page = xen_swiotlb_map_page,
+	.unmap_page = xen_swiotlb_unmap_page,
+	.dma_supported = xen_swiotlb_dma_supported,
+};
+
+/*
+ * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
+ *
+ * This returns non-zero if we are forced to use xen_swiotlb (by the boot
+ * option).
+ */
+int __init pci_xen_swiotlb_detect(void)
+{
+
+	/* If running as PV guest, either iommu=soft, or swiotlb=force will
+	 * activate this IOMMU. If running as PV privileged, activate it
+	 * regardless.
+	 */
+	if ((xen_initial_domain() || swiotlb || swiotlb_force) &&
+	    (xen_pv_domain()))
+		xen_swiotlb = 1;
+
+	/* If we are running under Xen, we MUST disable the native SWIOTLB.
+	 * Don't worry about swiotlb_force flag activating the native, as
+	 * the 'swiotlb' flag is the only one turning it on. */
+	if (xen_pv_domain())
+		swiotlb = 0;
+
+	return xen_swiotlb;
+}
+
+void __init pci_xen_swiotlb_init(void)
+{
+	if (xen_swiotlb) {
+		xen_swiotlb_init(1);
+		dma_ops = &xen_swiotlb_dma_ops;
+	}
+}
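
In practice, pci_xen_swiotlb_detect() above means a PV domU switches to the Xen SWIOTLB only when booted with iommu=soft or swiotlb=force, while a privileged PV domain enables it unconditionally; under PV the native SWIOTLB is disabled either way, so the two implementations never both claim bounce buffers. A PV guest using PCI passthrough would therefore typically boot with something like:

	iommu=soft

on its kernel command line.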
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
new file mode 100644
index 000000000000..0f456386cce5
--- /dev/null
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * platform-pci-unplug.c
+ *
+ * Xen platform PCI device driver
+ * Copyright (c) 2010, Citrix
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <xen/platform_pci.h>
+
+#define XEN_PLATFORM_ERR_MAGIC -1
+#define XEN_PLATFORM_ERR_PROTOCOL -2
+#define XEN_PLATFORM_ERR_BLACKLIST -3
+
+/* store the value of xen_emul_unplug after the unplug is done */
+int xen_platform_pci_unplug;
+EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
+#ifdef CONFIG_XEN_PVHVM
+static int xen_emul_unplug;
+
+static int __init check_platform_magic(void)
+{
+	short magic;
+	char protocol;
+
+	magic = inw(XEN_IOPORT_MAGIC);
+	if (magic != XEN_IOPORT_MAGIC_VAL) {
+		printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
+		return XEN_PLATFORM_ERR_MAGIC;
+	}
+
+	protocol = inb(XEN_IOPORT_PROTOVER);
+
+	printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
+			protocol);
+
+	switch (protocol) {
+	case 1:
+		outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
+		outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
+		if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
+			printk(KERN_ERR "Xen Platform: blacklisted by host\n");
+			return XEN_PLATFORM_ERR_BLACKLIST;
+		}
+		break;
+	default:
+		printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version");
+		return XEN_PLATFORM_ERR_PROTOCOL;
+	}
+
+	return 0;
+}
+
+void __init xen_unplug_emulated_devices(void)
+{
+	int r;
+
+	/* user explicitly requested no unplug */
+	if (xen_emul_unplug & XEN_UNPLUG_NEVER)
+		return;
+	/* check the version of the xen platform PCI device */
+	r = check_platform_magic();
+	/* If the version matches enable the Xen platform PCI driver.
+	 * Also enable the Xen platform PCI driver if the host does
+	 * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC)
+	 * but the user told us that unplugging is unnecessary. */
+	if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
+			(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)))
+		return;
+	/* Set the default value of xen_emul_unplug depending on whether or
+	 * not the Xen PV frontends and the Xen platform PCI driver have
+	 * been compiled for this kernel (modules or built-in are both OK). */
+	if (!xen_emul_unplug) {
+		if (xen_must_unplug_nics()) {
+			printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
+					"been compiled for this kernel: unplug emulated NICs.\n");
+			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
+		}
+		if (xen_must_unplug_disks()) {
+			printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
+					"been compiled for this kernel: unplug emulated disks.\n"
+					"You might have to change the root device\n"
+					"from /dev/hd[a-d] to /dev/xvd[a-d]\n"
+					"in your root= kernel command line option\n");
+			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
+		}
+	}
+	/* Now unplug the emulated devices */
+	if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))
+		outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
+	xen_platform_pci_unplug = xen_emul_unplug;
+}
+
+static int __init parse_xen_emul_unplug(char *arg)
+{
+	char *p, *q;
+	int l;
+
+	for (p = arg; p; p = q) {
+		q = strchr(p, ',');
+		if (q) {
+			l = q - p;
+			q++;
+		} else {
+			l = strlen(p);
+		}
+		if (!strncmp(p, "all", l))
+			xen_emul_unplug |= XEN_UNPLUG_ALL;
+		else if (!strncmp(p, "ide-disks", l))
+			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
+		else if (!strncmp(p, "aux-ide-disks", l))
+			xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
+		else if (!strncmp(p, "nics", l))
+			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
+		else if (!strncmp(p, "unnecessary", l))
+			xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY;
+		else if (!strncmp(p, "never", l))
+			xen_emul_unplug |= XEN_UNPLUG_NEVER;
+		else
+			printk(KERN_WARNING "unrecognised option '%s' "
+				"in parameter 'xen_emul_unplug'\n", p);
+	}
+	return 0;
+}
+early_param("xen_emul_unplug", parse_xen_emul_unplug);
+#endif
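
For reference, parse_xen_emul_unplug() above takes a comma-separated list via early_param(), so an HVM guest that should hand its emulated disks and NICs over to the PV frontends might boot with:

	xen_emul_unplug=ide-disks,nics

while xen_emul_unplug=never keeps the emulated devices, and xen_emul_unplug=unnecessary asserts that skipping the unplug is safe on hosts that lack the unplug protocol.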
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2ac8f29f89cb..9729c903404b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -21,6 +21,7 @@
 #include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
+#include <xen/interface/memory.h>
 #include <xen/features.h>
 
 #include "xen-ops.h"
@@ -33,6 +34,73 @@ extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
+static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
+					      phys_addr_t end_addr)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid = DOMID_SELF
+	};
+	unsigned long start, end;
+	unsigned long len = 0;
+	unsigned long pfn;
+	int ret;
+
+	start = PFN_UP(start_addr);
+	end = PFN_DOWN(end_addr);
+
+	if (end <= start)
+		return 0;
+
+	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
+	       start, end);
+	for(pfn = start; pfn < end; pfn++) {
+		unsigned long mfn = pfn_to_mfn(pfn);
+
+		/* Make sure pfn exists to start with */
+		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+			continue;
+
+		set_xen_guest_handle(reservation.extent_start, &mfn);
+		reservation.nr_extents = 1;
+
+		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+					   &reservation);
+		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
+		     start, end, ret);
+		if (ret == 1) {
+			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+			len++;
+		}
+	}
+	printk(KERN_CONT "%ld pages freed\n", len);
+
+	return len;
+}
+
+static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
+						     const struct e820map *e820)
+{
+	phys_addr_t max_addr = PFN_PHYS(max_pfn);
+	phys_addr_t last_end = 0;
+	unsigned long released = 0;
+	int i;
+
+	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
+		phys_addr_t end = e820->map[i].addr;
+		end = min(max_addr, end);
+
+		released += xen_release_chunk(last_end, end);
+		last_end = e820->map[i].addr + e820->map[i].size;
+	}
+
+	if (last_end < max_addr)
+		released += xen_release_chunk(last_end, max_addr);
+
+	printk(KERN_INFO "released %ld pages of unused memory\n", released);
+	return released;
+}
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -68,6 +136,8 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
+	xen_return_unused_memory(xen_start_info->nr_pages, &e820);
+
 	return "Xen";
 }
 
@@ -157,6 +227,8 @@ void __init xen_arch_setup(void)
 	struct physdev_set_iopl set_iopl;
 	int rc;
 
+	xen_panic_handler_init();
+
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a29693fd3138..25f232b18a82 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -394,6 +394,8 @@ static void stop_self(void *v)
 	load_cr3(swapper_pg_dir);
 	/* should set up a minimal gdt */
 
+	set_cpu_online(cpu, false);
+
 	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
 	BUG();
 }
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index a9c661108034..1d789d56877c 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -26,6 +26,18 @@ void xen_pre_suspend(void)
 	BUG();
 }
 
+void xen_hvm_post_suspend(int suspend_cancelled)
+{
+	int cpu;
+	xen_hvm_init_shared_info();
+	xen_callback_vector();
+	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
+		for_each_online_cpu(cpu) {
+			xen_setup_runstate_info(cpu);
+		}
+	}
+}
+
 void xen_post_suspend(int suspend_cancelled)
 {
 	xen_build_mfn_list_list();
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3c6c59ed302..1a5353a753fc 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -20,6 +20,7 @@
 #include <asm/xen/hypercall.h>
 
 #include <xen/events.h>
+#include <xen/features.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
 
@@ -155,47 +156,8 @@ static void do_stolen_accounting(void)
 	account_idle_ticks(ticks);
 }
 
-/*
- * Xen sched_clock implementation.  Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-unsigned long long xen_sched_clock(void)
-{
-	struct vcpu_runstate_info state;
-	cycle_t now;
-	u64 ret;
-	s64 offset;
-
-	/*
-	 * Ideally sched_clock should be called on a per-cpu basis
-	 * anyway, so preempt should already be disabled, but that's
-	 * not current practice at the moment.
-	 */
-	preempt_disable();
-
-	now = xen_clocksource_read();
-
-	get_runstate_snapshot(&state);
-
-	WARN_ON(state.state != RUNSTATE_running);
-
-	offset = now - state.state_entry_time;
-	if (offset < 0)
-		offset = 0;
-
-	ret = state.time[RUNSTATE_blocked] +
-		state.time[RUNSTATE_running] +
-		offset;
-
-	preempt_enable();
-
-	return ret;
-}
-
-
 /* Get the TSC speed from Xen */
-unsigned long xen_tsc_khz(void)
+static unsigned long xen_tsc_khz(void)
 {
 	struct pvclock_vcpu_time_info *info =
 		&HYPERVISOR_shared_info->vcpu_info[0].time;
@@ -230,7 +192,7 @@ static void xen_read_wallclock(struct timespec *ts)
 	put_cpu_var(xen_vcpu);
 }
 
-unsigned long xen_get_wallclock(void)
+static unsigned long xen_get_wallclock(void)
 {
 	struct timespec ts;
 
@@ -238,7 +200,7 @@ unsigned long xen_get_wallclock(void)
 	return ts.tv_sec;
 }
 
-int xen_set_wallclock(unsigned long now)
+static int xen_set_wallclock(unsigned long now)
 {
 	/* do nothing for domU */
 	return -1;
@@ -473,7 +435,11 @@ void xen_timer_resume(void)
 	}
 }
 
-__init void xen_time_init(void)
+static const struct pv_time_ops xen_time_ops __initdata = {
+	.sched_clock = xen_clocksource_read,
+};
+
+static __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 	struct timespec tp;
@@ -497,3 +463,47 @@ __init void xen_time_init(void)
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
 }
+
+__init void xen_init_time_ops(void)
+{
+	pv_time_ops = xen_time_ops;
+
+	x86_init.timers.timer_init = xen_time_init;
+	x86_init.timers.setup_percpu_clockev = x86_init_noop;
+	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+
+	x86_platform.calibrate_tsc = xen_tsc_khz;
+	x86_platform.get_wallclock = xen_get_wallclock;
+	x86_platform.set_wallclock = xen_set_wallclock;
+}
+
+#ifdef CONFIG_XEN_PVHVM
+static void xen_hvm_setup_cpu_clockevents(void)
+{
+	int cpu = smp_processor_id();
+	xen_setup_runstate_info(cpu);
+	xen_setup_timer(cpu);
+	xen_setup_cpu_clockevents();
+}
+
+__init void xen_hvm_init_time_ops(void)
+{
+	/* vector callback is needed otherwise we cannot receive interrupts
+	 * on cpu > 0 */
+	if (!xen_have_vector_callback && num_present_cpus() > 1)
+		return;
+	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
+		printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
+				"disable pv timer\n");
+		return;
+	}
+
+	pv_time_ops = xen_time_ops;
+	x86_init.timers.setup_percpu_clockev = xen_time_init;
+	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
+
+	x86_platform.calibrate_tsc = xen_tsc_khz;
+	x86_platform.get_wallclock = xen_get_wallclock;
+	x86_platform.set_wallclock = xen_set_wallclock;
+}
+#endif
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f9153a300bce..7c8ab86163e9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -38,6 +38,10 @@ void xen_enable_sysenter(void);
 void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
+void xen_callback_vector(void);
+void xen_hvm_init_shared_info(void);
+void __init xen_unplug_emulated_devices(void);
+
 void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_init_irq_ops(void);
@@ -46,11 +50,8 @@ void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
-unsigned long xen_tsc_khz(void);
-void __init xen_time_init(void);
-unsigned long xen_get_wallclock(void);
-int xen_set_wallclock(unsigned long time);
-unsigned long long xen_sched_clock(void);
+void __init xen_init_time_ops(void);
+void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
@@ -101,4 +102,6 @@ void xen_sysret32(void);
 void xen_sysret64(void);
 void xen_adjust_exception_frame(void);
 
+extern int xen_panic_handler_init(void);
+
 #endif /* XEN_OPS_H */