path: root/arch/x86/xen
author    Tejun Heo <tj@kernel.org>    2011-11-28 12:46:22 -0500
committer Tejun Heo <tj@kernel.org>    2011-11-28 12:46:22 -0500
commit    d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree      7eab5ee5481cd3dcf1162329fec827177640018a /arch/x86/xen
parent    a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent    401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions:

* arch/x86/xen/setup.c

        dc91c728fd "xen: allow extra memory to be in multiple regions"
        24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."

        conflicted on xen_add_extra_mem() updates. The resolution is trivial, as the
        latter just wants to replace memblock_x86_reserve_range() with
        memblock_reserve() (see the sketch after this message).

* drivers/pci/intel-iommu.c

        166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
        5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."

        conflicted because the former moved the file under drivers/iommu/. Resolved by
        applying the changes from the latter to the moved file.

* mm/Kconfig

        6661672053a "memblock: add NO_BOOTMEM config symbol"
        c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"

        conflicted trivially; both added config options. Letting each add its own
        option resolves the conflict.

* mm/memblock.c

        d1f0ece6cdc "mm/memblock.c: small function definition fixes"
        ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"

        conflicted. The former updates a function that the latter removes; the
        resolution is trivial.

Signed-off-by: Tejun Heo <tj@kernel.org>
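For the setup.c conflict, the change reduces to an API swap inside xen_add_extra_mem(). A minimal sketch (the surrounding multi-region logic is omitted, and the pre-merge line is paraphrased from dc91c728fd rather than quoted verbatim):

        /* pre-merge (dc91c728fd), using the x86-specific wrapper: */
        memblock_x86_reserve_range(start, start + size, "XEN EXTRA");

        /* resolved form, using the generic interface from 24aa07882b: */
        memblock_reserve(start, size);

The resolved call is the one that appears in the arch/x86/xen/setup.c hunk further down in this diff.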
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/Kconfig                 11
-rw-r--r--  arch/x86/xen/Makefile                 4
-rw-r--r--  arch/x86/xen/enlighten.c             36
-rw-r--r--  arch/x86/xen/grant-table.c            2
-rw-r--r--  arch/x86/xen/mmu.c                  205
-rw-r--r--  arch/x86/xen/multicalls.c           169
-rw-r--r--  arch/x86/xen/multicalls.h             6
-rw-r--r--  arch/x86/xen/p2m.c                  128
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c    2
-rw-r--r--  arch/x86/xen/setup.c                294
-rw-r--r--  arch/x86/xen/smp.c                   15
-rw-r--r--  arch/x86/xen/time.c                  21
-rw-r--r--  arch/x86/xen/trace.c                 62
-rw-r--r--  arch/x86/xen/vga.c                   67
-rw-r--r--  arch/x86/xen/xen-asm_32.S             8
-rw-r--r--  arch/x86/xen/xen-ops.h               11
16 files changed, 634 insertions, 407 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821cb2e09..26c731a106af 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
25 25
26config XEN_PVHVM 26config XEN_PVHVM
27 def_bool y 27 def_bool y
28 depends on XEN 28 depends on XEN && PCI && X86_LOCAL_APIC
29 depends on X86_LOCAL_APIC
30 29
31config XEN_MAX_DOMAIN_MEMORY 30config XEN_MAX_DOMAIN_MEMORY
32 int 31 int
@@ -49,11 +48,3 @@ config XEN_DEBUG_FS
49 help 48 help
50 Enable statistics output and various tuning options in debugfs. 49 Enable statistics output and various tuning options in debugfs.
51 Enabling this option may incur a significant performance overhead. 50 Enabling this option may incur a significant performance overhead.
52
53config XEN_DEBUG
54 bool "Enable Xen debug checks"
55 depends on XEN
56 default n
57 help
58 Enable various WARN_ON checks in the Xen MMU code.
59 Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 17c565de3d64..add2c2d729ce 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,8 +15,10 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
15 grant-table.o suspend.o platform-pci-unplug.o \ 15 grant-table.o suspend.o platform-pci-unplug.o \
16 p2m.o 16 p2m.o
17 17
18obj-$(CONFIG_EVENT_TRACING) += trace.o
19
18obj-$(CONFIG_SMP) += smp.o 20obj-$(CONFIG_SMP) += smp.o
19obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o 21obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
20obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o 22obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
21 23obj-$(CONFIG_XEN_DOM0) += vga.o
22obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o 24obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5525163a0398..1f928659c338 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
77 77
78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; 78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
79EXPORT_SYMBOL(machine_to_phys_mapping); 79EXPORT_SYMBOL(machine_to_phys_mapping);
80unsigned int machine_to_phys_order; 80unsigned long machine_to_phys_nr;
81EXPORT_SYMBOL(machine_to_phys_order); 81EXPORT_SYMBOL(machine_to_phys_nr);
82 82
83struct start_info *xen_start_info; 83struct start_info *xen_start_info;
84EXPORT_SYMBOL_GPL(xen_start_info); 84EXPORT_SYMBOL_GPL(xen_start_info);
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
251 ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ 251 ~((1 << X86_FEATURE_APIC) | /* disable local APIC */
252 (1 << X86_FEATURE_ACPI)); /* disable ACPI */ 252 (1 << X86_FEATURE_ACPI)); /* disable ACPI */
253 ax = 1; 253 ax = 1;
254 cx = 0;
254 xen_cpuid(&ax, &bx, &cx, &dx); 255 xen_cpuid(&ax, &bx, &cx, &dx);
255 256
256 xsave_mask = 257 xsave_mask =
@@ -341,6 +342,8 @@ static void xen_set_ldt(const void *addr, unsigned entries)
341 struct mmuext_op *op; 342 struct mmuext_op *op;
342 struct multicall_space mcs = xen_mc_entry(sizeof(*op)); 343 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
343 344
345 trace_xen_cpu_set_ldt(addr, entries);
346
344 op = mcs.args; 347 op = mcs.args;
345 op->cmd = MMUEXT_SET_LDT; 348 op->cmd = MMUEXT_SET_LDT;
346 op->arg1.linear_addr = (unsigned long)addr; 349 op->arg1.linear_addr = (unsigned long)addr;
@@ -496,6 +499,8 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
496 xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); 499 xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
497 u64 entry = *(u64 *)ptr; 500 u64 entry = *(u64 *)ptr;
498 501
502 trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
503
499 preempt_disable(); 504 preempt_disable();
500 505
501 xen_mc_flush(); 506 xen_mc_flush();
@@ -565,6 +570,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
565 unsigned long p = (unsigned long)&dt[entrynum]; 570 unsigned long p = (unsigned long)&dt[entrynum];
566 unsigned long start, end; 571 unsigned long start, end;
567 572
573 trace_xen_cpu_write_idt_entry(dt, entrynum, g);
574
568 preempt_disable(); 575 preempt_disable();
569 576
570 start = __this_cpu_read(idt_desc.address); 577 start = __this_cpu_read(idt_desc.address);
@@ -619,6 +626,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
619 static DEFINE_SPINLOCK(lock); 626 static DEFINE_SPINLOCK(lock);
620 static struct trap_info traps[257]; 627 static struct trap_info traps[257];
621 628
629 trace_xen_cpu_load_idt(desc);
630
622 spin_lock(&lock); 631 spin_lock(&lock);
623 632
624 __get_cpu_var(idt_desc) = *desc; 633 __get_cpu_var(idt_desc) = *desc;
@@ -637,6 +646,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
637static void xen_write_gdt_entry(struct desc_struct *dt, int entry, 646static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
638 const void *desc, int type) 647 const void *desc, int type)
639{ 648{
649 trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
650
640 preempt_disable(); 651 preempt_disable();
641 652
642 switch (type) { 653 switch (type) {
@@ -665,6 +676,8 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
665static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, 676static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
666 const void *desc, int type) 677 const void *desc, int type)
667{ 678{
679 trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
680
668 switch (type) { 681 switch (type) {
669 case DESC_LDT: 682 case DESC_LDT:
670 case DESC_TSS: 683 case DESC_TSS:
@@ -684,7 +697,9 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
684static void xen_load_sp0(struct tss_struct *tss, 697static void xen_load_sp0(struct tss_struct *tss,
685 struct thread_struct *thread) 698 struct thread_struct *thread)
686{ 699{
687 struct multicall_space mcs = xen_mc_entry(0); 700 struct multicall_space mcs;
701
702 mcs = xen_mc_entry(0);
688 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); 703 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
689 xen_mc_issue(PARAVIRT_LAZY_CPU); 704 xen_mc_issue(PARAVIRT_LAZY_CPU);
690} 705}
@@ -937,6 +952,10 @@ static const struct pv_info xen_info __initconst = {
937 .paravirt_enabled = 1, 952 .paravirt_enabled = 1,
938 .shared_kernel_pmd = 0, 953 .shared_kernel_pmd = 0,
939 954
955#ifdef CONFIG_X86_64
956 .extra_user_64bit_cs = FLAT_USER_CS64,
957#endif
958
940 .name = "Xen", 959 .name = "Xen",
941}; 960};
942 961
@@ -1248,6 +1267,14 @@ asmlinkage void __init xen_start_kernel(void)
1248 if (pci_xen) 1267 if (pci_xen)
1249 x86_init.pci.arch_init = pci_xen_init; 1268 x86_init.pci.arch_init = pci_xen_init;
1250 } else { 1269 } else {
1270 const struct dom0_vga_console_info *info =
1271 (void *)((char *)xen_start_info +
1272 xen_start_info->console.dom0.info_off);
1273
1274 xen_init_vga(info, xen_start_info->console.dom0.info_size);
1275 xen_start_info->console.domU.mfn = 0;
1276 xen_start_info->console.domU.evtchn = 0;
1277
1251 /* Make sure ACS will be enabled */ 1278 /* Make sure ACS will be enabled */
1252 pci_request_acs(); 1279 pci_request_acs();
1253 } 1280 }
@@ -1329,7 +1356,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1329 int cpu = (long)hcpu; 1356 int cpu = (long)hcpu;
1330 switch (action) { 1357 switch (action) {
1331 case CPU_UP_PREPARE: 1358 case CPU_UP_PREPARE:
1332 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1359 xen_vcpu_setup(cpu);
1333 if (xen_have_vector_callback) 1360 if (xen_have_vector_callback)
1334 xen_init_lock_cpu(cpu); 1361 xen_init_lock_cpu(cpu);
1335 break; 1362 break;
@@ -1359,7 +1386,6 @@ static void __init xen_hvm_guest_init(void)
1359 xen_hvm_smp_init(); 1386 xen_hvm_smp_init();
1360 register_cpu_notifier(&xen_hvm_cpu_notifier); 1387 register_cpu_notifier(&xen_hvm_cpu_notifier);
1361 xen_unplug_emulated_devices(); 1388 xen_unplug_emulated_devices();
1362 have_vcpu_info_placement = 0;
1363 x86_init.irqs.intr_init = xen_init_IRQ; 1389 x86_init.irqs.intr_init = xen_init_IRQ;
1364 xen_hvm_init_time_ops(); 1390 xen_hvm_init_time_ops();
1365 xen_hvm_init_mmu_ops(); 1391 xen_hvm_init_mmu_ops();
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5224d1..5a40d24ba331 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -71,7 +71,7 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
71 71
72 if (shared == NULL) { 72 if (shared == NULL) {
73 struct vm_struct *area = 73 struct vm_struct *area =
74 xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes); 74 alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
75 BUG_ON(area == NULL); 75 BUG_ON(area == NULL);
76 shared = area->addr; 76 shared = area->addr;
77 *__shared = shared; 77 *__shared = shared;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ad54fa10f8a2..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -48,6 +48,8 @@
48#include <linux/memblock.h> 48#include <linux/memblock.h>
49#include <linux/seq_file.h> 49#include <linux/seq_file.h>
50 50
51#include <trace/events/xen.h>
52
51#include <asm/pgtable.h> 53#include <asm/pgtable.h>
52#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
53#include <asm/fixmap.h> 55#include <asm/fixmap.h>
@@ -194,6 +196,8 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
194 struct multicall_space mcs; 196 struct multicall_space mcs;
195 struct mmu_update *u; 197 struct mmu_update *u;
196 198
199 trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
200
197 mcs = xen_mc_entry(sizeof(*u)); 201 mcs = xen_mc_entry(sizeof(*u));
198 u = mcs.args; 202 u = mcs.args;
199 203
@@ -225,6 +229,24 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
225 *u = *update; 229 *u = *update;
226} 230}
227 231
232static void xen_extend_mmuext_op(const struct mmuext_op *op)
233{
234 struct multicall_space mcs;
235 struct mmuext_op *u;
236
237 mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));
238
239 if (mcs.mc != NULL) {
240 mcs.mc->args[1]++;
241 } else {
242 mcs = __xen_mc_entry(sizeof(*u));
243 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
244 }
245
246 u = mcs.args;
247 *u = *op;
248}
249
228static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) 250static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
229{ 251{
230 struct mmu_update u; 252 struct mmu_update u;
@@ -245,6 +267,8 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
245 267
246static void xen_set_pmd(pmd_t *ptr, pmd_t val) 268static void xen_set_pmd(pmd_t *ptr, pmd_t val)
247{ 269{
270 trace_xen_mmu_set_pmd(ptr, val);
271
248 /* If page is not pinned, we can just update the entry 272 /* If page is not pinned, we can just update the entry
249 directly */ 273 directly */
250 if (!xen_page_pinned(ptr)) { 274 if (!xen_page_pinned(ptr)) {
@@ -282,22 +306,30 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
282 return true; 306 return true;
283} 307}
284 308
285static void xen_set_pte(pte_t *ptep, pte_t pteval) 309static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
286{ 310{
287 if (!xen_batched_set_pte(ptep, pteval)) 311 if (!xen_batched_set_pte(ptep, pteval))
288 native_set_pte(ptep, pteval); 312 native_set_pte(ptep, pteval);
289} 313}
290 314
315static void xen_set_pte(pte_t *ptep, pte_t pteval)
316{
317 trace_xen_mmu_set_pte(ptep, pteval);
318 __xen_set_pte(ptep, pteval);
319}
320
291static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 321static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
292 pte_t *ptep, pte_t pteval) 322 pte_t *ptep, pte_t pteval)
293{ 323{
294 xen_set_pte(ptep, pteval); 324 trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
325 __xen_set_pte(ptep, pteval);
295} 326}
296 327
297pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, 328pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
298 unsigned long addr, pte_t *ptep) 329 unsigned long addr, pte_t *ptep)
299{ 330{
300 /* Just return the pte as-is. We preserve the bits on commit */ 331 /* Just return the pte as-is. We preserve the bits on commit */
332 trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
301 return *ptep; 333 return *ptep;
302} 334}
303 335
@@ -306,6 +338,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
306{ 338{
307 struct mmu_update u; 339 struct mmu_update u;
308 340
341 trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
309 xen_mc_batch(); 342 xen_mc_batch();
310 343
311 u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; 344 u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
@@ -462,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
462} 495}
463PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 496PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
464 497
465#ifdef CONFIG_XEN_DEBUG
466pte_t xen_make_pte_debug(pteval_t pte)
467{
468 phys_addr_t addr = (pte & PTE_PFN_MASK);
469 phys_addr_t other_addr;
470 bool io_page = false;
471 pte_t _pte;
472
473 if (pte & _PAGE_IOMAP)
474 io_page = true;
475
476 _pte = xen_make_pte(pte);
477
478 if (!addr)
479 return _pte;
480
481 if (io_page &&
482 (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
483 other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
484 WARN_ONCE(addr != other_addr,
485 "0x%lx is using VM_IO, but it is 0x%lx!\n",
486 (unsigned long)addr, (unsigned long)other_addr);
487 } else {
488 pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
489 other_addr = (_pte.pte & PTE_PFN_MASK);
490 WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
491 "0x%lx is missing VM_IO (and wasn't fixed)!\n",
492 (unsigned long)addr);
493 }
494
495 return _pte;
496}
497PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
498#endif
499
500static pgd_t xen_make_pgd(pgdval_t pgd) 498static pgd_t xen_make_pgd(pgdval_t pgd)
501{ 499{
502 pgd = pte_pfn_to_mfn(pgd); 500 pgd = pte_pfn_to_mfn(pgd);
@@ -530,6 +528,8 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
530 528
531static void xen_set_pud(pud_t *ptr, pud_t val) 529static void xen_set_pud(pud_t *ptr, pud_t val)
532{ 530{
531 trace_xen_mmu_set_pud(ptr, val);
532
533 /* If page is not pinned, we can just update the entry 533 /* If page is not pinned, we can just update the entry
534 directly */ 534 directly */
535 if (!xen_page_pinned(ptr)) { 535 if (!xen_page_pinned(ptr)) {
@@ -543,17 +543,20 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
543#ifdef CONFIG_X86_PAE 543#ifdef CONFIG_X86_PAE
544static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) 544static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
545{ 545{
546 trace_xen_mmu_set_pte_atomic(ptep, pte);
546 set_64bit((u64 *)ptep, native_pte_val(pte)); 547 set_64bit((u64 *)ptep, native_pte_val(pte));
547} 548}
548 549
549static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 550static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
550{ 551{
552 trace_xen_mmu_pte_clear(mm, addr, ptep);
551 if (!xen_batched_set_pte(ptep, native_make_pte(0))) 553 if (!xen_batched_set_pte(ptep, native_make_pte(0)))
552 native_pte_clear(mm, addr, ptep); 554 native_pte_clear(mm, addr, ptep);
553} 555}
554 556
555static void xen_pmd_clear(pmd_t *pmdp) 557static void xen_pmd_clear(pmd_t *pmdp)
556{ 558{
559 trace_xen_mmu_pmd_clear(pmdp);
557 set_pmd(pmdp, __pmd(0)); 560 set_pmd(pmdp, __pmd(0));
558} 561}
559#endif /* CONFIG_X86_PAE */ 562#endif /* CONFIG_X86_PAE */
@@ -629,6 +632,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
629{ 632{
630 pgd_t *user_ptr = xen_get_user_pgd(ptr); 633 pgd_t *user_ptr = xen_get_user_pgd(ptr);
631 634
635 trace_xen_mmu_set_pgd(ptr, user_ptr, val);
636
632 /* If page is not pinned, we can just update the entry 637 /* If page is not pinned, we can just update the entry
633 directly */ 638 directly */
634 if (!xen_page_pinned(ptr)) { 639 if (!xen_page_pinned(ptr)) {
@@ -788,14 +793,12 @@ static void xen_pte_unlock(void *v)
788 793
789static void xen_do_pin(unsigned level, unsigned long pfn) 794static void xen_do_pin(unsigned level, unsigned long pfn)
790{ 795{
791 struct mmuext_op *op; 796 struct mmuext_op op;
792 struct multicall_space mcs;
793 797
794 mcs = __xen_mc_entry(sizeof(*op)); 798 op.cmd = level;
795 op = mcs.args; 799 op.arg1.mfn = pfn_to_mfn(pfn);
796 op->cmd = level; 800
797 op->arg1.mfn = pfn_to_mfn(pfn); 801 xen_extend_mmuext_op(&op);
798 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
799} 802}
800 803
801static int xen_pin_page(struct mm_struct *mm, struct page *page, 804static int xen_pin_page(struct mm_struct *mm, struct page *page,
@@ -863,6 +866,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
863 read-only, and can be pinned. */ 866 read-only, and can be pinned. */
864static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) 867static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
865{ 868{
869 trace_xen_mmu_pgd_pin(mm, pgd);
870
866 xen_mc_batch(); 871 xen_mc_batch();
867 872
868 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { 873 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -988,6 +993,8 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
988/* Release a pagetables pages back as normal RW */ 993/* Release a pagetables pages back as normal RW */
989static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) 994static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
990{ 995{
996 trace_xen_mmu_pgd_unpin(mm, pgd);
997
991 xen_mc_batch(); 998 xen_mc_batch();
992 999
993 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1000 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1196,6 +1203,8 @@ static void xen_flush_tlb(void)
1196 struct mmuext_op *op; 1203 struct mmuext_op *op;
1197 struct multicall_space mcs; 1204 struct multicall_space mcs;
1198 1205
1206 trace_xen_mmu_flush_tlb(0);
1207
1199 preempt_disable(); 1208 preempt_disable();
1200 1209
1201 mcs = xen_mc_entry(sizeof(*op)); 1210 mcs = xen_mc_entry(sizeof(*op));
@@ -1214,6 +1223,8 @@ static void xen_flush_tlb_single(unsigned long addr)
1214 struct mmuext_op *op; 1223 struct mmuext_op *op;
1215 struct multicall_space mcs; 1224 struct multicall_space mcs;
1216 1225
1226 trace_xen_mmu_flush_tlb_single(addr);
1227
1217 preempt_disable(); 1228 preempt_disable();
1218 1229
1219 mcs = xen_mc_entry(sizeof(*op)); 1230 mcs = xen_mc_entry(sizeof(*op));
@@ -1240,6 +1251,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1240 } *args; 1251 } *args;
1241 struct multicall_space mcs; 1252 struct multicall_space mcs;
1242 1253
1254 trace_xen_mmu_flush_tlb_others(cpus, mm, va);
1255
1243 if (cpumask_empty(cpus)) 1256 if (cpumask_empty(cpus))
1244 return; /* nothing to do */ 1257 return; /* nothing to do */
1245 1258
@@ -1275,10 +1288,11 @@ static void set_current_cr3(void *v)
1275 1288
1276static void __xen_write_cr3(bool kernel, unsigned long cr3) 1289static void __xen_write_cr3(bool kernel, unsigned long cr3)
1277{ 1290{
1278 struct mmuext_op *op; 1291 struct mmuext_op op;
1279 struct multicall_space mcs;
1280 unsigned long mfn; 1292 unsigned long mfn;
1281 1293
1294 trace_xen_mmu_write_cr3(kernel, cr3);
1295
1282 if (cr3) 1296 if (cr3)
1283 mfn = pfn_to_mfn(PFN_DOWN(cr3)); 1297 mfn = pfn_to_mfn(PFN_DOWN(cr3));
1284 else 1298 else
@@ -1286,13 +1300,10 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
1286 1300
1287 WARN_ON(mfn == 0 && kernel); 1301 WARN_ON(mfn == 0 && kernel);
1288 1302
1289 mcs = __xen_mc_entry(sizeof(*op)); 1303 op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1290 1304 op.arg1.mfn = mfn;
1291 op = mcs.args;
1292 op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1293 op->arg1.mfn = mfn;
1294 1305
1295 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 1306 xen_extend_mmuext_op(&op);
1296 1307
1297 if (kernel) { 1308 if (kernel) {
1298 percpu_write(xen_cr3, cr3); 1309 percpu_write(xen_cr3, cr3);
@@ -1451,19 +1462,52 @@ static void __init xen_release_pmd_init(unsigned long pfn)
1451 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1462 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1452} 1463}
1453 1464
1465static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1466{
1467 struct multicall_space mcs;
1468 struct mmuext_op *op;
1469
1470 mcs = __xen_mc_entry(sizeof(*op));
1471 op = mcs.args;
1472 op->cmd = cmd;
1473 op->arg1.mfn = pfn_to_mfn(pfn);
1474
1475 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
1476}
1477
1478static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
1479{
1480 struct multicall_space mcs;
1481 unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
1482
1483 mcs = __xen_mc_entry(0);
1484 MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
1485 pfn_pte(pfn, prot), 0);
1486}
1487
1454/* This needs to make sure the new pte page is pinned iff its being 1488/* This needs to make sure the new pte page is pinned iff its being
1455 attached to a pinned pagetable. */ 1489 attached to a pinned pagetable. */
1456static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) 1490static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1491 unsigned level)
1457{ 1492{
1458 struct page *page = pfn_to_page(pfn); 1493 bool pinned = PagePinned(virt_to_page(mm->pgd));
1494
1495 trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1496
1497 if (pinned) {
1498 struct page *page = pfn_to_page(pfn);
1459 1499
1460 if (PagePinned(virt_to_page(mm->pgd))) {
1461 SetPagePinned(page); 1500 SetPagePinned(page);
1462 1501
1463 if (!PageHighMem(page)) { 1502 if (!PageHighMem(page)) {
1464 make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); 1503 xen_mc_batch();
1504
1505 __set_pfn_prot(pfn, PAGE_KERNEL_RO);
1506
1465 if (level == PT_PTE && USE_SPLIT_PTLOCKS) 1507 if (level == PT_PTE && USE_SPLIT_PTLOCKS)
1466 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); 1508 __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1509
1510 xen_mc_issue(PARAVIRT_LAZY_MMU);
1467 } else { 1511 } else {
1468 /* make sure there are no stray mappings of 1512 /* make sure there are no stray mappings of
1469 this page */ 1513 this page */
@@ -1483,15 +1527,23 @@ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1483} 1527}
1484 1528
1485/* This should never happen until we're OK to use struct page */ 1529/* This should never happen until we're OK to use struct page */
1486static void xen_release_ptpage(unsigned long pfn, unsigned level) 1530static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1487{ 1531{
1488 struct page *page = pfn_to_page(pfn); 1532 struct page *page = pfn_to_page(pfn);
1533 bool pinned = PagePinned(page);
1534
1535 trace_xen_mmu_release_ptpage(pfn, level, pinned);
1489 1536
1490 if (PagePinned(page)) { 1537 if (pinned) {
1491 if (!PageHighMem(page)) { 1538 if (!PageHighMem(page)) {
1539 xen_mc_batch();
1540
1492 if (level == PT_PTE && USE_SPLIT_PTLOCKS) 1541 if (level == PT_PTE && USE_SPLIT_PTLOCKS)
1493 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); 1542 __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1494 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1543
1544 __set_pfn_prot(pfn, PAGE_KERNEL);
1545
1546 xen_mc_issue(PARAVIRT_LAZY_MMU);
1495 } 1547 }
1496 ClearPagePinned(page); 1548 ClearPagePinned(page);
1497 } 1549 }
@@ -1626,15 +1678,17 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1626void __init xen_setup_machphys_mapping(void) 1678void __init xen_setup_machphys_mapping(void)
1627{ 1679{
1628 struct xen_machphys_mapping mapping; 1680 struct xen_machphys_mapping mapping;
1629 unsigned long machine_to_phys_nr_ents;
1630 1681
1631 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { 1682 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1632 machine_to_phys_mapping = (unsigned long *)mapping.v_start; 1683 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1633 machine_to_phys_nr_ents = mapping.max_mfn + 1; 1684 machine_to_phys_nr = mapping.max_mfn + 1;
1634 } else { 1685 } else {
1635 machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; 1686 machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
1636 } 1687 }
1637 machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); 1688#ifdef CONFIG_X86_32
1689 WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
1690 < machine_to_phys_mapping);
1691#endif
1638} 1692}
1639 1693
1640#ifdef CONFIG_X86_64 1694#ifdef CONFIG_X86_64
@@ -1825,6 +1879,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1825# endif 1879# endif
1826#else 1880#else
1827 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: 1881 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
1882 case VVAR_PAGE:
1828#endif 1883#endif
1829 case FIX_TEXT_POKE0: 1884 case FIX_TEXT_POKE0:
1830 case FIX_TEXT_POKE1: 1885 case FIX_TEXT_POKE1:
@@ -1865,7 +1920,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1865#ifdef CONFIG_X86_64 1920#ifdef CONFIG_X86_64
1866 /* Replicate changes to map the vsyscall page into the user 1921 /* Replicate changes to map the vsyscall page into the user
1867 pagetable vsyscall mapping. */ 1922 pagetable vsyscall mapping. */
1868 if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { 1923 if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
1924 idx == VVAR_PAGE) {
1869 unsigned long vaddr = __fix_to_virt(idx); 1925 unsigned long vaddr = __fix_to_virt(idx);
1870 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); 1926 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
1871 } 1927 }
@@ -1897,9 +1953,6 @@ void __init xen_ident_map_ISA(void)
1897 1953
1898static void __init xen_post_allocator_init(void) 1954static void __init xen_post_allocator_init(void)
1899{ 1955{
1900#ifdef CONFIG_XEN_DEBUG
1901 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
1902#endif
1903 pv_mmu_ops.set_pte = xen_set_pte; 1956 pv_mmu_ops.set_pte = xen_set_pte;
1904 pv_mmu_ops.set_pmd = xen_set_pmd; 1957 pv_mmu_ops.set_pmd = xen_set_pmd;
1905 pv_mmu_ops.set_pud = xen_set_pud; 1958 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2309,17 +2362,3 @@ out:
2309 return err; 2362 return err;
2310} 2363}
2311EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); 2364EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2312
2313#ifdef CONFIG_XEN_DEBUG_FS
2314static int p2m_dump_open(struct inode *inode, struct file *filp)
2315{
2316 return single_open(filp, p2m_dump_show, NULL);
2317}
2318
2319static const struct file_operations p2m_dump_fops = {
2320 .open = p2m_dump_open,
2321 .read = seq_read,
2322 .llseek = seq_lseek,
2323 .release = single_release,
2324};
2325#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 1b2b73ff0a6e..0d82003e76ad 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -30,12 +30,13 @@
30 30
31#define MC_BATCH 32 31#define MC_BATCH 32
32 32
33#define MC_DEBUG 1 33#define MC_DEBUG 0
34 34
35#define MC_ARGS (MC_BATCH * 16) 35#define MC_ARGS (MC_BATCH * 16)
36 36
37 37
38struct mc_buffer { 38struct mc_buffer {
39 unsigned mcidx, argidx, cbidx;
39 struct multicall_entry entries[MC_BATCH]; 40 struct multicall_entry entries[MC_BATCH];
40#if MC_DEBUG 41#if MC_DEBUG
41 struct multicall_entry debug[MC_BATCH]; 42 struct multicall_entry debug[MC_BATCH];
@@ -46,85 +47,15 @@ struct mc_buffer {
46 void (*fn)(void *); 47 void (*fn)(void *);
47 void *data; 48 void *data;
48 } callbacks[MC_BATCH]; 49 } callbacks[MC_BATCH];
49 unsigned mcidx, argidx, cbidx;
50}; 50};
51 51
52static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); 52static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
53DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); 53DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
54 54
55/* flush reasons 0- slots, 1- args, 2- callbacks */
56enum flush_reasons
57{
58 FL_SLOTS,
59 FL_ARGS,
60 FL_CALLBACKS,
61
62 FL_N_REASONS
63};
64
65#ifdef CONFIG_XEN_DEBUG_FS
66#define NHYPERCALLS 40 /* not really */
67
68static struct {
69 unsigned histo[MC_BATCH+1];
70
71 unsigned issued;
72 unsigned arg_total;
73 unsigned hypercalls;
74 unsigned histo_hypercalls[NHYPERCALLS];
75
76 unsigned flush[FL_N_REASONS];
77} mc_stats;
78
79static u8 zero_stats;
80
81static inline void check_zero(void)
82{
83 if (unlikely(zero_stats)) {
84 memset(&mc_stats, 0, sizeof(mc_stats));
85 zero_stats = 0;
86 }
87}
88
89static void mc_add_stats(const struct mc_buffer *mc)
90{
91 int i;
92
93 check_zero();
94
95 mc_stats.issued++;
96 mc_stats.hypercalls += mc->mcidx;
97 mc_stats.arg_total += mc->argidx;
98
99 mc_stats.histo[mc->mcidx]++;
100 for(i = 0; i < mc->mcidx; i++) {
101 unsigned op = mc->entries[i].op;
102 if (op < NHYPERCALLS)
103 mc_stats.histo_hypercalls[op]++;
104 }
105}
106
107static void mc_stats_flush(enum flush_reasons idx)
108{
109 check_zero();
110
111 mc_stats.flush[idx]++;
112}
113
114#else /* !CONFIG_XEN_DEBUG_FS */
115
116static inline void mc_add_stats(const struct mc_buffer *mc)
117{
118}
119
120static inline void mc_stats_flush(enum flush_reasons idx)
121{
122}
123#endif /* CONFIG_XEN_DEBUG_FS */
124
125void xen_mc_flush(void) 55void xen_mc_flush(void)
126{ 56{
127 struct mc_buffer *b = &__get_cpu_var(mc_buffer); 57 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
58 struct multicall_entry *mc;
128 int ret = 0; 59 int ret = 0;
129 unsigned long flags; 60 unsigned long flags;
130 int i; 61 int i;
@@ -135,9 +66,26 @@ void xen_mc_flush(void)
135 something in the middle */ 66 something in the middle */
136 local_irq_save(flags); 67 local_irq_save(flags);
137 68
138 mc_add_stats(b); 69 trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
70
71 switch (b->mcidx) {
72 case 0:
73 /* no-op */
74 BUG_ON(b->argidx != 0);
75 break;
76
77 case 1:
78 /* Singleton multicall - bypass multicall machinery
79 and just do the call directly. */
80 mc = &b->entries[0];
81
82 mc->result = privcmd_call(mc->op,
83 mc->args[0], mc->args[1], mc->args[2],
84 mc->args[3], mc->args[4]);
85 ret = mc->result < 0;
86 break;
139 87
140 if (b->mcidx) { 88 default:
141#if MC_DEBUG 89#if MC_DEBUG
142 memcpy(b->debug, b->entries, 90 memcpy(b->debug, b->entries,
143 b->mcidx * sizeof(struct multicall_entry)); 91 b->mcidx * sizeof(struct multicall_entry));
@@ -164,11 +112,10 @@ void xen_mc_flush(void)
164 } 112 }
165 } 113 }
166#endif 114#endif
115 }
167 116
168 b->mcidx = 0; 117 b->mcidx = 0;
169 b->argidx = 0; 118 b->argidx = 0;
170 } else
171 BUG_ON(b->argidx != 0);
172 119
173 for (i = 0; i < b->cbidx; i++) { 120 for (i = 0; i < b->cbidx; i++) {
174 struct callback *cb = &b->callbacks[i]; 121 struct callback *cb = &b->callbacks[i];
@@ -188,18 +135,21 @@ struct multicall_space __xen_mc_entry(size_t args)
188 struct multicall_space ret; 135 struct multicall_space ret;
189 unsigned argidx = roundup(b->argidx, sizeof(u64)); 136 unsigned argidx = roundup(b->argidx, sizeof(u64));
190 137
138 trace_xen_mc_entry_alloc(args);
139
191 BUG_ON(preemptible()); 140 BUG_ON(preemptible());
192 BUG_ON(b->argidx >= MC_ARGS); 141 BUG_ON(b->argidx >= MC_ARGS);
193 142
194 if (b->mcidx == MC_BATCH || 143 if (unlikely(b->mcidx == MC_BATCH ||
195 (argidx + args) >= MC_ARGS) { 144 (argidx + args) >= MC_ARGS)) {
196 mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); 145 trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ?
146 XEN_MC_FL_BATCH : XEN_MC_FL_ARGS);
197 xen_mc_flush(); 147 xen_mc_flush();
198 argidx = roundup(b->argidx, sizeof(u64)); 148 argidx = roundup(b->argidx, sizeof(u64));
199 } 149 }
200 150
201 ret.mc = &b->entries[b->mcidx]; 151 ret.mc = &b->entries[b->mcidx];
202#ifdef MC_DEBUG 152#if MC_DEBUG
203 b->caller[b->mcidx] = __builtin_return_address(0); 153 b->caller[b->mcidx] = __builtin_return_address(0);
204#endif 154#endif
205 b->mcidx++; 155 b->mcidx++;
@@ -218,20 +168,25 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
218 BUG_ON(preemptible()); 168 BUG_ON(preemptible());
219 BUG_ON(b->argidx >= MC_ARGS); 169 BUG_ON(b->argidx >= MC_ARGS);
220 170
221 if (b->mcidx == 0) 171 if (unlikely(b->mcidx == 0 ||
222 return ret; 172 b->entries[b->mcidx - 1].op != op)) {
223 173 trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP);
224 if (b->entries[b->mcidx - 1].op != op) 174 goto out;
225 return ret; 175 }
226 176
227 if ((b->argidx + size) >= MC_ARGS) 177 if (unlikely((b->argidx + size) >= MC_ARGS)) {
228 return ret; 178 trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE);
179 goto out;
180 }
229 181
230 ret.mc = &b->entries[b->mcidx - 1]; 182 ret.mc = &b->entries[b->mcidx - 1];
231 ret.args = &b->args[b->argidx]; 183 ret.args = &b->args[b->argidx];
232 b->argidx += size; 184 b->argidx += size;
233 185
234 BUG_ON(b->argidx >= MC_ARGS); 186 BUG_ON(b->argidx >= MC_ARGS);
187
188 trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK);
189out:
235 return ret; 190 return ret;
236} 191}
237 192
@@ -241,43 +196,13 @@ void xen_mc_callback(void (*fn)(void *), void *data)
241 struct callback *cb; 196 struct callback *cb;
242 197
243 if (b->cbidx == MC_BATCH) { 198 if (b->cbidx == MC_BATCH) {
244 mc_stats_flush(FL_CALLBACKS); 199 trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK);
245 xen_mc_flush(); 200 xen_mc_flush();
246 } 201 }
247 202
203 trace_xen_mc_callback(fn, data);
204
248 cb = &b->callbacks[b->cbidx++]; 205 cb = &b->callbacks[b->cbidx++];
249 cb->fn = fn; 206 cb->fn = fn;
250 cb->data = data; 207 cb->data = data;
251} 208}
252
253#ifdef CONFIG_XEN_DEBUG_FS
254
255static struct dentry *d_mc_debug;
256
257static int __init xen_mc_debugfs(void)
258{
259 struct dentry *d_xen = xen_init_debugfs();
260
261 if (d_xen == NULL)
262 return -ENOMEM;
263
264 d_mc_debug = debugfs_create_dir("multicalls", d_xen);
265
266 debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
267
268 debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
269 debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
270 debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
271
272 xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
273 mc_stats.histo, MC_BATCH);
274 xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
275 mc_stats.histo_hypercalls, NHYPERCALLS);
276 xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
277 mc_stats.flush, FL_N_REASONS);
278
279 return 0;
280}
281fs_initcall(xen_mc_debugfs);
282
283#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 4ec8035e3216..dee79b78a90f 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -1,6 +1,8 @@
1#ifndef _XEN_MULTICALLS_H 1#ifndef _XEN_MULTICALLS_H
2#define _XEN_MULTICALLS_H 2#define _XEN_MULTICALLS_H
3 3
4#include <trace/events/xen.h>
5
4#include "xen-ops.h" 6#include "xen-ops.h"
5 7
6/* Multicalls */ 8/* Multicalls */
@@ -20,8 +22,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
20static inline void xen_mc_batch(void) 22static inline void xen_mc_batch(void)
21{ 23{
22 unsigned long flags; 24 unsigned long flags;
25
23 /* need to disable interrupts until this entry is complete */ 26 /* need to disable interrupts until this entry is complete */
24 local_irq_save(flags); 27 local_irq_save(flags);
28 trace_xen_mc_batch(paravirt_get_lazy_mode());
25 __this_cpu_write(xen_mc_irq_flags, flags); 29 __this_cpu_write(xen_mc_irq_flags, flags);
26} 30}
27 31
@@ -37,6 +41,8 @@ void xen_mc_flush(void);
37/* Issue a multicall if we're not in a lazy mode */ 41/* Issue a multicall if we're not in a lazy mode */
38static inline void xen_mc_issue(unsigned mode) 42static inline void xen_mc_issue(unsigned mode)
39{ 43{
44 trace_xen_mc_issue(mode);
45
40 if ((paravirt_get_lazy_mode() & mode) == 0) 46 if ((paravirt_get_lazy_mode() & mode) == 0)
41 xen_mc_flush(); 47 xen_mc_flush();
42 48
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d5440..1b267e75158d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
161#include <asm/xen/page.h> 161#include <asm/xen/page.h>
162#include <asm/xen/hypercall.h> 162#include <asm/xen/hypercall.h>
163#include <asm/xen/hypervisor.h> 163#include <asm/xen/hypervisor.h>
164#include <xen/grant_table.h>
164 165
166#include "multicalls.h"
165#include "xen-ops.h" 167#include "xen-ops.h"
166 168
167static void __init m2p_override_init(void); 169static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
676} 678}
677 679
678/* Add an MFN override for a particular page */ 680/* Add an MFN override for a particular page */
679int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) 681int m2p_add_override(unsigned long mfn, struct page *page,
682 struct gnttab_map_grant_ref *kmap_op)
680{ 683{
681 unsigned long flags; 684 unsigned long flags;
682 unsigned long pfn; 685 unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
692 "m2p_add_override: pfn %lx not mapped", pfn)) 695 "m2p_add_override: pfn %lx not mapped", pfn))
693 return -EINVAL; 696 return -EINVAL;
694 } 697 }
695 698 WARN_ON(PagePrivate(page));
696 page->private = mfn; 699 SetPagePrivate(page);
700 set_page_private(page, mfn);
697 page->index = pfn_to_mfn(pfn); 701 page->index = pfn_to_mfn(pfn);
698 702
699 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) 703 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
700 return -ENOMEM; 704 return -ENOMEM;
701 705
702 if (clear_pte && !PageHighMem(page)) 706 if (kmap_op != NULL) {
703 /* Just zap old mapping for now */ 707 if (!PageHighMem(page)) {
704 pte_clear(&init_mm, address, ptep); 708 struct multicall_space mcs =
709 xen_mc_entry(sizeof(*kmap_op));
710
711 MULTI_grant_table_op(mcs.mc,
712 GNTTABOP_map_grant_ref, kmap_op, 1);
713
714 xen_mc_issue(PARAVIRT_LAZY_MMU);
715 }
716 /* let's use dev_bus_addr to record the old mfn instead */
717 kmap_op->dev_bus_addr = page->index;
718 page->index = (unsigned long) kmap_op;
719 }
705 spin_lock_irqsave(&m2p_override_lock, flags); 720 spin_lock_irqsave(&m2p_override_lock, flags);
706 list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); 721 list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
707 spin_unlock_irqrestore(&m2p_override_lock, flags); 722 spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
735 spin_lock_irqsave(&m2p_override_lock, flags); 750 spin_lock_irqsave(&m2p_override_lock, flags);
736 list_del(&page->lru); 751 list_del(&page->lru);
737 spin_unlock_irqrestore(&m2p_override_lock, flags); 752 spin_unlock_irqrestore(&m2p_override_lock, flags);
738 set_phys_to_machine(pfn, page->index); 753 WARN_ON(!PagePrivate(page));
754 ClearPagePrivate(page);
739 755
740 if (clear_pte && !PageHighMem(page)) 756 if (clear_pte) {
741 set_pte_at(&init_mm, address, ptep, 757 struct gnttab_map_grant_ref *map_op =
742 pfn_pte(pfn, PAGE_KERNEL)); 758 (struct gnttab_map_grant_ref *) page->index;
743 /* No tlb flush necessary because the caller already 759 set_phys_to_machine(pfn, map_op->dev_bus_addr);
744 * left the pte unmapped. */ 760 if (!PageHighMem(page)) {
761 struct multicall_space mcs;
762 struct gnttab_unmap_grant_ref *unmap_op;
763
764 /*
765 * It might be that we queued all the m2p grant table
766 * hypercalls in a multicall, then m2p_remove_override
767 * get called before the multicall has actually been
768 * issued. In this case handle is going to -1 because
769 * it hasn't been modified yet.
770 */
771 if (map_op->handle == -1)
772 xen_mc_flush();
773 /*
774 * Now if map_op->handle is negative it means that the
775 * hypercall actually returned an error.
776 */
777 if (map_op->handle == GNTST_general_error) {
778 printk(KERN_WARNING "m2p_remove_override: "
779 "pfn %lx mfn %lx, failed to modify kernel mappings",
780 pfn, mfn);
781 return -1;
782 }
783
784 mcs = xen_mc_entry(
785 sizeof(struct gnttab_unmap_grant_ref));
786 unmap_op = mcs.args;
787 unmap_op->host_addr = map_op->host_addr;
788 unmap_op->handle = map_op->handle;
789 unmap_op->dev_bus_addr = 0;
790
791 MULTI_grant_table_op(mcs.mc,
792 GNTTABOP_unmap_grant_ref, unmap_op, 1);
793
794 xen_mc_issue(PARAVIRT_LAZY_MMU);
795
796 set_pte_at(&init_mm, address, ptep,
797 pfn_pte(pfn, PAGE_KERNEL));
798 __flush_tlb_single(address);
799 map_op->host_addr = 0;
800 }
801 } else
802 set_phys_to_machine(pfn, page->index);
745 803
746 return 0; 804 return 0;
747} 805}
@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
758 spin_lock_irqsave(&m2p_override_lock, flags); 816 spin_lock_irqsave(&m2p_override_lock, flags);
759 817
760 list_for_each_entry(p, bucket, lru) { 818 list_for_each_entry(p, bucket, lru) {
761 if (p->private == mfn) { 819 if (page_private(p) == mfn) {
762 ret = p; 820 ret = p;
763 break; 821 break;
764 } 822 }
@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
782EXPORT_SYMBOL_GPL(m2p_find_override_pfn); 840EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
783 841
784#ifdef CONFIG_XEN_DEBUG_FS 842#ifdef CONFIG_XEN_DEBUG_FS
785 843#include <linux/debugfs.h>
786int p2m_dump_show(struct seq_file *m, void *v) 844#include "debugfs.h"
845static int p2m_dump_show(struct seq_file *m, void *v)
787{ 846{
788 static const char * const level_name[] = { "top", "middle", 847 static const char * const level_name[] = { "top", "middle",
789 "entry", "abnormal" }; 848 "entry", "abnormal", "error"};
790 static const char * const type_name[] = { "identity", "missing",
791 "pfn", "abnormal"};
792#define TYPE_IDENTITY 0 849#define TYPE_IDENTITY 0
793#define TYPE_MISSING 1 850#define TYPE_MISSING 1
794#define TYPE_PFN 2 851#define TYPE_PFN 2
795#define TYPE_UNKNOWN 3 852#define TYPE_UNKNOWN 3
853 static const char * const type_name[] = {
854 [TYPE_IDENTITY] = "identity",
855 [TYPE_MISSING] = "missing",
856 [TYPE_PFN] = "pfn",
857 [TYPE_UNKNOWN] = "abnormal"};
796 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; 858 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
797 unsigned int uninitialized_var(prev_level); 859 unsigned int uninitialized_var(prev_level);
798 unsigned int uninitialized_var(prev_type); 860 unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
856#undef TYPE_PFN 918#undef TYPE_PFN
857#undef TYPE_UNKNOWN 919#undef TYPE_UNKNOWN
858} 920}
859#endif 921
922static int p2m_dump_open(struct inode *inode, struct file *filp)
923{
924 return single_open(filp, p2m_dump_show, NULL);
925}
926
927static const struct file_operations p2m_dump_fops = {
928 .open = p2m_dump_open,
929 .read = seq_read,
930 .llseek = seq_lseek,
931 .release = single_release,
932};
933
934static struct dentry *d_mmu_debug;
935
936static int __init xen_p2m_debugfs(void)
937{
938 struct dentry *d_xen = xen_init_debugfs();
939
940 if (d_xen == NULL)
941 return -ENOMEM;
942
943 d_mmu_debug = debugfs_create_dir("mmu", d_xen);
944
945 debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
946 return 0;
947}
948fs_initcall(xen_p2m_debugfs);
949#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 25c52f94a27c..ffcf2615640b 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
35#ifdef CONFIG_XEN_PVHVM 35#ifdef CONFIG_XEN_PVHVM
36static int xen_emul_unplug; 36static int xen_emul_unplug;
37 37
38static int __init check_platform_magic(void) 38static int check_platform_magic(void)
39{ 39{
40 short magic; 40 short magic;
41 char protocol; 41 char protocol;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 73daaf75801a..f5e1362550e7 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/memblock.h> 11#include <linux/memblock.h>
12#include <linux/cpuidle.h>
12 13
13#include <asm/elf.h> 14#include <asm/elf.h>
14#include <asm/vdso.h> 15#include <asm/vdso.h>
@@ -36,7 +37,10 @@ extern void xen_syscall_target(void);
36extern void xen_syscall32_target(void); 37extern void xen_syscall32_target(void);
37 38
38/* Amount of extra memory space we add to the e820 ranges */ 39/* Amount of extra memory space we add to the e820 ranges */
39phys_addr_t xen_extra_mem_start, xen_extra_mem_size; 40struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
41
42/* Number of pages released from the initial allocation. */
43unsigned long xen_released_pages;
40 44
41/* 45/*
42 * The maximum amount of extra memory compared to the base size. The 46 * The maximum amount of extra memory compared to the base size. The
@@ -50,50 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
50 */ 54 */
51#define EXTRA_MEM_RATIO (10) 55#define EXTRA_MEM_RATIO (10)
52 56
53static void __init xen_add_extra_mem(unsigned long pages) 57static void __init xen_add_extra_mem(u64 start, u64 size)
54{ 58{
55 unsigned long pfn; 59 unsigned long pfn;
60 int i;
56 61
57 u64 size = (u64)pages * PAGE_SIZE; 62 for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
58 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; 63 /* Add new region. */
59 64 if (xen_extra_mem[i].size == 0) {
60 if (!pages) 65 xen_extra_mem[i].start = start;
61 return; 66 xen_extra_mem[i].size = size;
62 67 break;
63 e820_add_region(extra_start, size, E820_RAM); 68 }
64 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 69 /* Append to existing region. */
65 70 if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
66 memblock_reserve(extra_start, size); 71 xen_extra_mem[i].size += size;
72 break;
73 }
74 }
75 if (i == XEN_EXTRA_MEM_MAX_REGIONS)
76 printk(KERN_WARNING "Warning: not enough extra memory regions\n");
67 77
68 xen_extra_mem_size += size; 78 memblock_reserve(start, size);
69 79
70 xen_max_p2m_pfn = PFN_DOWN(extra_start + size); 80 xen_max_p2m_pfn = PFN_DOWN(start + size);
71 81
72 for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) 82 for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
73 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 83 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
74} 84}
75 85
76static unsigned long __init xen_release_chunk(phys_addr_t start_addr, 86static unsigned long __init xen_release_chunk(unsigned long start,
77 phys_addr_t end_addr) 87 unsigned long end)
78{ 88{
79 struct xen_memory_reservation reservation = { 89 struct xen_memory_reservation reservation = {
80 .address_bits = 0, 90 .address_bits = 0,
81 .extent_order = 0, 91 .extent_order = 0,
82 .domid = DOMID_SELF 92 .domid = DOMID_SELF
83 }; 93 };
84 unsigned long start, end;
85 unsigned long len = 0; 94 unsigned long len = 0;
86 unsigned long pfn; 95 unsigned long pfn;
87 int ret; 96 int ret;
88 97
89 start = PFN_UP(start_addr);
90 end = PFN_DOWN(end_addr);
91
92 if (end <= start)
93 return 0;
94
95 printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
96 start, end);
97 for(pfn = start; pfn < end; pfn++) { 98 for(pfn = start; pfn < end; pfn++) {
98 unsigned long mfn = pfn_to_mfn(pfn); 99 unsigned long mfn = pfn_to_mfn(pfn);
99 100
@@ -106,100 +107,104 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
106 107
107 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 108 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
108 &reservation); 109 &reservation);
109 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", 110 WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
110 start, end, ret);
111 if (ret == 1) { 111 if (ret == 1) {
112 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 112 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
113 len++; 113 len++;
114 } 114 }
115 } 115 }
116 printk(KERN_CONT "%ld pages freed\n", len); 116 printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
117 start, end, len);
117 118
118 return len; 119 return len;
119} 120}
120 121
121static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, 122static unsigned long __init xen_set_identity_and_release(
122 const struct e820map *e820) 123 const struct e820entry *list, size_t map_size, unsigned long nr_pages)
123{ 124{
124 phys_addr_t max_addr = PFN_PHYS(max_pfn); 125 phys_addr_t start = 0;
125 phys_addr_t last_end = ISA_END_ADDRESS;
126 unsigned long released = 0; 126 unsigned long released = 0;
127 unsigned long identity = 0;
128 const struct e820entry *entry;
127 int i; 129 int i;
128 130
129 /* Free any unused memory above the low 1Mbyte. */ 131 /*
130 for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { 132 * Combine non-RAM regions and gaps until a RAM region (or the
131 phys_addr_t end = e820->map[i].addr; 133 * end of the map) is reached, then set the 1:1 map and
132 end = min(max_addr, end); 134 * release the pages (if available) in those non-RAM regions.
135 *
136 * The combined non-RAM regions are rounded to a whole number
137 * of pages so any partial pages are accessible via the 1:1
138 * mapping. This is needed for some BIOSes that put (for
139 * example) the DMI tables in a reserved region that begins on
140 * a non-page boundary.
141 */
142 for (i = 0, entry = list; i < map_size; i++, entry++) {
143 phys_addr_t end = entry->addr + entry->size;
144
145 if (entry->type == E820_RAM || i == map_size - 1) {
146 unsigned long start_pfn = PFN_DOWN(start);
147 unsigned long end_pfn = PFN_UP(end);
133 148
134 if (last_end < end) 149 if (entry->type == E820_RAM)
135 released += xen_release_chunk(last_end, end); 150 end_pfn = PFN_UP(entry->addr);
136 last_end = max(last_end, e820->map[i].addr + e820->map[i].size); 151
152 if (start_pfn < end_pfn) {
153 if (start_pfn < nr_pages)
154 released += xen_release_chunk(
155 start_pfn, min(end_pfn, nr_pages));
156
157 identity += set_phys_range_identity(
158 start_pfn, end_pfn);
159 }
160 start = end;
161 }
137 } 162 }
138 163
139 if (last_end < max_addr) 164 printk(KERN_INFO "Released %lu pages of unused memory\n", released);
140 released += xen_release_chunk(last_end, max_addr); 165 printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
141 166
142 printk(KERN_INFO "released %ld pages of unused memory\n", released);
143 return released; 167 return released;
144} 168}
145 169
146static unsigned long __init xen_set_identity(const struct e820entry *list, 170static unsigned long __init xen_get_max_pages(void)
147 ssize_t map_size)
148{ 171{
149 phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; 172 unsigned long max_pages = MAX_DOMAIN_PAGES;
150 phys_addr_t start_pci = last; 173 domid_t domid = DOMID_SELF;
151 const struct e820entry *entry; 174 int ret;
152 unsigned long identity = 0;
153 int i;
154
155 for (i = 0, entry = list; i < map_size; i++, entry++) {
156 phys_addr_t start = entry->addr;
157 phys_addr_t end = start + entry->size;
158
159 if (start < last)
160 start = last;
161
162 if (end <= start)
163 continue;
164 175
165 /* Skip over the 1MB region. */ 176 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
166 if (last > end) 177 if (ret > 0)
167 continue; 178 max_pages = ret;
179 return min(max_pages, MAX_DOMAIN_PAGES);
180}
168 181
169 if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { 182static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
170 if (start > start_pci) 183{
171 identity += set_phys_range_identity( 184 u64 end = start + size;
172 PFN_UP(start_pci), PFN_DOWN(start));
173 185
174 /* Without saving 'last' we would gooble RAM too 186 /* Align RAM regions to page boundaries. */
175 * at the end of the loop. */ 187 if (type == E820_RAM) {
176 last = end; 188 start = PAGE_ALIGN(start);
177 start_pci = end; 189 end &= ~((u64)PAGE_SIZE - 1);
178 continue;
179 }
180 start_pci = min(start, start_pci);
181 last = end;
182 } 190 }
183 if (last > start_pci) 191
184 identity += set_phys_range_identity( 192 e820_add_region(start, end - start, type);
185 PFN_UP(start_pci), PFN_DOWN(last));
186 return identity;
187} 193}
194
188/** 195/**
189 * machine_specific_memory_setup - Hook for machine specific memory setup. 196 * machine_specific_memory_setup - Hook for machine specific memory setup.
190 **/ 197 **/
191char * __init xen_memory_setup(void) 198char * __init xen_memory_setup(void)
192{ 199{
193 static struct e820entry map[E820MAX] __initdata; 200 static struct e820entry map[E820MAX] __initdata;
194 static struct e820entry map_raw[E820MAX] __initdata;
195 201
196 unsigned long max_pfn = xen_start_info->nr_pages; 202 unsigned long max_pfn = xen_start_info->nr_pages;
197 unsigned long long mem_end; 203 unsigned long long mem_end;
198 int rc; 204 int rc;
199 struct xen_memory_map memmap; 205 struct xen_memory_map memmap;
206 unsigned long max_pages;
200 unsigned long extra_pages = 0; 207 unsigned long extra_pages = 0;
201 unsigned long extra_limit;
202 unsigned long identity_pages = 0;
203 int i; 208 int i;
204 int op; 209 int op;
205 210
@@ -225,58 +230,65 @@ char * __init xen_memory_setup(void)
         }
         BUG_ON(rc);
 
-        memcpy(map_raw, map, sizeof(map));
-        e820.nr_map = 0;
-        xen_extra_mem_start = mem_end;
-        for (i = 0; i < memmap.nr_entries; i++) {
-                unsigned long long end;
-
-                /* Guard against non-page aligned E820 entries. */
-                if (map[i].type == E820_RAM)
-                        map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
-                end = map[i].addr + map[i].size;
-                if (map[i].type == E820_RAM && end > mem_end) {
-                        /* RAM off the end - may be partially included */
-                        u64 delta = min(map[i].size, end - mem_end);
-
-                        map[i].size -= delta;
-                        end -= delta;
-
-                        extra_pages += PFN_DOWN(delta);
-                        /*
-                         * Set RAM below 4GB that is not for us to be unusable.
-                         * This prevents "System RAM" address space from being
-                         * used as potential resource for I/O address (happens
-                         * when 'allocate_resource' is called).
-                         */
-                        if (delta &&
-                                (xen_initial_domain() && end < 0x100000000ULL))
-                                e820_add_region(end, delta, E820_UNUSABLE);
-                }
-
-                if (map[i].size > 0 && end > xen_extra_mem_start)
-                        xen_extra_mem_start = end;
-
-                /* Add region if any remains */
-                if (map[i].size > 0)
-                        e820_add_region(map[i].addr, map[i].size, map[i].type);
-        }
-        /* Align the balloon area so that max_low_pfn does not get set
-         * to be at the _end_ of the PCI gap at the far end (fee01000).
-         * Note that xen_extra_mem_start gets set in the loop above to be
-         * past the last E820 region. */
-        if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
-                xen_extra_mem_start = (1ULL<<32);
+        /* Make sure the Xen-supplied memory map is well-ordered. */
+        sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+        max_pages = xen_get_max_pages();
+        if (max_pages > max_pfn)
+                extra_pages += max_pages - max_pfn;
+
+        /*
+         * Set P2M for all non-RAM pages and E820 gaps to be identity
+         * type PFNs. Any RAM pages that would be made inaccesible by
+         * this are first released.
+         */
+        xen_released_pages = xen_set_identity_and_release(
+                map, memmap.nr_entries, max_pfn);
+        extra_pages += xen_released_pages;
+
+        /*
+         * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+         * factor the base size. On non-highmem systems, the base
+         * size is the full initial memory allocation; on highmem it
+         * is limited to the max size of lowmem, so that it doesn't
+         * get completely filled.
+         *
+         * In principle there could be a problem in lowmem systems if
+         * the initial memory is also very large with respect to
+         * lowmem, but we won't try to deal with that here.
+         */
+        extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+                          extra_pages);
+
+        i = 0;
+        while (i < memmap.nr_entries) {
+                u64 addr = map[i].addr;
+                u64 size = map[i].size;
+                u32 type = map[i].type;
+
+                if (type == E820_RAM) {
+                        if (addr < mem_end) {
+                                size = min(size, mem_end - addr);
+                        } else if (extra_pages) {
+                                size = min(size, (u64)extra_pages * PAGE_SIZE);
+                                extra_pages -= size / PAGE_SIZE;
+                                xen_add_extra_mem(addr, size);
+                        } else
+                                type = E820_UNUSABLE;
+                }
+
+                xen_align_and_add_e820_region(addr, size, type);
+
+                map[i].addr += size;
+                map[i].size -= size;
+                if (map[i].size == 0)
+                        i++;
+        }
 
         /*
          * In domU, the ISA region is normal, usable memory, but we
          * reserve ISA memory anyway because too many things poke
          * about in there.
-         *
-         * In Dom0, the host E820 information can leave gaps in the
-         * ISA range, which would cause us to release those pages. To
-         * avoid this, we unconditionally reserve them here.
          */
         e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
                         E820_RESERVED);
@@ -292,36 +304,6 @@ char * __init xen_memory_setup(void)
 
         sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-        extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
-        /*
-         * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-         * factor the base size. On non-highmem systems, the base
-         * size is the full initial memory allocation; on highmem it
-         * is limited to the max size of lowmem, so that it doesn't
-         * get completely filled.
-         *
-         * In principle there could be a problem in lowmem systems if
-         * the initial memory is also very large with respect to
-         * lowmem, but we won't try to deal with that here.
-         */
-        extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-                          max_pfn + extra_pages);
-
-        if (extra_limit >= max_pfn)
-                extra_pages = extra_limit - max_pfn;
-        else
-                extra_pages = 0;
-
-        xen_add_extra_mem(extra_pages);
-
-        /*
-         * Set P2M for all non-RAM pages and E820 gaps to be identity
-         * type PFNs. We supply it with the non-sanitized version
-         * of the E820.
-         */
-        identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-        printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
         return "Xen";
 }
 
@@ -425,7 +407,7 @@ void __init xen_arch_setup(void)
 #ifdef CONFIG_X86_32
         boot_cpu_data.hlt_works_ok = 1;
 #endif
-        pm_idle = default_idle;
+        disable_cpuidle();
         boot_option_idle_override = IDLE_HALT;
 
         fiddle_vdso();
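
The new xen_align_and_add_e820_region() above only adds whole pages of a RAM region to the E820 map: the start is rounded up and the end rounded down to a page boundary. Below is a minimal user-space sketch of the same arithmetic; PAGE_SIZE, PAGE_ALIGN and align_ram_region() are redefined locally purely for illustration and are not the kernel implementation.

/* Illustration only: trim a RAM region to whole pages, as the patch above
 * does before calling e820_add_region(). */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE      4096ULL
#define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

static void align_ram_region(uint64_t start, uint64_t size,
                             uint64_t *out_start, uint64_t *out_size)
{
        uint64_t end = start + size;

        /* Round the start up and the end down so only whole pages remain. */
        start = PAGE_ALIGN(start);
        end &= ~(PAGE_SIZE - 1);

        *out_start = start;
        *out_size = end > start ? end - start : 0;
}

int main(void)
{
        uint64_t s, n;

        /* A region misaligned at both ends. */
        align_ram_region(0x1000200, 0x3000, &s, &n);
        printf("aligned region: start=%#llx size=%#llx\n",
               (unsigned long long)s, (unsigned long long)n);
        return 0;
}

Run on the example region it prints start=0x1001000 size=0x2000, i.e. the partial pages at both ends are dropped.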
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b4533a86d7e4..041d4fe9dfe4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
 #include <xen/page.h>
 #include <xen/events.h>
 
+#include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
 
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
         unsigned cpu;
         unsigned int i;
 
+        if (skip_ioapic_setup) {
+                char *m = (max_cpus == 0) ?
+                        "The nosmp parameter is incompatible with Xen; " \
+                        "use Xen dom0_max_vcpus=1 parameter" :
+                        "The noapic parameter is incompatible with Xen";
+
+                xen_raw_printk(m);
+                panic(m);
+        }
         xen_init_lock_cpu(0);
 
         smp_store_cpu_info(0);
@@ -521,10 +531,7 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
         native_smp_prepare_cpus(max_cpus);
         WARN_ON(xen_smp_intr_init(0));
 
-        if (!xen_have_vector_callback)
-                return;
         xen_init_lock_cpu(0);
-        xen_init_spinlocks();
 }
 
 static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
@@ -546,6 +553,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
 
 void __init xen_hvm_smp_init(void)
 {
+        if (!xen_have_vector_callback)
+                return;
         smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
         smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
         smp_ops.cpu_up = xen_hvm_cpu_up;
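
The xen_have_vector_callback check moves from xen_hvm_smp_prepare_cpus() into xen_hvm_smp_init(), so when the callback is unavailable the PV smp_ops are never installed at all instead of each callback bailing out later. Below is a stand-alone sketch of that "probe once, then install the ops" pattern; the names (have_vector_callback, smp_like_ops, pv_prepare, ...) are invented for the example and are not the kernel's.

/* Illustration only: install alternative ops only if a capability exists. */
#include <stdbool.h>
#include <stdio.h>

struct smp_like_ops {
        void (*prepare_cpus)(unsigned int max_cpus);
};

static void default_prepare(unsigned int n) { printf("default path, %u cpus\n", n); }
static void pv_prepare(unsigned int n)      { printf("paravirt path, %u cpus\n", n); }

static struct smp_like_ops ops = { .prepare_cpus = default_prepare };
static bool have_vector_callback;       /* would be probed from the hypervisor */

static void hvm_smp_init(void)
{
        /* Bail out early, as the patch above does, rather than checking the
         * capability inside every individual callback. */
        if (!have_vector_callback)
                return;
        ops.prepare_cpus = pv_prepare;
}

int main(void)
{
        hvm_smp_init();
        ops.prepare_cpus(4);            /* capability absent: default path */

        have_vector_callback = true;
        hvm_smp_init();
        ops.prepare_cpus(4);            /* capability present: paravirt path */
        return 0;
}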
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5158c505bef9..0296a9522501 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void)
         struct pvclock_vcpu_time_info *src;
         cycle_t ret;
 
-        src = &get_cpu_var(xen_vcpu)->time;
+        preempt_disable_notrace();
+        src = &__get_cpu_var(xen_vcpu)->time;
         ret = pvclock_clocksource_read(src);
-        put_cpu_var(xen_vcpu);
+        preempt_enable_notrace();
         return ret;
 }
 
@@ -200,8 +201,22 @@ static unsigned long xen_get_wallclock(void)
 
 static int xen_set_wallclock(unsigned long now)
 {
+        struct xen_platform_op op;
+        int rc;
+
         /* do nothing for domU */
-        return -1;
+        if (!xen_initial_domain())
+                return -1;
+
+        op.cmd = XENPF_settime;
+        op.u.settime.secs = now;
+        op.u.settime.nsecs = 0;
+        op.u.settime.system_time = xen_clocksource_read();
+
+        rc = HYPERVISOR_dom0_op(&op);
+        WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);
+
+        return rc;
 }
 
 static struct clocksource xen_clocksource __read_mostly = {
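
xen_clocksource_read() now pins the vCPU with preempt_disable_notrace()/preempt_enable_notrace() around the per-CPU pvclock read; the _notrace variants matter because this path can itself be reached from the tracer. The conversion performed by pvclock_clocksource_read() is, roughly, system_time plus a shifted-and-scaled TSC delta. Below is a simplified user-space sketch of that scaling, with no version/seqlock handling and abbreviated field names; treat it as an approximation, not the exact kernel implementation (it also assumes a compiler with unsigned __int128, such as gcc or clang on 64-bit).

/* Illustration only: pvclock-style TSC-delta-to-nanoseconds scaling. */
#include <stdint.h>
#include <stdio.h>

struct pvclock_like_info {
        uint64_t tsc_timestamp;         /* TSC value when system_time was sampled */
        uint64_t system_time;           /* guest "system time" in nanoseconds */
        uint32_t tsc_to_system_mul;     /* 32.32 fixed-point TSC->ns multiplier */
        int8_t   tsc_shift;             /* pre-scale applied to the TSC delta */
};

static uint64_t pvclock_like_read(const struct pvclock_like_info *src, uint64_t tsc)
{
        uint64_t delta = tsc - src->tsc_timestamp;

        if (src->tsc_shift >= 0)
                delta <<= src->tsc_shift;
        else
                delta >>= -src->tsc_shift;

        /* 64x32-bit multiply, keeping the high part (>> 32). */
        return src->system_time +
               (uint64_t)(((unsigned __int128)delta * src->tsc_to_system_mul) >> 32);
}

int main(void)
{
        /* 2 GHz TSC: 0.5 ns per cycle in 32.32 fixed point, shift 0. */
        struct pvclock_like_info info = {
                .tsc_timestamp = 1000000,
                .system_time = 5000000000ULL,
                .tsc_to_system_mul = 0x80000000u,
                .tsc_shift = 0,
        };

        printf("now = %llu ns\n",
               (unsigned long long)pvclock_like_read(&info, 1000000 + 2000000000ULL));
        return 0;
}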
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c
new file mode 100644
index 000000000000..520022d1a181
--- /dev/null
+++ b/arch/x86/xen/trace.c
@@ -0,0 +1,62 @@
+#include <linux/ftrace.h>
+#include <xen/interface/xen.h>
+
+#define N(x)	[__HYPERVISOR_##x] = "("#x")"
+static const char *xen_hypercall_names[] = {
+	N(set_trap_table),
+	N(mmu_update),
+	N(set_gdt),
+	N(stack_switch),
+	N(set_callbacks),
+	N(fpu_taskswitch),
+	N(sched_op_compat),
+	N(dom0_op),
+	N(set_debugreg),
+	N(get_debugreg),
+	N(update_descriptor),
+	N(memory_op),
+	N(multicall),
+	N(update_va_mapping),
+	N(set_timer_op),
+	N(event_channel_op_compat),
+	N(xen_version),
+	N(console_io),
+	N(physdev_op_compat),
+	N(grant_table_op),
+	N(vm_assist),
+	N(update_va_mapping_otherdomain),
+	N(iret),
+	N(vcpu_op),
+	N(set_segment_base),
+	N(mmuext_op),
+	N(acm_op),
+	N(nmi_op),
+	N(sched_op),
+	N(callback_op),
+	N(xenoprof_op),
+	N(event_channel_op),
+	N(physdev_op),
+	N(hvm_op),
+
+/* Architecture-specific hypercall definitions. */
+	N(arch_0),
+	N(arch_1),
+	N(arch_2),
+	N(arch_3),
+	N(arch_4),
+	N(arch_5),
+	N(arch_6),
+	N(arch_7),
+};
+#undef N
+
+static const char *xen_hypercall_name(unsigned op)
+{
+	if (op < ARRAY_SIZE(xen_hypercall_names) && xen_hypercall_names[op] != NULL)
+		return xen_hypercall_names[op];
+
+	return "";
+}
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/xen.h>
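
The new trace.c builds its hypercall-number-to-name table with designated array initializers, so the array index is the hypercall number itself and any gaps stay NULL; xen_hypercall_name() then only needs a bounds-and-NULL check. Below is a small stand-alone illustration of the same idiom; the opcode names and values are made up for the example.

/* Illustration only: sparse opcode-to-name table via designated initializers. */
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define N(x) [OP_##x] = "(" #x ")"

enum { OP_read = 0, OP_write = 1, OP_ioctl = 16 };

static const char *op_names[] = {
        N(read),
        N(write),
        N(ioctl),       /* entries 2..15 stay NULL */
};
#undef N

static const char *op_name(unsigned op)
{
        if (op < ARRAY_SIZE(op_names) && op_names[op] != NULL)
                return op_names[op];
        return "";
}

int main(void)
{
        printf("%u -> %s\n", 16u, op_name(16));
        printf("%u -> '%s'\n", 7u, op_name(7)); /* gap: empty string */
        return 0;
}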
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
new file mode 100644
index 000000000000..1cd7f4d11e29
--- /dev/null
+++ b/arch/x86/xen/vga.c
@@ -0,0 +1,67 @@
+#include <linux/screen_info.h>
+#include <linux/init.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+
+#include <xen/interface/xen.h>
+
+#include "xen-ops.h"
+
+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
+{
+	struct screen_info *screen_info = &boot_params.screen_info;
+
+	/* This is drawn from a dump from vgacon:startup in
+	 * standard Linux. */
+	screen_info->orig_video_mode = 3;
+	screen_info->orig_video_isVGA = 1;
+	screen_info->orig_video_lines = 25;
+	screen_info->orig_video_cols = 80;
+	screen_info->orig_video_ega_bx = 3;
+	screen_info->orig_video_points = 16;
+	screen_info->orig_y = screen_info->orig_video_lines - 1;
+
+	switch (info->video_type) {
+	case XEN_VGATYPE_TEXT_MODE_3:
+		if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
+		    + sizeof(info->u.text_mode_3))
+			break;
+		screen_info->orig_video_lines = info->u.text_mode_3.rows;
+		screen_info->orig_video_cols = info->u.text_mode_3.columns;
+		screen_info->orig_x = info->u.text_mode_3.cursor_x;
+		screen_info->orig_y = info->u.text_mode_3.cursor_y;
+		screen_info->orig_video_points =
+			info->u.text_mode_3.font_height;
+		break;
+
+	case XEN_VGATYPE_VESA_LFB:
+		if (size < offsetof(struct dom0_vga_console_info,
+				    u.vesa_lfb.gbl_caps))
+			break;
+		screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB;
+		screen_info->lfb_width = info->u.vesa_lfb.width;
+		screen_info->lfb_height = info->u.vesa_lfb.height;
+		screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
+		screen_info->lfb_base = info->u.vesa_lfb.lfb_base;
+		screen_info->lfb_size = info->u.vesa_lfb.lfb_size;
+		screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
+		screen_info->red_size = info->u.vesa_lfb.red_size;
+		screen_info->red_pos = info->u.vesa_lfb.red_pos;
+		screen_info->green_size = info->u.vesa_lfb.green_size;
+		screen_info->green_pos = info->u.vesa_lfb.green_pos;
+		screen_info->blue_size = info->u.vesa_lfb.blue_size;
+		screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
+		screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
+		screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+		if (size >= offsetof(struct dom0_vga_console_info,
+				     u.vesa_lfb.gbl_caps)
+		    + sizeof(info->u.vesa_lfb.gbl_caps))
+			screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
+		if (size >= offsetof(struct dom0_vga_console_info,
+				     u.vesa_lfb.mode_attrs)
+		    + sizeof(info->u.vesa_lfb.mode_attrs))
+			screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
+		break;
+	}
+}
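
xen_init_vga() only consumes a field of dom0_vga_console_info when the size passed in actually covers it, using offsetof() plus sizeof(); that keeps newer kernels working with providers that hand over a shorter, older version of the structure. Below is a stand-alone sketch of that idiom; the structure, its fields and the sample data are invented for the example.

/* Illustration only: consume optional trailing fields of a versioned struct. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct console_info_v2 {
        uint16_t rows;          /* present since v1 */
        uint16_t columns;       /* present since v1 */
        uint32_t capabilities;  /* added in v2 */
};

static void parse_console_info(const void *buf, size_t size)
{
        struct console_info_v2 info = { 0 };

        memcpy(&info, buf, size < sizeof(info) ? size : sizeof(info));

        printf("rows=%u cols=%u\n", info.rows, info.columns);

        /* Only trust 'capabilities' if the provider actually sent it. */
        if (size >= offsetof(struct console_info_v2, capabilities) +
                    sizeof(info.capabilities))
                printf("capabilities=%#x\n", info.capabilities);
        else
                printf("capabilities not supplied (old interface)\n");
}

int main(void)
{
        struct console_info_v2 new_style = { 25, 80, 0x3 };
        uint16_t old_style[2] = { 25, 80 };     /* v1: only rows/columns */

        parse_console_info(&new_style, sizeof(new_style));
        parse_console_info(old_style, sizeof(old_style));
        return 0;
}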
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 22a2093b5862..b040b0e518ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
 
 	/*
 	 * If there's something pending, mask events again so we can
-	 * jump back into xen_hypervisor_callback
+	 * jump back into xen_hypervisor_callback. Otherwise do not
+	 * touch XEN_vcpu_info_mask.
 	 */
-	sete XEN_vcpu_info_mask(%eax)
+	jne 1f
+	movb $1, XEN_vcpu_info_mask(%eax)
 
-	popl %eax
+1:	popl %eax
 
 	/*
 	 * From this point on the registers are restored and the stack
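
The old sete wrote XEN_vcpu_info_mask on every pass, storing 0 when nothing was pending, while the new jne/movb pair only stores 1 when an event really is pending and otherwise leaves the mask byte alone. Expressed as C, purely as an illustration of the behavioural difference (the variable and function names are invented):

/* Illustration only: unconditional store vs. store-only-when-pending. */
#include <stdbool.h>
#include <stdio.h>

static unsigned char vcpu_info_mask;

static void old_style(bool pending)
{
        vcpu_info_mask = pending;       /* unconditional store, like 'sete' */
}

static void new_style(bool pending)
{
        if (pending)                    /* like 'jne 1f' skipping the store */
                vcpu_info_mask = 1;
}

int main(void)
{
        vcpu_info_mask = 1;
        new_style(false);
        printf("mask after new_style(false): %u (left untouched)\n", vcpu_info_mask);

        vcpu_info_mask = 1;
        old_style(false);
        printf("mask after old_style(false): %u (cleared)\n", vcpu_info_mask);
        return 0;
}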
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 97dfdc8757b3..b095739ccd4c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -88,6 +88,17 @@ static inline void xen_uninit_lock_cpu(int cpu)
 }
 #endif
 
+struct dom0_vga_console_info;
+
+#ifdef CONFIG_XEN_DOM0
+void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+#else
+static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
+				       size_t size)
+{
+}
+#endif
+
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
 #define DECL_ASM(ret, name, ...)		\