author     Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
committer  Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
commit     d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree       7eab5ee5481cd3dcf1162329fec827177640018a /arch/x86/xen
parent     a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent     401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions:
* arch/x86/xen/setup.c
dc91c728fd "xen: allow extra memory to be in multiple regions"
24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
  conflicted on xen_add_extra_mem() updates. The resolution is
  trivial, as the latter just wants to replace
  memblock_x86_reserve_range() with memblock_reserve() (see the
  sketch after this list).
* drivers/pci/intel-iommu.c
166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
conflicted as the former moved the file under drivers/iommu/.
  Resolved by applying the changes from the latter to the moved
  file.
* mm/Kconfig
6661672053a "memblock: add NO_BOOTMEM config symbol"
c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
conflicted trivially. Both added config options. Just
letting both add their own options resolves the conflict.
* mm/memblock.c
d1f0ece6cdc "mm/memblock.c: small function definition fixes"
ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
  conflicted. The former updates a function that the latter
  removed. Resolution is trivial.
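
For reference, a rough sketch of the memblock API change behind the
xen/setup.c conflict (signatures recalled from memory, not quoted from
either commit): the old x86-only helper took a start/end range plus a
debug tag, while the generic replacement takes a base and a size.

        /* old x86-specific helper, removed by 24aa07882b: */
        memblock_x86_reserve_range(start, start + size, "EXTRA MEM");

        /* generic interface used after this merge: */
        memblock_reserve(start, size);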
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/Kconfig                 11
-rw-r--r--  arch/x86/xen/Makefile                 4
-rw-r--r--  arch/x86/xen/enlighten.c             36
-rw-r--r--  arch/x86/xen/grant-table.c            2
-rw-r--r--  arch/x86/xen/mmu.c                  205
-rw-r--r--  arch/x86/xen/multicalls.c           169
-rw-r--r--  arch/x86/xen/multicalls.h             6
-rw-r--r--  arch/x86/xen/p2m.c                  128
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c    2
-rw-r--r--  arch/x86/xen/setup.c                294
-rw-r--r--  arch/x86/xen/smp.c                   15
-rw-r--r--  arch/x86/xen/time.c                  21
-rw-r--r--  arch/x86/xen/trace.c                 62
-rw-r--r--  arch/x86/xen/vga.c                   67
-rw-r--r--  arch/x86/xen/xen-asm_32.S             8
-rw-r--r--  arch/x86/xen/xen-ops.h               11
16 files changed, 634 insertions, 407 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821cb2e09..26c731a106af 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
 
 config XEN_PVHVM
         def_bool y
-        depends on XEN
-        depends on X86_LOCAL_APIC
+        depends on XEN && PCI && X86_LOCAL_APIC
 
 config XEN_MAX_DOMAIN_MEMORY
         int
@@ -49,11 +48,3 @@ config XEN_DEBUG_FS
         help
           Enable statistics output and various tuning options in debugfs.
           Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
-        bool "Enable Xen debug checks"
-        depends on XEN
-        default n
-        help
-          Enable various WARN_ON checks in the Xen MMU code.
-          Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 17c565de3d64..add2c2d729ce 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,8 +15,10 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
         grant-table.o suspend.o platform-pci-unplug.o \
         p2m.o
 
+obj-$(CONFIG_EVENT_TRACING) += trace.o
+
 obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
-
+obj-$(CONFIG_XEN_DOM0) += vga.o
 obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5525163a0398..1f928659c338 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
 
 unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
 EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
 
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
                 ~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
                   (1 << X86_FEATURE_ACPI));  /* disable ACPI */
         ax = 1;
+        cx = 0;
         xen_cpuid(&ax, &bx, &cx, &dx);
 
         xsave_mask =
@@ -341,6 +342,8 @@ static void xen_set_ldt(const void *addr, unsigned entries)
         struct mmuext_op *op;
         struct multicall_space mcs = xen_mc_entry(sizeof(*op));
 
+        trace_xen_cpu_set_ldt(addr, entries);
+
         op = mcs.args;
         op->cmd = MMUEXT_SET_LDT;
         op->arg1.linear_addr = (unsigned long)addr;
@@ -496,6 +499,8 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
         xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
         u64 entry = *(u64 *)ptr;
 
+        trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
+
         preempt_disable();
 
         xen_mc_flush();
@@ -565,6 +570,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
         unsigned long p = (unsigned long)&dt[entrynum];
         unsigned long start, end;
 
+        trace_xen_cpu_write_idt_entry(dt, entrynum, g);
+
         preempt_disable();
 
         start = __this_cpu_read(idt_desc.address);
@@ -619,6 +626,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
         static DEFINE_SPINLOCK(lock);
         static struct trap_info traps[257];
 
+        trace_xen_cpu_load_idt(desc);
+
         spin_lock(&lock);
 
         __get_cpu_var(idt_desc) = *desc;
@@ -637,6 +646,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
 static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
                                 const void *desc, int type)
 {
+        trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
+
         preempt_disable();
 
         switch (type) {
@@ -665,6 +676,8 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
                                             const void *desc, int type)
 {
+        trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
+
         switch (type) {
         case DESC_LDT:
         case DESC_TSS:
@@ -684,7 +697,9 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 static void xen_load_sp0(struct tss_struct *tss,
                          struct thread_struct *thread)
 {
-        struct multicall_space mcs = xen_mc_entry(0);
+        struct multicall_space mcs;
+
+        mcs = xen_mc_entry(0);
         MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
         xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
@@ -937,6 +952,10 @@ static const struct pv_info xen_info __initconst = {
         .paravirt_enabled = 1,
         .shared_kernel_pmd = 0,
 
+#ifdef CONFIG_X86_64
+        .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
         .name = "Xen",
 };
 
@@ -1248,6 +1267,14 @@ asmlinkage void __init xen_start_kernel(void)
                 if (pci_xen)
                         x86_init.pci.arch_init = pci_xen_init;
         } else {
+                const struct dom0_vga_console_info *info =
+                        (void *)((char *)xen_start_info +
+                                 xen_start_info->console.dom0.info_off);
+
+                xen_init_vga(info, xen_start_info->console.dom0.info_size);
+                xen_start_info->console.domU.mfn = 0;
+                xen_start_info->console.domU.evtchn = 0;
+
                 /* Make sure ACS will be enabled */
                 pci_request_acs();
         }
@@ -1329,7 +1356,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
         int cpu = (long)hcpu;
         switch (action) {
         case CPU_UP_PREPARE:
-                per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+                xen_vcpu_setup(cpu);
                 if (xen_have_vector_callback)
                         xen_init_lock_cpu(cpu);
                 break;
@@ -1359,7 +1386,6 @@ static void __init xen_hvm_guest_init(void)
         xen_hvm_smp_init();
         register_cpu_notifier(&xen_hvm_cpu_notifier);
         xen_unplug_emulated_devices();
-        have_vcpu_info_placement = 0;
         x86_init.irqs.intr_init = xen_init_IRQ;
         xen_hvm_init_time_ops();
         xen_hvm_init_mmu_ops();
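
The trace_xen_cpu_*() hooks added above pair with TRACE_EVENT
definitions in the new include/trace/events/xen.h. As a hedged
illustration of the shape of such a definition (paraphrased from
memory, not copied verbatim from the header):

        TRACE_EVENT(xen_cpu_set_ldt,
                    TP_PROTO(const void *addr, unsigned entries),
                    TP_ARGS(addr, entries),
                    TP_STRUCT__entry(
                            __field(const void *, addr)
                            __field(unsigned, entries)
                            ),
                    TP_fast_assign(__entry->addr = addr;
                                   __entry->entries = entries),
                    TP_printk("addr %p entries %u",
                              __entry->addr, __entry->entries)
                );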
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5224d1..5a40d24ba331 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -71,7 +71,7 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 
         if (shared == NULL) {
                 struct vm_struct *area =
-                        xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+                        alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
                 BUG_ON(area == NULL);
                 shared = area->addr;
                 *__shared = shared;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ad54fa10f8a2..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -48,6 +48,8 @@
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
 
+#include <trace/events/xen.h>
+
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
@@ -194,6 +196,8 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
         struct multicall_space mcs;
         struct mmu_update *u;
 
+        trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
+
         mcs = xen_mc_entry(sizeof(*u));
         u = mcs.args;
 
@@ -225,6 +229,24 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
         *u = *update;
 }
 
+static void xen_extend_mmuext_op(const struct mmuext_op *op)
+{
+        struct multicall_space mcs;
+        struct mmuext_op *u;
+
+        mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));
+
+        if (mcs.mc != NULL) {
+                mcs.mc->args[1]++;
+        } else {
+                mcs = __xen_mc_entry(sizeof(*u));
+                MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+        }
+
+        u = mcs.args;
+        *u = *op;
+}
+
 static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 {
         struct mmu_update u;
@@ -245,6 +267,8 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 
 static void xen_set_pmd(pmd_t *ptr, pmd_t val)
 {
+        trace_xen_mmu_set_pmd(ptr, val);
+
         /* If page is not pinned, we can just update the entry
            directly */
         if (!xen_page_pinned(ptr)) {
@@ -282,22 +306,30 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
         return true;
 }
 
-static void xen_set_pte(pte_t *ptep, pte_t pteval)
+static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
 {
         if (!xen_batched_set_pte(ptep, pteval))
                 native_set_pte(ptep, pteval);
 }
 
+static void xen_set_pte(pte_t *ptep, pte_t pteval)
+{
+        trace_xen_mmu_set_pte(ptep, pteval);
+        __xen_set_pte(ptep, pteval);
+}
+
 static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval)
 {
-        xen_set_pte(ptep, pteval);
+        trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
+        __xen_set_pte(ptep, pteval);
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
                                  unsigned long addr, pte_t *ptep)
 {
         /* Just return the pte as-is.  We preserve the bits on commit */
+        trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
         return *ptep;
 }
 
@@ -306,6 +338,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 {
         struct mmu_update u;
 
+        trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
         xen_mc_batch();
 
         u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
@@ -462,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
-        phys_addr_t addr = (pte & PTE_PFN_MASK);
-        phys_addr_t other_addr;
-        bool io_page = false;
-        pte_t _pte;
-
-        if (pte & _PAGE_IOMAP)
-                io_page = true;
-
-        _pte = xen_make_pte(pte);
-
-        if (!addr)
-                return _pte;
-
-        if (io_page &&
-            (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-                other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-                WARN_ONCE(addr != other_addr,
-                        "0x%lx is using VM_IO, but it is 0x%lx!\n",
-                        (unsigned long)addr, (unsigned long)other_addr);
-        } else {
-                pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
-                other_addr = (_pte.pte & PTE_PFN_MASK);
-                WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
-                        "0x%lx is missing VM_IO (and wasn't fixed)!\n",
-                        (unsigned long)addr);
-        }
-
-        return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
 static pgd_t xen_make_pgd(pgdval_t pgd)
 {
         pgd = pte_pfn_to_mfn(pgd);
@@ -530,6 +528,8 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 
 static void xen_set_pud(pud_t *ptr, pud_t val)
 {
+        trace_xen_mmu_set_pud(ptr, val);
+
         /* If page is not pinned, we can just update the entry
            directly */
         if (!xen_page_pinned(ptr)) {
@@ -543,17 +543,20 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
 #ifdef CONFIG_X86_PAE
 static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+        trace_xen_mmu_set_pte_atomic(ptep, pte);
         set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
 static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
+        trace_xen_mmu_pte_clear(mm, addr, ptep);
         if (!xen_batched_set_pte(ptep, native_make_pte(0)))
                 native_pte_clear(mm, addr, ptep);
 }
 
 static void xen_pmd_clear(pmd_t *pmdp)
 {
+        trace_xen_mmu_pmd_clear(pmdp);
         set_pmd(pmdp, __pmd(0));
 }
 #endif  /* CONFIG_X86_PAE */
@@ -629,6 +632,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
         pgd_t *user_ptr = xen_get_user_pgd(ptr);
 
+        trace_xen_mmu_set_pgd(ptr, user_ptr, val);
+
         /* If page is not pinned, we can just update the entry
            directly */
         if (!xen_page_pinned(ptr)) {
@@ -788,14 +793,12 @@ static void xen_pte_unlock(void *v)
 
 static void xen_do_pin(unsigned level, unsigned long pfn)
 {
-        struct mmuext_op *op;
-        struct multicall_space mcs;
+        struct mmuext_op op;
 
-        mcs = __xen_mc_entry(sizeof(*op));
-        op = mcs.args;
-        op->cmd = level;
-        op->arg1.mfn = pfn_to_mfn(pfn);
-        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+        op.cmd = level;
+        op.arg1.mfn = pfn_to_mfn(pfn);
+
+        xen_extend_mmuext_op(&op);
 }
 
 static int xen_pin_page(struct mm_struct *mm, struct page *page,
@@ -863,6 +866,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
+        trace_xen_mmu_pgd_pin(mm, pgd);
+
         xen_mc_batch();
 
         if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -988,6 +993,8 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
 /* Release a pagetables pages back as normal RW */
 static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 {
+        trace_xen_mmu_pgd_unpin(mm, pgd);
+
         xen_mc_batch();
 
         xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1196,6 +1203,8 @@ static void xen_flush_tlb(void)
         struct mmuext_op *op;
         struct multicall_space mcs;
 
+        trace_xen_mmu_flush_tlb(0);
+
         preempt_disable();
 
         mcs = xen_mc_entry(sizeof(*op));
@@ -1214,6 +1223,8 @@ static void xen_flush_tlb_single(unsigned long addr)
         struct mmuext_op *op;
         struct multicall_space mcs;
 
+        trace_xen_mmu_flush_tlb_single(addr);
+
         preempt_disable();
 
         mcs = xen_mc_entry(sizeof(*op));
@@ -1240,6 +1251,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
         } *args;
         struct multicall_space mcs;
 
+        trace_xen_mmu_flush_tlb_others(cpus, mm, va);
+
         if (cpumask_empty(cpus))
                 return;         /* nothing to do */
 
@@ -1275,10 +1288,11 @@ static void set_current_cr3(void *v)
 
 static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
-        struct mmuext_op *op;
-        struct multicall_space mcs;
+        struct mmuext_op op;
         unsigned long mfn;
 
+        trace_xen_mmu_write_cr3(kernel, cr3);
+
         if (cr3)
                 mfn = pfn_to_mfn(PFN_DOWN(cr3));
         else
@@ -1286,13 +1300,10 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
 
         WARN_ON(mfn == 0 && kernel);
 
-        mcs = __xen_mc_entry(sizeof(*op));
-
-        op = mcs.args;
-        op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
-        op->arg1.mfn = mfn;
+        op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
+        op.arg1.mfn = mfn;
 
-        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+        xen_extend_mmuext_op(&op);
 
         if (kernel) {
                 percpu_write(xen_cr3, cr3);
@@ -1451,19 +1462,52 @@ static void __init xen_release_pmd_init(unsigned long pfn)
         make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
+static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+        struct multicall_space mcs;
+        struct mmuext_op *op;
+
+        mcs = __xen_mc_entry(sizeof(*op));
+        op = mcs.args;
+        op->cmd = cmd;
+        op->arg1.mfn = pfn_to_mfn(pfn);
+
+        MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+}
+
+static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
+{
+        struct multicall_space mcs;
+        unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
+
+        mcs = __xen_mc_entry(0);
+        MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
+                                pfn_pte(pfn, prot), 0);
+}
+
 /* This needs to make sure the new pte page is pinned iff its being
    attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
+static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
+                                    unsigned level)
 {
-        struct page *page = pfn_to_page(pfn);
+        bool pinned = PagePinned(virt_to_page(mm->pgd));
+
+        trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
+
+        if (pinned) {
+                struct page *page = pfn_to_page(pfn);
 
-        if (PagePinned(virt_to_page(mm->pgd))) {
                 SetPagePinned(page);
 
                 if (!PageHighMem(page)) {
-                        make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
+                        xen_mc_batch();
+
+                        __set_pfn_prot(pfn, PAGE_KERNEL_RO);
+
                         if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-                                pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+                                __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+
+                        xen_mc_issue(PARAVIRT_LAZY_MMU);
                 } else {
                         /* make sure there are no stray mappings of
                            this page */
@@ -1483,15 +1527,23 @@ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
 }
 
 /* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(unsigned long pfn, unsigned level)
+static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
 {
         struct page *page = pfn_to_page(pfn);
+        bool pinned = PagePinned(page);
+
+        trace_xen_mmu_release_ptpage(pfn, level, pinned);
 
-        if (PagePinned(page)) {
+        if (pinned) {
                 if (!PageHighMem(page)) {
+                        xen_mc_batch();
+
                         if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-                                pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
-                        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+                                __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+
+                        __set_pfn_prot(pfn, PAGE_KERNEL);
+
+                        xen_mc_issue(PARAVIRT_LAZY_MMU);
                 }
                 ClearPagePinned(page);
         }
@@ -1626,15 +1678,17 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 void __init xen_setup_machphys_mapping(void)
 {
         struct xen_machphys_mapping mapping;
-        unsigned long machine_to_phys_nr_ents;
 
         if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
                 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
-                machine_to_phys_nr_ents = mapping.max_mfn + 1;
+                machine_to_phys_nr = mapping.max_mfn + 1;
         } else {
-                machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+                machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
         }
-        machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+        WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+                < machine_to_phys_mapping);
+#endif
 }
 
 #ifdef CONFIG_X86_64
@@ -1825,6 +1879,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 # endif
 #else
         case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+        case VVAR_PAGE:
 #endif
         case FIX_TEXT_POKE0:
         case FIX_TEXT_POKE1:
@@ -1865,7 +1920,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #ifdef CONFIG_X86_64
         /* Replicate changes to map the vsyscall page into the user
            pagetable vsyscall mapping. */
-        if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+        if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+            idx == VVAR_PAGE) {
                 unsigned long vaddr = __fix_to_virt(idx);
                 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
         }
@@ -1897,9 +1953,6 @@ void __init xen_ident_map_ISA(void)
 
 static void __init xen_post_allocator_init(void)
 {
-#ifdef CONFIG_XEN_DEBUG
-        pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
         pv_mmu_ops.set_pte = xen_set_pte;
         pv_mmu_ops.set_pmd = xen_set_pmd;
         pv_mmu_ops.set_pud = xen_set_pud;
@@ -2309,17 +2362,3 @@ out:
         return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
-        return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
-        .open           = p2m_dump_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 1b2b73ff0a6e..0d82003e76ad 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -30,12 +30,13 @@
 
 #define MC_BATCH        32
 
-#define MC_DEBUG        1
+#define MC_DEBUG        0
 
 #define MC_ARGS         (MC_BATCH * 16)
 
 
 struct mc_buffer {
+        unsigned mcidx, argidx, cbidx;
         struct multicall_entry entries[MC_BATCH];
 #if MC_DEBUG
         struct multicall_entry debug[MC_BATCH];
@@ -46,85 +47,15 @@ struct mc_buffer {
                 void (*fn)(void *);
                 void *data;
         } callbacks[MC_BATCH];
-        unsigned mcidx, argidx, cbidx;
 };
 
 static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
 DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
 
-/* flush reasons 0- slots, 1- args, 2- callbacks */
-enum flush_reasons
-{
-        FL_SLOTS,
-        FL_ARGS,
-        FL_CALLBACKS,
-
-        FL_N_REASONS
-};
-
-#ifdef CONFIG_XEN_DEBUG_FS
-#define NHYPERCALLS     40              /* not really */
-
-static struct {
-        unsigned histo[MC_BATCH+1];
-
-        unsigned issued;
-        unsigned arg_total;
-        unsigned hypercalls;
-        unsigned histo_hypercalls[NHYPERCALLS];
-
-        unsigned flush[FL_N_REASONS];
-} mc_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
-        if (unlikely(zero_stats)) {
-                memset(&mc_stats, 0, sizeof(mc_stats));
-                zero_stats = 0;
-        }
-}
-
-static void mc_add_stats(const struct mc_buffer *mc)
-{
-        int i;
-
-        check_zero();
-
-        mc_stats.issued++;
-        mc_stats.hypercalls += mc->mcidx;
-        mc_stats.arg_total += mc->argidx;
-
-        mc_stats.histo[mc->mcidx]++;
-        for(i = 0; i < mc->mcidx; i++) {
-                unsigned op = mc->entries[i].op;
-                if (op < NHYPERCALLS)
-                        mc_stats.histo_hypercalls[op]++;
-        }
-}
-
-static void mc_stats_flush(enum flush_reasons idx)
-{
-        check_zero();
-
-        mc_stats.flush[idx]++;
-}
-
-#else  /* !CONFIG_XEN_DEBUG_FS */
-
-static inline void mc_add_stats(const struct mc_buffer *mc)
-{
-}
-
-static inline void mc_stats_flush(enum flush_reasons idx)
-{
-}
-#endif  /* CONFIG_XEN_DEBUG_FS */
-
 void xen_mc_flush(void)
 {
         struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+        struct multicall_entry *mc;
         int ret = 0;
         unsigned long flags;
         int i;
@@ -135,9 +66,26 @@ void xen_mc_flush(void)
            something in the middle */
         local_irq_save(flags);
 
-        mc_add_stats(b);
+        trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
+
+        switch (b->mcidx) {
+        case 0:
+                /* no-op */
+                BUG_ON(b->argidx != 0);
+                break;
+
+        case 1:
+                /* Singleton multicall - bypass multicall machinery
+                   and just do the call directly. */
+                mc = &b->entries[0];
+
+                mc->result = privcmd_call(mc->op,
+                                          mc->args[0], mc->args[1], mc->args[2],
+                                          mc->args[3], mc->args[4]);
+                ret = mc->result < 0;
+                break;
 
-        if (b->mcidx) {
+        default:
 #if MC_DEBUG
                 memcpy(b->debug, b->entries,
                        b->mcidx * sizeof(struct multicall_entry));
@@ -164,11 +112,10 @@ void xen_mc_flush(void)
                         }
                 }
 #endif
+        }
 
         b->mcidx = 0;
         b->argidx = 0;
-        } else
-                BUG_ON(b->argidx != 0);
 
         for (i = 0; i < b->cbidx; i++) {
                 struct callback *cb = &b->callbacks[i];
@@ -188,18 +135,21 @@ struct multicall_space __xen_mc_entry(size_t args)
         struct multicall_space ret;
         unsigned argidx = roundup(b->argidx, sizeof(u64));
 
+        trace_xen_mc_entry_alloc(args);
+
         BUG_ON(preemptible());
         BUG_ON(b->argidx >= MC_ARGS);
 
-        if (b->mcidx == MC_BATCH ||
-            (argidx + args) >= MC_ARGS) {
-                mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
+        if (unlikely(b->mcidx == MC_BATCH ||
+                     (argidx + args) >= MC_ARGS)) {
+                trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ?
+                                          XEN_MC_FL_BATCH : XEN_MC_FL_ARGS);
                 xen_mc_flush();
                 argidx = roundup(b->argidx, sizeof(u64));
         }
 
         ret.mc = &b->entries[b->mcidx];
-#ifdef MC_DEBUG
+#if MC_DEBUG
         b->caller[b->mcidx] = __builtin_return_address(0);
 #endif
         b->mcidx++;
@@ -218,20 +168,25 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
         BUG_ON(preemptible());
         BUG_ON(b->argidx >= MC_ARGS);
 
-        if (b->mcidx == 0)
-                return ret;
-
-        if (b->entries[b->mcidx - 1].op != op)
-                return ret;
+        if (unlikely(b->mcidx == 0 ||
+                     b->entries[b->mcidx - 1].op != op)) {
+                trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP);
+                goto out;
+        }
 
-        if ((b->argidx + size) >= MC_ARGS)
-                return ret;
+        if (unlikely((b->argidx + size) >= MC_ARGS)) {
+                trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE);
+                goto out;
+        }
 
         ret.mc = &b->entries[b->mcidx - 1];
         ret.args = &b->args[b->argidx];
         b->argidx += size;
 
         BUG_ON(b->argidx >= MC_ARGS);
+
+        trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK);
+out:
         return ret;
 }
 
@@ -241,43 +196,13 @@ void xen_mc_callback(void (*fn)(void *), void *data)
         struct callback *cb;
 
         if (b->cbidx == MC_BATCH) {
-                mc_stats_flush(FL_CALLBACKS);
+                trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK);
                 xen_mc_flush();
         }
 
+        trace_xen_mc_callback(fn, data);
+
         cb = &b->callbacks[b->cbidx++];
         cb->fn = fn;
         cb->data = data;
 }
-
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct dentry *d_mc_debug;
-
-static int __init xen_mc_debugfs(void)
-{
-        struct dentry *d_xen = xen_init_debugfs();
-
-        if (d_xen == NULL)
-                return -ENOMEM;
-
-        d_mc_debug = debugfs_create_dir("multicalls", d_xen);
-
-        debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
-
-        debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
-        debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
-        debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
-
-        xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
-                                     mc_stats.histo, MC_BATCH);
-        xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
-                                     mc_stats.histo_hypercalls, NHYPERCALLS);
-        xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
-                                     mc_stats.flush, FL_N_REASONS);
-
-        return 0;
-}
-fs_initcall(xen_mc_debugfs);
-
-#endif  /* CONFIG_XEN_DEBUG_FS */
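
The rewritten xen_mc_flush() above adds a fast path: a batch holding a
single entry skips the multicall machinery and issues the hypercall
directly via privcmd_call(). A minimal stand-alone analogy of that
batch-then-flush pattern (hypothetical user-space C; none of these
names exist in the kernel):

        #include <stdio.h>

        #define BATCH 4

        struct entry { int op; long arg; long result; };

        static struct entry entries[BATCH];
        static unsigned idx;

        /* stands in for a single direct hypercall (privcmd_call) */
        static long do_call(int op, long arg)
        {
                return op + arg;
        }

        static void flush(void)
        {
                unsigned i;

                if (idx == 1) {
                        /* singleton: bypass the batch loop entirely */
                        entries[0].result = do_call(entries[0].op, entries[0].arg);
                } else {
                        /* stands in for the one-shot multicall hypercall */
                        for (i = 0; i < idx; i++)
                                entries[i].result = do_call(entries[i].op, entries[i].arg);
                }
                idx = 0;
        }

        static void queue(int op, long arg)
        {
                if (idx == BATCH)       /* buffer full: flush first */
                        flush();
                entries[idx].op = op;
                entries[idx].arg = arg;
                idx++;
        }

        int main(void)
        {
                queue(1, 10);
                flush();                /* takes the singleton fast path */
                queue(2, 20);
                queue(3, 30);
                flush();                /* takes the batched path */
                printf("flushed both batches\n");
                return 0;
        }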
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 4ec8035e3216..dee79b78a90f 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -1,6 +1,8 @@
 #ifndef _XEN_MULTICALLS_H
 #define _XEN_MULTICALLS_H
 
+#include <trace/events/xen.h>
+
 #include "xen-ops.h"
 
 /* Multicalls */
@@ -20,8 +22,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
 static inline void xen_mc_batch(void)
 {
         unsigned long flags;
+
         /* need to disable interrupts until this entry is complete */
         local_irq_save(flags);
+        trace_xen_mc_batch(paravirt_get_lazy_mode());
         __this_cpu_write(xen_mc_irq_flags, flags);
 }
 
@@ -37,6 +41,8 @@ void xen_mc_flush(void);
 /* Issue a multicall if we're not in a lazy mode */
 static inline void xen_mc_issue(unsigned mode)
 {
+        trace_xen_mc_issue(mode);
+
         if ((paravirt_get_lazy_mode() & mode) == 0)
                 xen_mc_flush();
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d5440..1b267e75158d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
 
+#include "multicalls.h"
 #include "xen-ops.h"
 
 static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+                     struct gnttab_map_grant_ref *kmap_op)
 {
         unsigned long flags;
         unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
                          "m2p_add_override: pfn %lx not mapped", pfn))
                         return -EINVAL;
         }
-
-        page->private = mfn;
+        WARN_ON(PagePrivate(page));
+        SetPagePrivate(page);
+        set_page_private(page, mfn);
         page->index = pfn_to_mfn(pfn);
 
         if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
                 return -ENOMEM;
 
-        if (clear_pte && !PageHighMem(page))
-                /* Just zap old mapping for now */
-                pte_clear(&init_mm, address, ptep);
+        if (kmap_op != NULL) {
+                if (!PageHighMem(page)) {
+                        struct multicall_space mcs =
+                                xen_mc_entry(sizeof(*kmap_op));
+
+                        MULTI_grant_table_op(mcs.mc,
+                                        GNTTABOP_map_grant_ref, kmap_op, 1);
+
+                        xen_mc_issue(PARAVIRT_LAZY_MMU);
+                }
+                /* let's use dev_bus_addr to record the old mfn instead */
+                kmap_op->dev_bus_addr = page->index;
+                page->index = (unsigned long) kmap_op;
+        }
         spin_lock_irqsave(&m2p_override_lock, flags);
         list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
         spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
         spin_lock_irqsave(&m2p_override_lock, flags);
         list_del(&page->lru);
         spin_unlock_irqrestore(&m2p_override_lock, flags);
-        set_phys_to_machine(pfn, page->index);
+        WARN_ON(!PagePrivate(page));
+        ClearPagePrivate(page);
 
-        if (clear_pte && !PageHighMem(page))
-                set_pte_at(&init_mm, address, ptep,
-                                pfn_pte(pfn, PAGE_KERNEL));
-                /* No tlb flush necessary because the caller already
-                 * left the pte unmapped. */
+        if (clear_pte) {
+                struct gnttab_map_grant_ref *map_op =
+                        (struct gnttab_map_grant_ref *) page->index;
+                set_phys_to_machine(pfn, map_op->dev_bus_addr);
+                if (!PageHighMem(page)) {
+                        struct multicall_space mcs;
+                        struct gnttab_unmap_grant_ref *unmap_op;
+
+                        /*
+                         * It might be that we queued all the m2p grant table
+                         * hypercalls in a multicall, then m2p_remove_override
+                         * get called before the multicall has actually been
+                         * issued. In this case handle is going to -1 because
+                         * it hasn't been modified yet.
+                         */
+                        if (map_op->handle == -1)
+                                xen_mc_flush();
+                        /*
+                         * Now if map_op->handle is negative it means that the
+                         * hypercall actually returned an error.
+                         */
+                        if (map_op->handle == GNTST_general_error) {
+                                printk(KERN_WARNING "m2p_remove_override: "
+                                                "pfn %lx mfn %lx, failed to modify kernel mappings",
+                                                pfn, mfn);
+                                return -1;
+                        }
+
+                        mcs = xen_mc_entry(
+                                        sizeof(struct gnttab_unmap_grant_ref));
+                        unmap_op = mcs.args;
+                        unmap_op->host_addr = map_op->host_addr;
+                        unmap_op->handle = map_op->handle;
+                        unmap_op->dev_bus_addr = 0;
+
+                        MULTI_grant_table_op(mcs.mc,
+                                        GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+                        xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+                        set_pte_at(&init_mm, address, ptep,
+                                        pfn_pte(pfn, PAGE_KERNEL));
+                        __flush_tlb_single(address);
+                        map_op->host_addr = 0;
+                }
+        } else
+                set_phys_to_machine(pfn, page->index);
 
         return 0;
 }
@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
         spin_lock_irqsave(&m2p_override_lock, flags);
 
         list_for_each_entry(p, bucket, lru) {
-                if (p->private == mfn) {
+                if (page_private(p) == mfn) {
                         ret = p;
                         break;
                 }
@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 
 #ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
 {
         static const char * const level_name[] = { "top", "middle",
-                                                "entry", "abnormal" };
-        static const char * const type_name[] = { "identity", "missing",
-                                                "pfn", "abnormal"};
+                                                "entry", "abnormal", "error"};
 #define TYPE_IDENTITY 0
 #define TYPE_MISSING 1
 #define TYPE_PFN 2
 #define TYPE_UNKNOWN 3
+        static const char * const type_name[] = {
+                                [TYPE_IDENTITY] = "identity",
+                                [TYPE_MISSING] = "missing",
+                                [TYPE_PFN] = "pfn",
+                                [TYPE_UNKNOWN] = "abnormal"};
         unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
         unsigned int uninitialized_var(prev_level);
         unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
 #undef TYPE_PFN
 #undef TYPE_UNKNOWN
 }
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+        return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+        .open           = p2m_dump_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+        struct dentry *d_xen = xen_init_debugfs();
+
+        if (d_xen == NULL)
+                return -ENOMEM;
+
+        d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+        debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+        return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 25c52f94a27c..ffcf2615640b 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 #ifdef CONFIG_XEN_PVHVM
 static int xen_emul_unplug;
 
-static int __init check_platform_magic(void)
+static int check_platform_magic(void)
 {
         short magic;
         char protocol;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 73daaf75801a..f5e1362550e7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/pm.h> | 10 | #include <linux/pm.h> |
11 | #include <linux/memblock.h> | 11 | #include <linux/memblock.h> |
12 | #include <linux/cpuidle.h> | ||
12 | 13 | ||
13 | #include <asm/elf.h> | 14 | #include <asm/elf.h> |
14 | #include <asm/vdso.h> | 15 | #include <asm/vdso.h> |
@@ -36,7 +37,10 @@ extern void xen_syscall_target(void); | |||
36 | extern void xen_syscall32_target(void); | 37 | extern void xen_syscall32_target(void); |
37 | 38 | ||
38 | /* Amount of extra memory space we add to the e820 ranges */ | 39 | /* Amount of extra memory space we add to the e820 ranges */ |
39 | phys_addr_t xen_extra_mem_start, xen_extra_mem_size; | 40 | struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; |
41 | |||
42 | /* Number of pages released from the initial allocation. */ | ||
43 | unsigned long xen_released_pages; | ||
40 | 44 | ||
41 | /* | 45 | /* |
42 | * The maximum amount of extra memory compared to the base size. The | 46 | * The maximum amount of extra memory compared to the base size. The |
@@ -50,50 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; | |||
50 | */ | 54 | */ |
51 | #define EXTRA_MEM_RATIO (10) | 55 | #define EXTRA_MEM_RATIO (10) |
52 | 56 | ||
53 | static void __init xen_add_extra_mem(unsigned long pages) | 57 | static void __init xen_add_extra_mem(u64 start, u64 size) |
54 | { | 58 | { |
55 | unsigned long pfn; | 59 | unsigned long pfn; |
60 | int i; | ||
56 | 61 | ||
57 | u64 size = (u64)pages * PAGE_SIZE; | 62 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
58 | u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; | 63 | /* Add new region. */ |
59 | 64 | if (xen_extra_mem[i].size == 0) { | |
60 | if (!pages) | 65 | xen_extra_mem[i].start = start; |
61 | return; | 66 | xen_extra_mem[i].size = size; |
62 | 67 | break; | |
63 | e820_add_region(extra_start, size, E820_RAM); | 68 | } |
64 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 69 | /* Append to existing region. */ |
65 | 70 | if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) { | |
66 | memblock_reserve(extra_start, size); | 71 | xen_extra_mem[i].size += size; |
72 | break; | ||
73 | } | ||
74 | } | ||
75 | if (i == XEN_EXTRA_MEM_MAX_REGIONS) | ||
76 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); | ||
67 | 77 | ||
68 | xen_extra_mem_size += size; | 78 | memblock_reserve(start, size); |
69 | 79 | ||
70 | xen_max_p2m_pfn = PFN_DOWN(extra_start + size); | 80 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
71 | 81 | ||
72 | for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) | 82 | for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++) |
73 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 83 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
74 | } | 84 | } |
75 | 85 | ||
76 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, | 86 | static unsigned long __init xen_release_chunk(unsigned long start, |
77 | phys_addr_t end_addr) | 87 | unsigned long end) |
78 | { | 88 | { |
79 | struct xen_memory_reservation reservation = { | 89 | struct xen_memory_reservation reservation = { |
80 | .address_bits = 0, | 90 | .address_bits = 0, |
81 | .extent_order = 0, | 91 | .extent_order = 0, |
82 | .domid = DOMID_SELF | 92 | .domid = DOMID_SELF |
83 | }; | 93 | }; |
84 | unsigned long start, end; | ||
85 | unsigned long len = 0; | 94 | unsigned long len = 0; |
86 | unsigned long pfn; | 95 | unsigned long pfn; |
87 | int ret; | 96 | int ret; |
88 | 97 | ||
89 | start = PFN_UP(start_addr); | ||
90 | end = PFN_DOWN(end_addr); | ||
91 | |||
92 | if (end <= start) | ||
93 | return 0; | ||
94 | |||
95 | printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", | ||
96 | start, end); | ||
97 | for(pfn = start; pfn < end; pfn++) { | 98 | for(pfn = start; pfn < end; pfn++) { |
98 | unsigned long mfn = pfn_to_mfn(pfn); | 99 | unsigned long mfn = pfn_to_mfn(pfn); |
99 | 100 | ||
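Annotation: xen_add_extra_mem() now takes an explicit (start, size) pair and keeps a small fixed table of regions instead of a single range; a chunk that butts up against an existing region is appended to it, otherwise it claims a free slot, and overflowing the table only earns a warning. A compilable sketch of that merge logic, where MAX_REGIONS and struct region are stand-ins for the kernel's XEN_EXTRA_MEM_MAX_REGIONS and struct xen_memory_region:

#include <stdio.h>

#define MAX_REGIONS 4	/* stand-in for XEN_EXTRA_MEM_MAX_REGIONS */

struct region { unsigned long long start, size; };
static struct region extra[MAX_REGIONS];

static void add_extra_mem(unsigned long long start, unsigned long long size)
{
	int i;

	for (i = 0; i < MAX_REGIONS; i++) {
		if (extra[i].size == 0) {	/* free slot: start a new region */
			extra[i].start = start;
			extra[i].size = size;
			return;
		}
		if (extra[i].start + extra[i].size == start) {	/* contiguous: append */
			extra[i].size += size;
			return;
		}
	}
	fprintf(stderr, "not enough extra memory regions\n");
}

int main(void)
{
	add_extra_mem(0x100000, 0x1000);
	add_extra_mem(0x101000, 0x2000);	/* merges into the first region */
	add_extra_mem(0x800000, 0x1000);	/* discontiguous: second region */
	for (int i = 0; i < MAX_REGIONS && extra[i].size; i++)
		printf("region %d: %#llx+%#llx\n", i, extra[i].start, extra[i].size);
	return 0;
}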
@@ -106,100 +107,104 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, | |||
106 | 107 | ||
107 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 108 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, |
108 | &reservation); | 109 | &reservation); |
109 | WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", | 110 | WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); |
110 | start, end, ret); | ||
111 | if (ret == 1) { | 111 | if (ret == 1) { |
112 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 112 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
113 | len++; | 113 | len++; |
114 | } | 114 | } |
115 | } | 115 | } |
116 | printk(KERN_CONT "%ld pages freed\n", len); | 116 | printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", |
117 | start, end, len); | ||
117 | 118 | ||
118 | return len; | 119 | return len; |
119 | } | 120 | } |
120 | 121 | ||
121 | static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, | 122 | static unsigned long __init xen_set_identity_and_release( |
122 | const struct e820map *e820) | 123 | const struct e820entry *list, size_t map_size, unsigned long nr_pages) |
123 | { | 124 | { |
124 | phys_addr_t max_addr = PFN_PHYS(max_pfn); | 125 | phys_addr_t start = 0; |
125 | phys_addr_t last_end = ISA_END_ADDRESS; | ||
126 | unsigned long released = 0; | 126 | unsigned long released = 0; |
127 | unsigned long identity = 0; | ||
128 | const struct e820entry *entry; | ||
127 | int i; | 129 | int i; |
128 | 130 | ||
129 | /* Free any unused memory above the low 1Mbyte. */ | 131 | /* |
130 | for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { | 132 | * Combine non-RAM regions and gaps until a RAM region (or the |
131 | phys_addr_t end = e820->map[i].addr; | 133 | * end of the map) is reached, then set the 1:1 map and |
132 | end = min(max_addr, end); | 134 | * release the pages (if available) in those non-RAM regions. |
135 | * | ||
136 | * The combined non-RAM regions are rounded to a whole number | ||
137 | * of pages so any partial pages are accessible via the 1:1 | ||
138 | * mapping. This is needed for some BIOSes that put (for | ||
139 | * example) the DMI tables in a reserved region that begins on | ||
140 | * a non-page boundary. | ||
141 | */ | ||
142 | for (i = 0, entry = list; i < map_size; i++, entry++) { | ||
143 | phys_addr_t end = entry->addr + entry->size; | ||
144 | |||
145 | if (entry->type == E820_RAM || i == map_size - 1) { | ||
146 | unsigned long start_pfn = PFN_DOWN(start); | ||
147 | unsigned long end_pfn = PFN_UP(end); | ||
133 | 148 | ||
134 | if (last_end < end) | 149 | if (entry->type == E820_RAM) |
135 | released += xen_release_chunk(last_end, end); | 150 | end_pfn = PFN_UP(entry->addr); |
136 | last_end = max(last_end, e820->map[i].addr + e820->map[i].size); | 151 | |
152 | if (start_pfn < end_pfn) { | ||
153 | if (start_pfn < nr_pages) | ||
154 | released += xen_release_chunk( | ||
155 | start_pfn, min(end_pfn, nr_pages)); | ||
156 | |||
157 | identity += set_phys_range_identity( | ||
158 | start_pfn, end_pfn); | ||
159 | } | ||
160 | start = end; | ||
161 | } | ||
137 | } | 162 | } |
138 | 163 | ||
139 | if (last_end < max_addr) | 164 | printk(KERN_INFO "Released %lu pages of unused memory\n", released); |
140 | released += xen_release_chunk(last_end, max_addr); | 165 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); |
141 | 166 | ||
142 | printk(KERN_INFO "released %ld pages of unused memory\n", released); | ||
143 | return released; | 167 | return released; |
144 | } | 168 | } |
145 | 169 | ||
146 | static unsigned long __init xen_set_identity(const struct e820entry *list, | 170 | static unsigned long __init xen_get_max_pages(void) |
147 | ssize_t map_size) | ||
148 | { | 171 | { |
149 | phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; | 172 | unsigned long max_pages = MAX_DOMAIN_PAGES; |
150 | phys_addr_t start_pci = last; | 173 | domid_t domid = DOMID_SELF; |
151 | const struct e820entry *entry; | 174 | int ret; |
152 | unsigned long identity = 0; | ||
153 | int i; | ||
154 | |||
155 | for (i = 0, entry = list; i < map_size; i++, entry++) { | ||
156 | phys_addr_t start = entry->addr; | ||
157 | phys_addr_t end = start + entry->size; | ||
158 | |||
159 | if (start < last) | ||
160 | start = last; | ||
161 | |||
162 | if (end <= start) | ||
163 | continue; | ||
164 | 175 | ||
165 | /* Skip over the 1MB region. */ | 176 | ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); |
166 | if (last > end) | 177 | if (ret > 0) |
167 | continue; | 178 | max_pages = ret; |
179 | return min(max_pages, MAX_DOMAIN_PAGES); | ||
180 | } | ||
168 | 181 | ||
169 | if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { | 182 | static void xen_align_and_add_e820_region(u64 start, u64 size, int type) |
170 | if (start > start_pci) | 183 | { |
171 | identity += set_phys_range_identity( | 184 | u64 end = start + size; |
172 | PFN_UP(start_pci), PFN_DOWN(start)); | ||
173 | 185 | ||
174 | /* Without saving 'last' we would gobble RAM too | 186 | /* Align RAM regions to page boundaries. */ |
175 | * at the end of the loop. */ | 187 | if (type == E820_RAM) { |
176 | last = end; | 188 | start = PAGE_ALIGN(start); |
177 | start_pci = end; | 189 | end &= ~((u64)PAGE_SIZE - 1); |
178 | continue; | ||
179 | } | ||
180 | start_pci = min(start, start_pci); | ||
181 | last = end; | ||
182 | } | 190 | } |
183 | if (last > start_pci) | 191 | |
184 | identity += set_phys_range_identity( | 192 | e820_add_region(start, end - start, type); |
185 | PFN_UP(start_pci), PFN_DOWN(last)); | ||
186 | return identity; | ||
187 | } | 193 | } |
194 | |||
188 | /** | 195 | /** |
189 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 196 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
190 | **/ | 197 | **/ |
191 | char * __init xen_memory_setup(void) | 198 | char * __init xen_memory_setup(void) |
192 | { | 199 | { |
193 | static struct e820entry map[E820MAX] __initdata; | 200 | static struct e820entry map[E820MAX] __initdata; |
194 | static struct e820entry map_raw[E820MAX] __initdata; | ||
195 | 201 | ||
196 | unsigned long max_pfn = xen_start_info->nr_pages; | 202 | unsigned long max_pfn = xen_start_info->nr_pages; |
197 | unsigned long long mem_end; | 203 | unsigned long long mem_end; |
198 | int rc; | 204 | int rc; |
199 | struct xen_memory_map memmap; | 205 | struct xen_memory_map memmap; |
206 | unsigned long max_pages; | ||
200 | unsigned long extra_pages = 0; | 207 | unsigned long extra_pages = 0; |
201 | unsigned long extra_limit; | ||
202 | unsigned long identity_pages = 0; | ||
203 | int i; | 208 | int i; |
204 | int op; | 209 | int op; |
205 | 210 | ||
@@ -225,58 +230,65 @@ char * __init xen_memory_setup(void) | |||
225 | } | 230 | } |
226 | BUG_ON(rc); | 231 | BUG_ON(rc); |
227 | 232 | ||
228 | memcpy(map_raw, map, sizeof(map)); | 233 | /* Make sure the Xen-supplied memory map is well-ordered. */ |
229 | e820.nr_map = 0; | 234 | sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); |
230 | xen_extra_mem_start = mem_end; | 235 | |
231 | for (i = 0; i < memmap.nr_entries; i++) { | 236 | max_pages = xen_get_max_pages(); |
232 | unsigned long long end; | 237 | if (max_pages > max_pfn) |
233 | 238 | extra_pages += max_pages - max_pfn; | |
234 | /* Guard against non-page aligned E820 entries. */ | 239 | |
235 | if (map[i].type == E820_RAM) | 240 | /* |
236 | map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE; | 241 | * Set P2M for all non-RAM pages and E820 gaps to be identity |
237 | 242 | * type PFNs. Any RAM pages that would be made inaccessible by |
238 | end = map[i].addr + map[i].size; | 243 | * this are first released. |
239 | if (map[i].type == E820_RAM && end > mem_end) { | 244 | */ |
240 | /* RAM off the end - may be partially included */ | 245 | xen_released_pages = xen_set_identity_and_release( |
241 | u64 delta = min(map[i].size, end - mem_end); | 246 | map, memmap.nr_entries, max_pfn); |
242 | 247 | extra_pages += xen_released_pages; | |
243 | map[i].size -= delta; | 248 | |
244 | end -= delta; | 249 | /* |
245 | 250 | * Clamp the amount of extra memory to an EXTRA_MEM_RATIO |
246 | 251 | * factor of the base size. On non-highmem systems, the base |
247 | /* | 252 | * size is the full initial memory allocation; on highmem it |
248 | * Set RAM below 4GB that is not for us to be unusable. | 253 | * is limited to the max size of lowmem, so that it doesn't |
249 | * This prevents "System RAM" address space from being | 254 | * get completely filled. |
250 | * used as potential resource for I/O address (happens | 255 | * |
251 | * when 'allocate_resource' is called). | 256 | * In principle there could be a problem in lowmem systems if |
252 | */ | 257 | * the initial memory is also very large with respect to |
253 | if (delta && | 258 | * lowmem, but we won't try to deal with that here. |
254 | (xen_initial_domain() && end < 0x100000000ULL)) | 259 | */ |
255 | e820_add_region(end, delta, E820_UNUSABLE); | 260 | extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), |
261 | extra_pages); | ||
262 | |||
263 | i = 0; | ||
264 | while (i < memmap.nr_entries) { | ||
265 | u64 addr = map[i].addr; | ||
266 | u64 size = map[i].size; | ||
267 | u32 type = map[i].type; | ||
268 | |||
269 | if (type == E820_RAM) { | ||
270 | if (addr < mem_end) { | ||
271 | size = min(size, mem_end - addr); | ||
272 | } else if (extra_pages) { | ||
273 | size = min(size, (u64)extra_pages * PAGE_SIZE); | ||
274 | extra_pages -= size / PAGE_SIZE; | ||
275 | xen_add_extra_mem(addr, size); | ||
276 | } else | ||
277 | type = E820_UNUSABLE; | ||
256 | } | 278 | } |
257 | 279 | ||
258 | if (map[i].size > 0 && end > xen_extra_mem_start) | 280 | xen_align_and_add_e820_region(addr, size, type); |
259 | xen_extra_mem_start = end; | ||
260 | 281 | ||
261 | /* Add region if any remains */ | 282 | map[i].addr += size; |
262 | if (map[i].size > 0) | 283 | map[i].size -= size; |
263 | e820_add_region(map[i].addr, map[i].size, map[i].type); | 284 | if (map[i].size == 0) |
285 | i++; | ||
264 | } | 286 | } |
265 | /* Align the balloon area so that max_low_pfn does not get set | ||
266 | * to be at the _end_ of the PCI gap at the far end (fee01000). | ||
267 | * Note that xen_extra_mem_start gets set in the loop above to be | ||
268 | * past the last E820 region. */ | ||
269 | if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32))) | ||
270 | xen_extra_mem_start = (1ULL<<32); | ||
271 | 287 | ||
272 | /* | 288 | /* |
273 | * In domU, the ISA region is normal, usable memory, but we | 289 | * In domU, the ISA region is normal, usable memory, but we |
274 | * reserve ISA memory anyway because too many things poke | 290 | * reserve ISA memory anyway because too many things poke |
275 | * about in there. | 291 | * about in there. |
276 | * | ||
277 | * In Dom0, the host E820 information can leave gaps in the | ||
278 | * ISA range, which would cause us to release those pages. To | ||
279 | * avoid this, we unconditionally reserve them here. | ||
280 | */ | 292 | */ |
281 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, | 293 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, |
282 | E820_RESERVED); | 294 | E820_RESERVED); |
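Annotation: the rewritten loop in xen_memory_setup() consumes each E820 entry in pieces rather than editing it in place; RAM below mem_end is kept, RAM above it is diverted into the extra-memory pool while extra_pages last, and whatever remains is marked unusable, after which xen_align_and_add_e820_region() shrinks RAM (and only RAM) inward to page boundaries. A compilable sketch under those rules, with a one-entry sample map and printf in place of e820_add_region():

#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

enum { E820_RAM = 1, E820_UNUSABLE = 5 };	/* illustrative subset */

struct entry { unsigned long long addr, size; int type; };

/* Mirror of xen_align_and_add_e820_region(): RAM shrinks inward to
 * page boundaries, everything else is added with its exact bounds. */
static void align_and_add(unsigned long long start, unsigned long long size, int type)
{
	unsigned long long end = start + size;

	if (type == E820_RAM) {
		start = PAGE_ALIGN(start);
		end &= ~(PAGE_SIZE - 1);
	}
	if (end > start)
		printf("e820: %#llx-%#llx type %d\n", start, end, type);
}

int main(void)
{
	struct entry map[] = { { 0, 0x8000000, E820_RAM } };	/* one 128 MB entry */
	unsigned long long mem_end = 0x4000000;		/* initial domain allocation */
	unsigned long long extra_pages = 0x1000;	/* pages granted back to us */
	int i = 0, n = 1;

	while (i < n) {
		unsigned long long addr = map[i].addr;
		unsigned long long size = map[i].size;
		int type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				size = size < mem_end - addr ? size : mem_end - addr;
			} else if (extra_pages) {
				unsigned long long cap = extra_pages * PAGE_SIZE;
				size = size < cap ? size : cap;
				extra_pages -= size / PAGE_SIZE;
				/* xen_add_extra_mem(addr, size) would go here */
			} else
				type = E820_UNUSABLE;
		}
		align_and_add(addr, size, type);
		map[i].addr += size;	/* consume the piece just handled */
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}
	return 0;
}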
@@ -292,36 +304,6 @@ char * __init xen_memory_setup(void) | |||
292 | 304 | ||
293 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 305 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
294 | 306 | ||
295 | extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); | ||
296 | |||
297 | /* | ||
298 | * Clamp the amount of extra memory to an EXTRA_MEM_RATIO | ||
299 | * factor of the base size. On non-highmem systems, the base | ||
300 | * size is the full initial memory allocation; on highmem it | ||
301 | * is limited to the max size of lowmem, so that it doesn't | ||
302 | * get completely filled. | ||
303 | * | ||
304 | * In principle there could be a problem in lowmem systems if | ||
305 | * the initial memory is also very large with respect to | ||
306 | * lowmem, but we won't try to deal with that here. | ||
307 | */ | ||
308 | extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), | ||
309 | max_pfn + extra_pages); | ||
310 | |||
311 | if (extra_limit >= max_pfn) | ||
312 | extra_pages = extra_limit - max_pfn; | ||
313 | else | ||
314 | extra_pages = 0; | ||
315 | |||
316 | xen_add_extra_mem(extra_pages); | ||
317 | |||
318 | /* | ||
319 | * Set P2M for all non-RAM pages and E820 gaps to be identity | ||
320 | * type PFNs. We supply it with the non-sanitized version | ||
321 | * of the E820. | ||
322 | */ | ||
323 | identity_pages = xen_set_identity(map_raw, memmap.nr_entries); | ||
324 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages); | ||
325 | return "Xen"; | 307 | return "Xen"; |
326 | } | 308 | } |
327 | 309 | ||
@@ -425,7 +407,7 @@ void __init xen_arch_setup(void) | |||
425 | #ifdef CONFIG_X86_32 | 407 | #ifdef CONFIG_X86_32 |
426 | boot_cpu_data.hlt_works_ok = 1; | 408 | boot_cpu_data.hlt_works_ok = 1; |
427 | #endif | 409 | #endif |
428 | pm_idle = default_idle; | 410 | disable_cpuidle(); |
429 | boot_option_idle_override = IDLE_HALT; | 411 | boot_option_idle_override = IDLE_HALT; |
430 | 412 | ||
431 | fiddle_vdso(); | 413 | fiddle_vdso(); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b4533a86d7e4..041d4fe9dfe4 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <xen/page.h> | 32 | #include <xen/page.h> |
33 | #include <xen/events.h> | 33 | #include <xen/events.h> |
34 | 34 | ||
35 | #include <xen/hvc-console.h> | ||
35 | #include "xen-ops.h" | 36 | #include "xen-ops.h" |
36 | #include "mmu.h" | 37 | #include "mmu.h" |
37 | 38 | ||
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
207 | unsigned cpu; | 208 | unsigned cpu; |
208 | unsigned int i; | 209 | unsigned int i; |
209 | 210 | ||
211 | if (skip_ioapic_setup) { | ||
212 | char *m = (max_cpus == 0) ? | ||
213 | "The nosmp parameter is incompatible with Xen; " \ | ||
214 | "use Xen dom0_max_vcpus=1 parameter" : | ||
215 | "The noapic parameter is incompatible with Xen"; | ||
216 | |||
217 | xen_raw_printk(m); | ||
218 | panic(m); | ||
219 | } | ||
210 | xen_init_lock_cpu(0); | 220 | xen_init_lock_cpu(0); |
211 | 221 | ||
212 | smp_store_cpu_info(0); | 222 | smp_store_cpu_info(0); |
@@ -521,10 +531,7 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |||
521 | native_smp_prepare_cpus(max_cpus); | 531 | native_smp_prepare_cpus(max_cpus); |
522 | WARN_ON(xen_smp_intr_init(0)); | 532 | WARN_ON(xen_smp_intr_init(0)); |
523 | 533 | ||
524 | if (!xen_have_vector_callback) | ||
525 | return; | ||
526 | xen_init_lock_cpu(0); | 534 | xen_init_lock_cpu(0); |
527 | xen_init_spinlocks(); | ||
528 | } | 535 | } |
529 | 536 | ||
530 | static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) | 537 | static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) |
@@ -546,6 +553,8 @@ static void xen_hvm_cpu_die(unsigned int cpu) | |||
546 | 553 | ||
547 | void __init xen_hvm_smp_init(void) | 554 | void __init xen_hvm_smp_init(void) |
548 | { | 555 | { |
556 | if (!xen_have_vector_callback) | ||
557 | return; | ||
549 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; | 558 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; |
550 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; | 559 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; |
551 | smp_ops.cpu_up = xen_hvm_cpu_up; | 560 | smp_ops.cpu_up = xen_hvm_cpu_up; |
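Annotation: moving the xen_have_vector_callback test from xen_hvm_smp_prepare_cpus() into xen_hvm_smp_init() means that without per-vCPU vector callbacks the PV smp_ops are never installed at all, instead of being installed and then partially short-circuited. A toy illustration of that gating; the ops struct and flag here are stand-ins, not the kernel's:

#include <stdio.h>
#include <stdbool.h>

struct smp_ops { void (*prepare_cpus)(unsigned int); };

static struct smp_ops smp_ops;		/* NULL means the native path here */
static bool have_vector_callback;	/* stand-in for xen_have_vector_callback */

static void hvm_prepare_cpus(unsigned int max_cpus)
{
	printf("PV prepare_cpus(%u)\n", max_cpus);
}

/* Gate the whole override on the feature, as the patch does by moving
 * the check into the init function. */
static void hvm_smp_init(void)
{
	if (!have_vector_callback)
		return;
	smp_ops.prepare_cpus = hvm_prepare_cpus;
}

int main(void)
{
	have_vector_callback = false;
	hvm_smp_init();
	printf("PV ops %s\n", smp_ops.prepare_cpus ? "installed" : "not installed");
	return 0;
}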
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5158c505bef9..0296a9522501 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void) | |||
168 | struct pvclock_vcpu_time_info *src; | 168 | struct pvclock_vcpu_time_info *src; |
169 | cycle_t ret; | 169 | cycle_t ret; |
170 | 170 | ||
171 | src = &get_cpu_var(xen_vcpu)->time; | 171 | preempt_disable_notrace(); |
172 | src = &__get_cpu_var(xen_vcpu)->time; | ||
172 | ret = pvclock_clocksource_read(src); | 173 | ret = pvclock_clocksource_read(src); |
173 | put_cpu_var(xen_vcpu); | 174 | preempt_enable_notrace(); |
174 | return ret; | 175 | return ret; |
175 | } | 176 | } |
176 | 177 | ||
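Annotation: xen_clocksource_read() switches from get_cpu_var()/put_cpu_var() to __get_cpu_var() bracketed by the _notrace preemption helpers; presumably because the clock can be read from inside the tracer itself, where the instrumented preemption calls would recurse. A sketch of the shape of the pattern, with no-op stand-ins for the kernel primitives:

#include <stdio.h>

/* No-op stand-ins: in the kernel these pin the CPU without emitting
 * the trace events that preempt_disable()/preempt_enable() would. */
static void preempt_disable_notrace(void) { }
static void preempt_enable_notrace(void) { }

struct vcpu_time { unsigned long long system_time; };
static struct vcpu_time this_cpu_time = { 123456789ULL };

/* Pattern used by the patched xen_clocksource_read(): pin with the
 * untraced helpers, read the per-CPU pvclock, unpin. */
static unsigned long long clocksource_read(void)
{
	unsigned long long ret;

	preempt_disable_notrace();
	ret = this_cpu_time.system_time;	/* pvclock_clocksource_read() in the kernel */
	preempt_enable_notrace();
	return ret;
}

int main(void)
{
	printf("%llu\n", clocksource_read());
	return 0;
}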
@@ -200,8 +201,22 @@ static unsigned long xen_get_wallclock(void) | |||
200 | 201 | ||
201 | static int xen_set_wallclock(unsigned long now) | 202 | static int xen_set_wallclock(unsigned long now) |
202 | { | 203 | { |
204 | struct xen_platform_op op; | ||
205 | int rc; | ||
206 | |||
203 | /* do nothing for domU */ | 207 | /* do nothing for domU */ |
204 | return -1; | 208 | if (!xen_initial_domain()) |
209 | return -1; | ||
210 | |||
211 | op.cmd = XENPF_settime; | ||
212 | op.u.settime.secs = now; | ||
213 | op.u.settime.nsecs = 0; | ||
214 | op.u.settime.system_time = xen_clocksource_read(); | ||
215 | |||
216 | rc = HYPERVISOR_dom0_op(&op); | ||
217 | WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); | ||
218 | |||
219 | return rc; | ||
205 | } | 220 | } |
206 | 221 | ||
207 | static struct clocksource xen_clocksource __read_mostly = { | 222 | static struct clocksource xen_clocksource __read_mostly = { |
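Annotation: xen_set_wallclock() still returns -1 for domU, so the generic code skips the RTC update there, but dom0 now forwards the new wall time to the hypervisor via XENPF_settime, pairing the seconds value with the current system time from the clocksource. A toy sketch of that gate; the hypercall plumbing is stubbed with printf and the names are illustrative:

#include <stdio.h>

struct settime {
	unsigned long secs;
	unsigned long nsecs;
	unsigned long long system_time;
};

static int dom0_op_settime(const struct settime *op)	/* stands in for HYPERVISOR_dom0_op */
{
	printf("XENPF_settime: secs=%lu system_time=%llu\n",
	       op->secs, op->system_time);
	return 0;
}

static int initial_domain = 1;	/* stand-in for xen_initial_domain() */
static unsigned long long clocksource_read(void) { return 42; }

static int set_wallclock(unsigned long now)
{
	struct settime op = { now, 0, clocksource_read() };

	if (!initial_domain)	/* do nothing for domU */
		return -1;
	return dom0_op_settime(&op);
}

int main(void)
{
	return set_wallclock(1322499982UL) ? 1 : 0;
}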
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c new file mode 100644 index 000000000000..520022d1a181 --- /dev/null +++ b/arch/x86/xen/trace.c | |||
@@ -0,0 +1,62 @@ | |||
1 | #include <linux/ftrace.h> | ||
2 | #include <xen/interface/xen.h> | ||
3 | |||
4 | #define N(x) [__HYPERVISOR_##x] = "("#x")" | ||
5 | static const char *xen_hypercall_names[] = { | ||
6 | N(set_trap_table), | ||
7 | N(mmu_update), | ||
8 | N(set_gdt), | ||
9 | N(stack_switch), | ||
10 | N(set_callbacks), | ||
11 | N(fpu_taskswitch), | ||
12 | N(sched_op_compat), | ||
13 | N(dom0_op), | ||
14 | N(set_debugreg), | ||
15 | N(get_debugreg), | ||
16 | N(update_descriptor), | ||
17 | N(memory_op), | ||
18 | N(multicall), | ||
19 | N(update_va_mapping), | ||
20 | N(set_timer_op), | ||
21 | N(event_channel_op_compat), | ||
22 | N(xen_version), | ||
23 | N(console_io), | ||
24 | N(physdev_op_compat), | ||
25 | N(grant_table_op), | ||
26 | N(vm_assist), | ||
27 | N(update_va_mapping_otherdomain), | ||
28 | N(iret), | ||
29 | N(vcpu_op), | ||
30 | N(set_segment_base), | ||
31 | N(mmuext_op), | ||
32 | N(acm_op), | ||
33 | N(nmi_op), | ||
34 | N(sched_op), | ||
35 | N(callback_op), | ||
36 | N(xenoprof_op), | ||
37 | N(event_channel_op), | ||
38 | N(physdev_op), | ||
39 | N(hvm_op), | ||
40 | |||
41 | /* Architecture-specific hypercall definitions. */ | ||
42 | N(arch_0), | ||
43 | N(arch_1), | ||
44 | N(arch_2), | ||
45 | N(arch_3), | ||
46 | N(arch_4), | ||
47 | N(arch_5), | ||
48 | N(arch_6), | ||
49 | N(arch_7), | ||
50 | }; | ||
51 | #undef N | ||
52 | |||
53 | static const char *xen_hypercall_name(unsigned op) | ||
54 | { | ||
55 | if (op < ARRAY_SIZE(xen_hypercall_names) && xen_hypercall_names[op] != NULL) | ||
56 | return xen_hypercall_names[op]; | ||
57 | |||
58 | return ""; | ||
59 | } | ||
60 | |||
61 | #define CREATE_TRACE_POINTS | ||
62 | #include <trace/events/xen.h> | ||
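Annotation: the new trace.c builds a sparse, designated-initializer table keyed by hypercall number and falls back to an empty string for out-of-range or unnamed entries. A runnable sketch of the lookup; since the __HYPERVISOR_* constants are not available standalone, the N() macro here takes an explicit illustrative index instead:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Sparse table: unlisted slots stay NULL. Indices are illustrative. */
#define N(i, x) [i] = "(" #x ")"
static const char *names[] = {
	N(0, set_trap_table),
	N(1, mmu_update),
	N(13, multicall),
};
#undef N

static const char *hypercall_name(unsigned op)
{
	if (op < ARRAY_SIZE(names) && names[op] != NULL)
		return names[op];
	return "";	/* unknown number or hole in the table */
}

int main(void)
{
	printf("'%s' '%s' '%s'\n",
	       hypercall_name(1), hypercall_name(5), hypercall_name(99));
	return 0;
}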
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c new file mode 100644 index 000000000000..1cd7f4d11e29 --- /dev/null +++ b/arch/x86/xen/vga.c | |||
@@ -0,0 +1,67 @@ | |||
1 | #include <linux/screen_info.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #include <asm/bootparam.h> | ||
5 | #include <asm/setup.h> | ||
6 | |||
7 | #include <xen/interface/xen.h> | ||
8 | |||
9 | #include "xen-ops.h" | ||
10 | |||
11 | void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) | ||
12 | { | ||
13 | struct screen_info *screen_info = &boot_params.screen_info; | ||
14 | |||
15 | /* This is drawn from a dump from vgacon:startup in | ||
16 | * standard Linux. */ | ||
17 | screen_info->orig_video_mode = 3; | ||
18 | screen_info->orig_video_isVGA = 1; | ||
19 | screen_info->orig_video_lines = 25; | ||
20 | screen_info->orig_video_cols = 80; | ||
21 | screen_info->orig_video_ega_bx = 3; | ||
22 | screen_info->orig_video_points = 16; | ||
23 | screen_info->orig_y = screen_info->orig_video_lines - 1; | ||
24 | |||
25 | switch (info->video_type) { | ||
26 | case XEN_VGATYPE_TEXT_MODE_3: | ||
27 | if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) | ||
28 | + sizeof(info->u.text_mode_3)) | ||
29 | break; | ||
30 | screen_info->orig_video_lines = info->u.text_mode_3.rows; | ||
31 | screen_info->orig_video_cols = info->u.text_mode_3.columns; | ||
32 | screen_info->orig_x = info->u.text_mode_3.cursor_x; | ||
33 | screen_info->orig_y = info->u.text_mode_3.cursor_y; | ||
34 | screen_info->orig_video_points = | ||
35 | info->u.text_mode_3.font_height; | ||
36 | break; | ||
37 | |||
38 | case XEN_VGATYPE_VESA_LFB: | ||
39 | if (size < offsetof(struct dom0_vga_console_info, | ||
40 | u.vesa_lfb.gbl_caps)) | ||
41 | break; | ||
42 | screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB; | ||
43 | screen_info->lfb_width = info->u.vesa_lfb.width; | ||
44 | screen_info->lfb_height = info->u.vesa_lfb.height; | ||
45 | screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel; | ||
46 | screen_info->lfb_base = info->u.vesa_lfb.lfb_base; | ||
47 | screen_info->lfb_size = info->u.vesa_lfb.lfb_size; | ||
48 | screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line; | ||
49 | screen_info->red_size = info->u.vesa_lfb.red_size; | ||
50 | screen_info->red_pos = info->u.vesa_lfb.red_pos; | ||
51 | screen_info->green_size = info->u.vesa_lfb.green_size; | ||
52 | screen_info->green_pos = info->u.vesa_lfb.green_pos; | ||
53 | screen_info->blue_size = info->u.vesa_lfb.blue_size; | ||
54 | screen_info->blue_pos = info->u.vesa_lfb.blue_pos; | ||
55 | screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; | ||
56 | screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; | ||
57 | if (size >= offsetof(struct dom0_vga_console_info, | ||
58 | u.vesa_lfb.gbl_caps) | ||
59 | + sizeof(info->u.vesa_lfb.gbl_caps)) | ||
60 | screen_info->capabilities = info->u.vesa_lfb.gbl_caps; | ||
61 | if (size >= offsetof(struct dom0_vga_console_info, | ||
62 | u.vesa_lfb.mode_attrs) | ||
63 | + sizeof(info->u.vesa_lfb.mode_attrs)) | ||
64 | screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; | ||
65 | break; | ||
66 | } | ||
67 | } | ||
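Annotation: the new vga.c parses a structure whose tail fields were added over successive ABI revisions, so every optional field is guarded by an offsetof()+sizeof() check against the size the hypervisor actually handed over. A minimal sketch of that pattern, using an invented two-revision struct:

#include <stdio.h>
#include <stddef.h>

/* Illustrative cut-down of dom0_vga_console_info: gbl_caps was
 * appended in a later revision, so callers pass the size they got. */
struct console_info {
	int rows, cols;
	int gbl_caps;	/* only present in the newer ABI */
};

static void parse(const struct console_info *info, size_t size)
{
	printf("%dx%d\n", info->rows, info->cols);

	/* Touch a field only if the passed-in size proves it exists. */
	if (size >= offsetof(struct console_info, gbl_caps) + sizeof(info->gbl_caps))
		printf("caps=%#x\n", info->gbl_caps);
}

int main(void)
{
	struct console_info info = { 25, 80, 0x3 };

	parse(&info, offsetof(struct console_info, gbl_caps));	/* old ABI: caps skipped */
	parse(&info, sizeof(info));				/* new ABI: caps read */
	return 0;
}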
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 22a2093b5862..b040b0e518ca 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -113,11 +113,13 @@ xen_iret_start_crit: | |||
113 | 113 | ||
114 | /* | 114 | /* |
115 | * If there's something pending, mask events again so we can | 115 | * If there's something pending, mask events again so we can |
116 | * jump back into xen_hypervisor_callback | 116 | * jump back into xen_hypervisor_callback. Otherwise do not |
117 | * touch XEN_vcpu_info_mask. | ||
117 | */ | 118 | */ |
118 | sete XEN_vcpu_info_mask(%eax) | 119 | jne 1f |
120 | movb $1, XEN_vcpu_info_mask(%eax) | ||
119 | 121 | ||
120 | popl %eax | 122 | 1: popl %eax |
121 | 123 | ||
122 | /* | 124 | /* |
123 | * From this point on the registers are restored and the stack | 125 | * From this point on the registers are restored and the stack |
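Annotation: the assembly change replaces an unconditional sete, which wrote 0 to XEN_vcpu_info_mask whenever no event was pending, with a branch that stores 1 only when re-entry into the callback is needed and otherwise leaves the byte alone. The same logic expressed in C, with a plain int standing in for the pending-event test:

#include <stdio.h>

static unsigned char vcpu_info_mask;

/* Old behaviour ("sete"): the mask byte is written on every exit,
 * clearing it when nothing is pending. */
static void iret_exit_old(int pending)
{
	vcpu_info_mask = pending ? 1 : 0;
}

/* New behaviour ("jne 1f; movb $1, ..."): set the mask only when we
 * must jump back into the callback; otherwise do not touch it. */
static void iret_exit_new(int pending)
{
	if (pending)
		vcpu_info_mask = 1;
}

int main(void)
{
	vcpu_info_mask = 1;	/* events were masked by someone else */
	iret_exit_old(0);
	printf("old: mask=%d (clobbered)\n", vcpu_info_mask);

	vcpu_info_mask = 1;
	iret_exit_new(0);
	printf("new: mask=%d (preserved)\n", vcpu_info_mask);
	return 0;
}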
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 97dfdc8757b3..b095739ccd4c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -88,6 +88,17 @@ static inline void xen_uninit_lock_cpu(int cpu) | |||
88 | } | 88 | } |
89 | #endif | 89 | #endif |
90 | 90 | ||
91 | struct dom0_vga_console_info; | ||
92 | |||
93 | #ifdef CONFIG_XEN_DOM0 | ||
94 | void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); | ||
95 | #else | ||
96 | static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, | ||
97 | size_t size) | ||
98 | { | ||
99 | } | ||
100 | #endif | ||
101 | |||
91 | /* Declare an asm function, along with symbols needed to make it | 102 | /* Declare an asm function, along with symbols needed to make it |
92 | inlineable */ | 103 | inlineable */ |
93 | #define DECL_ASM(ret, name, ...) \ | 104 | #define DECL_ASM(ret, name, ...) \ |
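Annotation: the xen-ops.h addition follows the usual config-stub idiom: a real prototype under CONFIG_XEN_DOM0 and an empty static inline otherwise, so callers of xen_init_vga() need no #ifdefs of their own. A self-contained sketch of the idiom, using an invented CONFIG_DEMO_DOM0 symbol:

#include <stdio.h>
#include <stddef.h>

struct console_info;	/* opaque to callers, like dom0_vga_console_info */

#ifdef CONFIG_DEMO_DOM0
void init_vga(const struct console_info *info, size_t size);
#else
/* Stub compiled when the feature is configured out: callers can use
 * init_vga() unconditionally and the call folds away. */
static inline void init_vga(const struct console_info *info, size_t size)
{
	(void)info;
	(void)size;
}
#endif

int main(void)
{
	init_vga(NULL, 0);	/* a no-op without CONFIG_DEMO_DOM0 */
	printf("ok\n");
	return 0;
}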