author    Linus Torvalds <torvalds@linux-foundation.org>  2008-04-25 15:32:10 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-04-25 15:32:10 -0400
commit    4b7227ca321ccf447cdc04538687c895db8b77f5
tree      72712127fc56aa2579e8a1508998bcabf6bd6c60
parent    5dae61b80564a5583ff4b56e357bdbc733fddb76
parent    1775826ceec51187aa868406585799b7e76ffa7d
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next: (52 commits)
  xen: add balloon driver
  xen: allow compilation with non-flat memory
  xen: fold xen_sysexit into xen_iret
  xen: allow set_pte_at on init_mm to be lockless
  xen: disable preemption during tlb flush
  xen pvfb: Para-virtual framebuffer, keyboard and pointer driver
  xen: Add compatibility aliases for frontend drivers
  xen: Module autoprobing support for frontend drivers
  xen blkfront: Delay wait for block devices until after the disk is added
  xen/blkfront: use bdget_disk
  xen: Make xen-blkfront write its protocol ABI to xenstore
  xen: import arch generic part of xencomm
  xen: make grant table arch portable
  xen: replace callers of alloc_vm_area()/free_vm_area() with xen_ prefixed one
  xen: make include/xen/page.h portable moving those definitions under asm dir
  xen: add resend_irq_on_evtchn() definition into events.c
  Xen: make events.c portable for ia64/xen support
  xen: move events.c to drivers/xen for IA64/Xen support
  xen: move features.c from arch/x86/xen/features.c to drivers/xen
  xen: add missing definitions in include/xen/interface/vcpu.h which ia64/xen needs
  ...
-rw-r--r--  arch/x86/kernel/entry_32.S | 12
-rw-r--r--  arch/x86/kernel/paravirt.c | 12
-rw-r--r--  arch/x86/kernel/reboot.c | 4
-rw-r--r--  arch/x86/kernel/smpboot.c | 4
-rw-r--r--  arch/x86/kernel/vmi_32.c | 22
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 4
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/init_32.c | 8
-rw-r--r--  arch/x86/mm/ioremap.c | 2
-rw-r--r--  arch/x86/mm/pageattr.c | 4
-rw-r--r--  arch/x86/mm/pgtable.c | 276
-rw-r--r--  arch/x86/mm/pgtable_32.c | 204
-rw-r--r--  arch/x86/xen/Kconfig | 2
-rw-r--r--  arch/x86/xen/Makefile | 4
-rw-r--r--  arch/x86/xen/enlighten.c | 54
-rw-r--r--  arch/x86/xen/grant-table.c | 91
-rw-r--r--  arch/x86/xen/mmu.c | 143
-rw-r--r--  arch/x86/xen/setup.c | 21
-rw-r--r--  arch/x86/xen/smp.c | 20
-rw-r--r--  arch/x86/xen/xen-asm.S | 42
-rw-r--r--  arch/x86/xen/xen-ops.h | 8
-rw-r--r--  drivers/Kconfig | 2
-rw-r--r--  drivers/block/xen-blkfront.c | 23
-rw-r--r--  drivers/input/Kconfig | 9
-rw-r--r--  drivers/input/Makefile | 2
-rw-r--r--  drivers/input/xen-kbdfront.c | 340
-rw-r--r--  drivers/net/xen-netfront.c | 2
-rw-r--r--  drivers/video/Kconfig | 14
-rw-r--r--  drivers/video/Makefile | 1
-rw-r--r--  drivers/video/xen-fbfront.c | 550
-rw-r--r--  drivers/xen/Kconfig | 19
-rw-r--r--  drivers/xen/Makefile | 4
-rw-r--r--  drivers/xen/balloon.c | 712
-rw-r--r--  drivers/xen/events.c (renamed from arch/x86/xen/events.c) | 121
-rw-r--r--  drivers/xen/features.c (renamed from arch/x86/xen/features.c) | 0
-rw-r--r--  drivers/xen/grant-table.c | 37
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c | 6
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c | 32
-rw-r--r--  drivers/xen/xencomm.c | 232
-rw-r--r--  include/asm-x86/paravirt.h | 43
-rw-r--r--  include/asm-x86/pgalloc.h | 111
-rw-r--r--  include/asm-x86/pgalloc_32.h | 95
-rw-r--r--  include/asm-x86/pgalloc_64.h | 133
-rw-r--r--  include/asm-x86/pgtable.h | 54
-rw-r--r--  include/asm-x86/pgtable_32.h | 18
-rw-r--r--  include/asm-x86/pgtable_64.h | 2
-rw-r--r--  include/asm-x86/xen/events.h | 22
-rw-r--r--  include/asm-x86/xen/grant_table.h | 7
-rw-r--r--  include/asm-x86/xen/hypercall.h | 6
-rw-r--r--  include/asm-x86/xen/interface.h | 28
-rw-r--r--  include/asm-x86/xen/page.h | 168
-rw-r--r--  include/xen/balloon.h | 61
-rw-r--r--  include/xen/events.h | 9
-rw-r--r--  include/xen/grant_table.h | 7
-rw-r--r--  include/xen/interface/callback.h | 102
-rw-r--r--  include/xen/interface/grant_table.h | 11
-rw-r--r--  include/xen/interface/io/fbif.h | 124
-rw-r--r--  include/xen/interface/io/kbdif.h | 114
-rw-r--r--  include/xen/interface/io/protocols.h | 21
-rw-r--r--  include/xen/interface/memory.h | 12
-rw-r--r--  include/xen/interface/vcpu.h | 5
-rw-r--r--  include/xen/interface/xen.h | 22
-rw-r--r--  include/xen/interface/xencomm.h | 41
-rw-r--r--  include/xen/page.h | 181
-rw-r--r--  include/xen/xen-ops.h | 8
-rw-r--r--  include/xen/xenbus.h | 1
-rw-r--r--  include/xen/xencomm.h | 77
67 files changed, 3607 insertions(+), 921 deletions(-)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0f8934fc303..2a609dc3271c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
 irq_return:
 	INTERRUPT_RETURN
 .section .fixup,"ax"
-iret_exc:
+ENTRY(iret_exc)
 	pushl $0			# no error code
 	pushl $do_iret_error
 	jmp error_code
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 #ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+	RING0_INT_FRAME
+	addl $5*4, %esp		/* remove xen-provided frame */
+	jmp sysenter_past_esp
+
 ENTRY(xen_hypervisor_callback)
 	CFI_STARTPROC
 	pushl $0
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback)
 	cmpl $xen_iret_end_crit,%eax
 	jae 1f
 
-	call xen_iret_crit_fixup
+	jmp xen_iret_crit_fixup
 
+ENTRY(xen_do_upcall)
 1:	mov %esp, %eax
 	call xen_evtchn_do_upcall
 	jmp ret_from_intr
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3733412d1357..74f0c5ea2a03 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.flush_tlb_single = native_flush_tlb_single,
 	.flush_tlb_others = native_flush_tlb_others,
 
-	.alloc_pt = paravirt_nop,
-	.alloc_pd = paravirt_nop,
-	.alloc_pd_clone = paravirt_nop,
-	.release_pt = paravirt_nop,
-	.release_pd = paravirt_nop,
+	.alloc_pte = paravirt_nop,
+	.alloc_pmd = paravirt_nop,
+	.alloc_pmd_clone = paravirt_nop,
+	.alloc_pud = paravirt_nop,
+	.release_pte = paravirt_nop,
+	.release_pmd = paravirt_nop,
+	.release_pud = paravirt_nop,
 
 	.set_pte = native_set_pte,
 	.set_pte_at = native_set_pte_at,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 19c9386ac118..1791a751a772 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -8,6 +8,7 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
+#include <asm/pgtable.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 
@@ -15,7 +16,6 @@
 # include <linux/dmi.h>
 # include <linux/ctype.h>
 # include <linux/mc146818rtc.h>
-# include <asm/pgtable.h>
 #else
 # include <asm/iommu.h>
 #endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
 	/* Remap the kernel at virtual address zero, as well as offset zero
 	   from the kernel segment.  This assumes the kernel segment starts at
 	   virtual address PAGE_OFFSET. */
-	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+	memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 		sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
 
 	/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ade371f9663a..eef79e84145f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 #ifdef CONFIG_X86_32
 	/* init low mem mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-		min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+		min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
 #endif
 
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 12affe1f9bce..956f38927aa7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
 	 * pdes need to be zeroed.
 	 */
 	if (type & VMI_PAGE_CLONE)
-		limit = USER_PTRS_PER_PGD;
+		limit = KERNEL_PGD_BOUNDARY;
 	for (i = 0; i < limit; i++)
 		BUG_ON(ptr[i]);
 }
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
 }
 #endif
 
-static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
 {
 	/*
 	 * This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
 }
 
-static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
 	vmi_check_page_type(clonepfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
 }
 
-static void vmi_release_pt(u32 pfn)
+static void vmi_release_pte(u32 pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L1);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
-static void vmi_release_pd(u32 pfn)
+static void vmi_release_pmd(u32 pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
 
 	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
 	if (vmi_ops.allocate_page) {
-		pv_mmu_ops.alloc_pt = vmi_allocate_pt;
-		pv_mmu_ops.alloc_pd = vmi_allocate_pd;
-		pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+		pv_mmu_ops.alloc_pte = vmi_allocate_pte;
+		pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
+		pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
 	}
 
 	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
 	if (vmi_ops.release_page) {
-		pv_mmu_ops.release_pt = vmi_release_pt;
-		pv_mmu_ops.release_pd = vmi_release_pd;
+		pv_mmu_ops.release_pte = vmi_release_pte;
+		pv_mmu_ops.release_pmd = vmi_release_pmd;
 	}
 
 	/* Set linear is needed in all cases */
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index d05722121d24..6e2c4efce0ef 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -543,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu)
 			hijack_source.idt.Offset, stack_start.sp));
 
 	/* init lowmem identity mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
 
 	if (quad_boot) {
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e2..b7b3e4c7cfc9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
 obj-y	:= init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	   pat.o
+	   pat.o pgtable.o
 
 obj-$(CONFIG_X86_32)	+= pgtable_32.o
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..08aa1878fad4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 
-		paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
 		BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 		}
 
-		paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 	}
@@ -365,7 +365,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
 
 		pte_clear(NULL, va, pte);
 	}
-	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
+	paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +457,7 @@ void zap_low_mappings(void)
 	 * Note that "pgd_clear()" doesn't do it for
 	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
 #ifdef CONFIG_X86_PAE
 		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..36a3f7ded626 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -407,7 +407,7 @@ void __init early_ioremap_clear(void)
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	pmd_clear(pmd);
-	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
+	paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c29ebd037254..bd5e05c654dc 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		goto out_unlock;
 
 	pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
+	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
new file mode 100644
index 000000000000..50159764f694
--- /dev/null
+++ b/arch/x86/mm/pgtable.c
@@ -0,0 +1,276 @@
+#include <linux/mm.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *pte;
+
+#ifdef CONFIG_HIGHPTE
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+#else
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+#endif
+	if (pte)
+		pgtable_page_ctor(pte);
+	return pte;
+}
+
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	paravirt_release_pte(page_to_pfn(pte));
+	tlb_remove_page(tlb, pte);
+}
+
+#if PAGETABLE_LEVELS > 2
+void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
+	tlb_remove_page(tlb, virt_to_page(pmd));
+}
+
+#if PAGETABLE_LEVELS > 3
+void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+{
+	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
+	tlb_remove_page(tlb, virt_to_page(pud));
+}
+#endif	/* PAGETABLE_LEVELS > 3 */
+#endif	/* PAGETABLE_LEVELS > 2 */
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
+}
+
+#define UNSHARED_PTRS_PER_PGD				\
+	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+
+static void pgd_ctor(void *p)
+{
+	pgd_t *pgd = p;
+	unsigned long flags;
+
+	/* Clear usermode parts of PGD */
+	memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	/* If the pgd points to a shared pagetable level (either the
+	   ptes in non-PAE, or shared PMD in PAE), then just copy the
+	   references from swapper_pg_dir. */
+	if (PAGETABLE_LEVELS == 2 ||
+	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
+	    PAGETABLE_LEVELS == 4) {
+		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
+				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+				KERNEL_PGD_PTRS);
+		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
+					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
+					 KERNEL_PGD_BOUNDARY,
+					 KERNEL_PGD_PTRS);
+	}
+
+	/* list required to sync kernel mapping updates */
+	if (!SHARED_KERNEL_PMD)
+		pgd_list_add(pgd);
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void pgd_dtor(void *pgd)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	if (SHARED_KERNEL_PMD)
+		return;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * -- wli
+ */
+
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+{
+	int i;
+
+	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
+
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(mm, pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(mm, pgd);
+			return 0;
+		}
+
+		if (i >= KERNEL_PGD_BOUNDARY)
+			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+
+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+	/* Note: almost everything apart from _PAGE_PRESENT is
+	   reserved at the pmd (PDPT) level. */
+	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+	/*
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
+	 */
+	if (mm == current->active_mm)
+		write_cr3(read_cr3());
+}
+#else  /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
+}
+
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
+{
+}
+#endif	/* CONFIG_X86_PAE */
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+	/* so that alloc_pmd can use it */
+	mm->pgd = pgd;
+	if (pgd)
+		pgd_ctor(pgd);
+
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		pgd_dtor(pgd);
+		free_page((unsigned long)pgd);
+		pgd = NULL;
+	}
+
+	return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	pgd_mop_up_pmds(mm, pgd);
+	pgd_dtor(pgd);
+	free_page((unsigned long)pgd);
+}
+
+int ptep_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pte_t *ptep,
+			  pte_t entry, int dirty)
+{
+	int changed = !pte_same(*ptep, entry);
+
+	if (changed && dirty) {
+		*ptep = entry;
+		pte_update_defer(vma->vm_mm, address, ptep);
+		flush_tlb_page(vma, address);
+	}
+
+	return changed;
+}
+
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pte_t *ptep)
+{
+	int ret = 0;
+
+	if (pte_young(*ptep))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 &ptep->pte);
+
+	if (ret)
+		pte_update(vma->vm_mm, addr, ptep);
+
+	return ret;
+}
+
+int ptep_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pte_t *ptep)
+{
+	int young;
+
+	young = ptep_test_and_clear_young(vma, address, ptep);
+	if (young)
+		flush_tlb_page(vma, address);
+
+	return young;
+}
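
A minimal sketch of how the allocator added above is driven (not part of the patch; the two wrapper functions below are hypothetical, while pgd_alloc() and pgd_free() are the functions introduced in this file):

	/* Hypothetical caller, e.g. when a new mm is created: */
	static int sketch_init_new_pgd(struct mm_struct *mm)
	{
		/* pgd_ctor() clones the kernel entries; on PAE the
		   four top-level pmds are also prepopulated */
		mm->pgd = pgd_alloc(mm);
		return mm->pgd ? 0 : -ENOMEM;
	}

	static void sketch_drop_pgd(struct mm_struct *mm)
	{
		/* mops up preallocated pmds, runs the dtor, frees the page */
		pgd_free(mm, mm->pgd);
	}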
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6fb9e7c6893f..9ee007be9142 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve)
 	__VMALLOC_RESERVE += reserve;
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
-	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *pte;
-
-#ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-#else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-#endif
-	if (pte)
-		pgtable_page_ctor(pte);
-	return pte;
-}
-
-/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
- * against pageattr.c; it is the unique case in which a valid change
- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
- * vmalloc faults work because attached pagetables are never freed.
- * -- wli
- */
-static inline void pgd_list_add(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_add(&page->lru, &pgd_list);
-}
-
-static inline void pgd_list_del(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_del(&page->lru);
-}
-
-#define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
-static void pgd_ctor(void *p)
-{
-	pgd_t *pgd = p;
-	unsigned long flags;
-
-	/* Clear usermode parts of PGD */
-	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-
-	spin_lock_irqsave(&pgd_lock, flags);
-
-	/* If the pgd points to a shared pagetable level (either the
-	   ptes in non-PAE, or shared PMD in PAE), then just copy the
-	   references from swapper_pg_dir. */
-	if (PAGETABLE_LEVELS == 2 ||
-	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
-		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
-				swapper_pg_dir + USER_PTRS_PER_PGD,
-				KERNEL_PGD_PTRS);
-		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-					__pa(swapper_pg_dir) >> PAGE_SHIFT,
-					USER_PTRS_PER_PGD,
-					KERNEL_PGD_PTRS);
-	}
-
-	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
-		pgd_list_add(pgd);
-
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-static void pgd_dtor(void *pgd)
-{
-	unsigned long flags; /* can be called from interrupt context */
-
-	if (SHARED_KERNEL_PMD)
-		return;
-
-	spin_lock_irqsave(&pgd_lock, flags);
-	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-#ifdef CONFIG_X86_PAE
-/*
- * Mop up any pmd pages which may still be attached to the pgd.
- * Normally they will be freed by munmap/exit_mmap, but any pmd we
- * preallocate which never got a corresponding vma will need to be
- * freed manually.
- */
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
-{
-	int i;
-
-	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
-		pgd_t pgd = pgdp[i];
-
-		if (pgd_val(pgd) != 0) {
-			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
-
-			pgdp[i] = native_make_pgd(0);
-
-			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
-			pmd_free(mm, pmd);
-		}
-	}
-}
-
-/*
- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
- * updating the top-level pagetable entries to guarantee the
- * processor notices the update.  Since this is expensive, and
- * all 4 top-level entries are used almost immediately in a
- * new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
- */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	pud_t *pud;
-	unsigned long addr;
-	int i;
-
-	pud = pud_offset(pgd, 0);
-	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
-	     i++, pud++, addr += PUD_SIZE) {
-		pmd_t *pmd = pmd_alloc_one(mm, addr);
-
-		if (!pmd) {
-			pgd_mop_up_pmds(mm, pgd);
-			return 0;
-		}
-
-		if (i >= USER_PTRS_PER_PGD)
-			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
-			       sizeof(pmd_t) * PTRS_PER_PMD);
-
-		pud_populate(mm, pud, pmd);
-	}
-
-	return 1;
-}
-#else  /* !CONFIG_X86_PAE */
-/* No need to prepopulate any pagetable entries in non-PAE modes. */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	return 1;
-}
-
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
-{
-}
-#endif	/* CONFIG_X86_PAE */
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-
-	/* so that alloc_pd can use it */
-	mm->pgd = pgd;
-	if (pgd)
-		pgd_ctor(pgd);
-
-	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-		pgd_dtor(pgd);
-		free_page((unsigned long)pgd);
-		pgd = NULL;
-	}
-
-	return pgd;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	pgd_mop_up_pmds(mm, pgd);
-	pgd_dtor(pgd);
-	free_page((unsigned long)pgd);
-}
-
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
-{
-	pgtable_page_dtor(pte);
-	paravirt_release_pt(page_to_pfn(pte));
-	tlb_remove_page(tlb, pte);
-}
-
-#ifdef CONFIG_X86_PAE
-
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-{
-	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-	tlb_remove_page(tlb, virt_to_page(pmd));
-}
-
-#endif
-
 int pmd_bad(pmd_t pmd)
 {
 	WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 4d5f2649bee4..2e641be2737e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
 	bool "Xen guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 343df246bd3e..3d8df981d5fd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y		:= enlighten.o setup.o features.o multicalls.o mmu.o \
-			events.o time.o manage.o xen-asm.o
+obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
+			time.o manage.o xen-asm.o grant-table.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0388220cf97..c8a56e457d61 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	if (*ax == 1)
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_SEP)  |  /* disable SEP */
+			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
+			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
@@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val)
 static void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+	struct multicall_space mcs;
+
+	preempt_disable();
+
+	mcs = xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
 	op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
 }
 
 static void xen_flush_tlb_single(unsigned long addr)
 {
 	struct mmuext_op *op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+	struct multicall_space mcs;
+
+	preempt_disable();
 
+	mcs = xen_mc_entry(sizeof(*op));
 	op = mcs.args;
 	op->cmd = MMUEXT_INVLPG_LOCAL;
 	op->arg1.linear_addr = addr & PAGE_MASK;
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
 }
 
 static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
@@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3)
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
 {
+#ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
+#endif
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
-/* Early release_pt assumes that all pts are pinned, since there's
+/* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pt_init(u32 pfn)
+static void xen_release_pte_init(u32 pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 	}
 }
 
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PTE);
 }
 
-static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
@@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
 	}
 }
 
-static void xen_release_pt(u32 pfn)
+static void xen_release_pte(u32 pfn)
 {
 	xen_release_ptpage(pfn, PT_PTE);
 }
 
-static void xen_release_pd(u32 pfn)
+static void xen_release_pmd(u32 pfn)
 {
 	xen_release_ptpage(pfn, PT_PMD);
 }
@@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 {
 	/* This will work as long as patching hasn't happened yet
 	   (which it hasn't) */
-	pv_mmu_ops.alloc_pt = xen_alloc_pt;
-	pv_mmu_ops.alloc_pd = xen_alloc_pd;
-	pv_mmu_ops.release_pt = xen_release_pt;
-	pv_mmu_ops.release_pd = xen_release_pd;
+	pv_mmu_ops.alloc_pte = xen_alloc_pte;
+	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+	pv_mmu_ops.release_pte = xen_release_pte;
+	pv_mmu_ops.release_pmd = xen_release_pmd;
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	setup_shared_info();
@@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = NULL,  /* never called */
+	.irq_enable_syscall_ret = xen_sysexit,
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
@@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.alloc_pt = xen_alloc_pt_init,
-	.release_pt = xen_release_pt_init,
-	.alloc_pd = xen_alloc_pt_init,
-	.alloc_pd_clone = paravirt_nop,
-	.release_pd = xen_release_pt_init,
+	.alloc_pte = xen_alloc_pte_init,
+	.release_pte = xen_release_pte_init,
+	.alloc_pmd = xen_alloc_pte_init,
+	.alloc_pmd_clone = paravirt_nop,
+	.release_pmd = xen_release_pte_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
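
The tlb-flush hunks above all follow one pattern; a minimal sketch of it, using names from this patch (the function below is illustrative only, and the MMUEXT op setup it elides is shown in full in the hunks):

	static void xen_flush_sketch(void)
	{
		struct multicall_space mcs;

		/* xen_mc_entry() hands out per-CPU batch state, so a
		   migration between entry and issue would corrupt the
		   batch; hence preemption is disabled around the pair */
		preempt_disable();
		mcs = xen_mc_entry(sizeof(struct mmuext_op));
		/* ... fill in mcs.args and call MULTI_mmuext_op() ... */
		xen_mc_issue(PARAVIRT_LAZY_MMU);
		preempt_enable();
	}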
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
new file mode 100644
index 000000000000..49ba9b5224d1
--- /dev/null
+++ b/arch/x86/xen/grant-table.c
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * grant_table.c
+ * x86 specific part
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * Copyright (c) 2005-2006, Christopher Clark
+ * Copyright (c) 2004-2005, K A Fraser
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan. Split out x86 specific part.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <xen/interface/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <asm/pgtable.h>
+
+static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+		      unsigned long addr, void *data)
+{
+	unsigned long **frames = (unsigned long **)data;
+
+	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
+
+static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+			unsigned long addr, void *data)
+{
+
+	set_pte_at(&init_mm, addr, pte, __pte(0));
+	return 0;
+}
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   struct grant_entry **__shared)
+{
+	int rc;
+	struct grant_entry *shared = *__shared;
+
+	if (shared == NULL) {
+		struct vm_struct *area =
+			xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+		BUG_ON(area == NULL);
+		shared = area->addr;
+		*__shared = shared;
+	}
+
+	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn, &frames);
+	return rc;
+}
+
+void arch_gnttab_unmap_shared(struct grant_entry *shared,
+			      unsigned long nr_gframes)
+{
+	apply_to_page_range(&init_mm, (unsigned long)shared,
+			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+}
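
A hedged sketch of how this mapping helper is typically called (the frame array and counts below are made-up values; the functions are the ones defined in this file):

	struct grant_entry *shared = NULL;
	unsigned long frames[4];	/* machine frame numbers obtained
					   from the hypervisor (hypothetical) */

	/* map_pte_fn() is invoked once per pte by apply_to_page_range(),
	   consuming one frame each time */
	if (arch_gnttab_map_shared(frames, 4, 4, &shared) == 0) {
		/* shared now points at the mapped grant entries */
	}
	arch_gnttab_unmap_shared(shared, 4);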
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2a054ef2a3da..6cbcf65609ad 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
+	/* updates to init_mm may be done without lock */
+	if (mm == &init_mm)
+		preempt_disable();
+
 	if (mm == current->mm || mm == &init_mm) {
 		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
 			struct multicall_space mcs;
@@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 
 			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
-			return;
+			goto out;
 		} else
 			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
-				return;
+				goto out;
 	}
 	xen_set_pte(ptep, pteval);
+
+out:
+	if (mm == &init_mm)
+		preempt_enable();
+}
+
+pteval_t xen_pte_val(pte_t pte)
+{
+	pteval_t ret = pte.pte;
+
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+
+	return ret;
+}
+
+pgdval_t xen_pgd_val(pgd_t pgd)
+{
+	pgdval_t ret = pgd.pgd;
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return ret;
+}
+
+pte_t xen_make_pte(pteval_t pte)
+{
+	if (pte & _PAGE_PRESENT) {
+		pte = phys_to_machine(XPADDR(pte)).maddr;
+		pte &= ~(_PAGE_PCD | _PAGE_PWT);
+	}
+
+	return (pte_t){ .pte = pte };
 }
 
+pgd_t xen_make_pgd(pgdval_t pgd)
+{
+	if (pgd & _PAGE_PRESENT)
+		pgd = phys_to_machine(XPADDR(pgd)).maddr;
+
+	return (pgd_t){ pgd };
+}
+
+pmdval_t xen_pmd_val(pmd_t pmd)
+{
+	pmdval_t ret = native_pmd_val(pmd);
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return ret;
+}
 #ifdef CONFIG_X86_PAE
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
@@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp)
 	xen_set_pmd(pmdp, __pmd(0));
 }
 
-unsigned long long xen_pte_val(pte_t pte)
-{
-	unsigned long long ret = 0;
-
-	if (pte.pte_low) {
-		ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	}
-
-	return ret;
-}
-
-unsigned long long xen_pmd_val(pmd_t pmd)
-{
-	unsigned long long ret = pmd.pmd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	return ret;
-}
-
-unsigned long long xen_pgd_val(pgd_t pgd)
-{
-	unsigned long long ret = pgd.pgd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	return ret;
-}
-
-pte_t xen_make_pte(unsigned long long pte)
-{
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ .pte = pte };
-}
-
-pmd_t xen_make_pmd(unsigned long long pmd)
+pmd_t xen_make_pmd(pmdval_t pmd)
 {
-	if (pmd & 1)
+	if (pmd & _PAGE_PRESENT)
 		pmd = phys_to_machine(XPADDR(pmd)).maddr;
 
-	return (pmd_t){ pmd };
-}
-
-pgd_t xen_make_pgd(unsigned long long pgd)
-{
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
+	return native_make_pmd(pmd);
 }
 #else  /* !PAE */
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
 }
-
-unsigned long xen_pte_val(pte_t pte)
-{
-	unsigned long ret = pte.pte_low;
-
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr;
-
-	return ret;
-}
-
-unsigned long xen_pgd_val(pgd_t pgd)
-{
-	unsigned long ret = pgd.pgd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	return ret;
-}
-
-pte_t xen_make_pte(unsigned long pte)
-{
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ pte };
-}
-
-pgd_t xen_make_pgd(unsigned long pgd)
-{
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
-}
 #endif /* CONFIG_X86_PAE */
 
 /*
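
The converters above translate pseudo-physical addresses to machine addresses and back; a sketch of the round trip they are meant to preserve for present entries (illustrative only, not code from the patch; some_paddr is a hypothetical value):

	pteval_t phys = some_paddr | _PAGE_PRESENT;
	pte_t pte = xen_make_pte(phys);		/* phys_to_machine(), with
						   _PAGE_PCD/_PAGE_PWT stripped */
	pteval_t back = xen_pte_val(pte);	/* machine_to_phys() */
	/* 'back' recovers the original pseudo-physical address */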
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2341492bf7a0..82517e4a752a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
+void xen_enable_sysenter(void)
+{
+	int cpu = smp_processor_id();
+	extern void xen_sysenter_target(void);
+	/* Mask events on entry, even though they get enabled immediately */
+	static struct callback_register sysenter = {
+		.type = CALLBACKTYPE_sysenter,
+		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+		.flags = CALLBACKF_mask_events,
+	};
+
+	if (!boot_cpu_has(X86_FEATURE_SEP) ||
+	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+	}
+}
+
 void __init xen_arch_setup(void)
 {
 	struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
 	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
 				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
 
+	xen_enable_sysenter();
+
 	set_iopl.iopl = 1;
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 	if (rc != 0)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e340ff92f6b6..92dd3dbf3ffb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,8 +36,9 @@
 #include "mmu.h"
 
 static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static DEFINE_PER_CPU(int, callfunc_irq) = -1;
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 /*
  * Structure and data for smp_call_function(). This is designed to minimise
@@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	xen_enable_sysenter();
 
 	preempt_disable();
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;
@@ -88,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 static int xen_smp_intr_init(unsigned int cpu)
 {
 	int rc;
-	const char *resched_name, *callfunc_name;
-
-	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
+	const char *resched_name, *callfunc_name, *debug_name;
 
 	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(callfunc_irq, cpu) = rc;
 
+	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
+				     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
+				     debug_name, NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(debug_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
 	if (per_cpu(callfunc_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	if (per_cpu(debug_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
 	return rc;
 }
 
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index fe161ed4b01e..2497a30f41de 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -108,6 +108,20 @@ ENDPATCH(xen_restore_fl_direct)
 	RELOC(xen_restore_fl_direct, 2b+1)
 
 /*
+	We can't use sysexit directly, because we're not running in ring0.
+	But we can easily fake it up using iret.  Assuming xen_sysexit
+	is jumped to with a standard stack frame, we can just strip it
+	back to a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
+	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+	lea PT_EIP(%esp), %esp
+
+	jmp xen_iret
+ENDPROC(xen_sysexit)
+
+/*
 	This is run where a normal iret would be run, with the same stack setup:
 	8: eflags
 	4: cs
@@ -184,8 +198,12 @@ iret_restore_end:
 	region is OK. */
 	je xen_hypervisor_callback
 
-	iret
+1:	iret
 xen_iret_end_crit:
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
 
 hyper_iret:
 	/* put this out of line since its very rarely used */
@@ -219,9 +237,7 @@ hyper_iret:
 	 ds		}  SAVE_ALL state
 	 eax		}
 	 :		:
-	 ebx		}
-	----------------
-	return addr	 <- esp
+	 ebx		}<- esp
 	----------------
 
 	In order to deliver the nested exception properly, we need to shift
@@ -236,10 +252,8 @@ hyper_iret:
 	it's usermode state which we eventually need to restore.
  */
 ENTRY(xen_iret_crit_fixup)
-	/* offsets +4 for return address */
-
 	/*
-	  Paranoia: Make sure we're really coming from userspace.
+	  Paranoia: Make sure we're really coming from kernel space.
 	  One could imagine a case where userspace jumps into the
 	  critical range address, but just before the CPU delivers a GP,
 	  it decides to deliver an interrupt instead.  Unlikely?
@@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup)
 	  jump instruction itself, not the destination, but some virtual
 	  environments get this wrong.
 	 */
-	movl PT_CS+4(%esp), %ecx
+	movl PT_CS(%esp), %ecx
 	andl $SEGMENT_RPL_MASK, %ecx
 	cmpl $USER_RPL, %ecx
 	je 2f
 
-	lea PT_ORIG_EAX+4(%esp), %esi
-	lea PT_EFLAGS+4(%esp), %edi
+	lea PT_ORIG_EAX(%esp), %esi
+	lea PT_EFLAGS(%esp), %edi
 
 	/* If eip is before iret_restore_end then stack
 	   hasn't been restored yet. */
 	cmp $iret_restore_end, %eax
 	jae 1f
 
-	movl 0+4(%edi),%eax		/* copy EAX */
-	movl %eax, PT_EAX+4(%esp)
+	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl %eax, PT_EAX(%esp)
 
 	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
 
 	/* set up the copy */
 1:	std
-	mov $(PT_EIP+4) / 4, %ecx	/* copy ret+saved regs up to orig_eax */
+	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
 	rep movsl
 	cld
 
 	lea 4(%edi),%esp		/* point esp to new frame */
-2:	ret
+2:	jmp xen_do_upcall
 
 
 /*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 956a491ea998..f1063ae08037 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,8 @@
 #define XEN_OPS_H
 
 #include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <xen/xen-ops.h>
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[];
 
 void xen_copy_trap_info(struct trap_info *traps);
 
-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
@@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
19char * __init xen_memory_setup(void); 20char * __init xen_memory_setup(void);
20void __init xen_arch_setup(void); 21void __init xen_arch_setup(void);
21void __init xen_init_IRQ(void); 22void __init xen_init_IRQ(void);
23void xen_enable_sysenter(void);
22 24
23void xen_setup_timer(int cpu); 25void xen_setup_timer(int cpu);
24void xen_setup_cpu_clockevents(void); 26void xen_setup_cpu_clockevents(void);
@@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void);
28int xen_set_wallclock(unsigned long time); 30int xen_set_wallclock(unsigned long time);
29unsigned long long xen_sched_clock(void); 31unsigned long long xen_sched_clock(void);
30 32
33irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
34
31bool xen_vcpu_stolen(int vcpu); 35bool xen_vcpu_stolen(int vcpu);
32 36
33void xen_mark_init_mm_pinned(void); 37void xen_mark_init_mm_pinned(void);
@@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
64DECL_ASM(void, xen_restore_fl_direct, unsigned long); 68DECL_ASM(void, xen_restore_fl_direct, unsigned long);
65 69
66void xen_iret(void); 70void xen_iret(void);
71void xen_sysexit(void);
72
67#endif /* XEN_OPS_H */ 73#endif /* XEN_OPS_H */
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3a0e3549739f..80f0ec91e2cf 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -97,4 +97,6 @@ source "drivers/dca/Kconfig"
97source "drivers/auxdisplay/Kconfig" 97source "drivers/auxdisplay/Kconfig"
98 98
99source "drivers/uio/Kconfig" 99source "drivers/uio/Kconfig"
100
101source "drivers/xen/Kconfig"
100endmenu 102endmenu
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9c6f3f99208d..d771da816d95 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -47,6 +47,7 @@
47 47
48#include <xen/interface/grant_table.h> 48#include <xen/interface/grant_table.h>
49#include <xen/interface/io/blkif.h> 49#include <xen/interface/io/blkif.h>
50#include <xen/interface/io/protocols.h>
50 51
51#include <asm/xen/hypervisor.h> 52#include <asm/xen/hypervisor.h>
52 53
@@ -74,7 +75,6 @@ static struct block_device_operations xlvbd_block_fops;
74struct blkfront_info 75struct blkfront_info
75{ 76{
76 struct xenbus_device *xbdev; 77 struct xenbus_device *xbdev;
77 dev_t dev;
78 struct gendisk *gd; 78 struct gendisk *gd;
79 int vdevice; 79 int vdevice;
80 blkif_vdev_t handle; 80 blkif_vdev_t handle;
@@ -88,6 +88,7 @@ struct blkfront_info
88 struct blk_shadow shadow[BLK_RING_SIZE]; 88 struct blk_shadow shadow[BLK_RING_SIZE];
89 unsigned long shadow_free; 89 unsigned long shadow_free;
90 int feature_barrier; 90 int feature_barrier;
91 int is_ready;
91 92
92 /** 93 /**
93 * The number of people holding this device open. We won't allow a 94 * The number of people holding this device open. We won't allow a
@@ -614,6 +615,12 @@ again:
614 message = "writing event-channel"; 615 message = "writing event-channel";
615 goto abort_transaction; 616 goto abort_transaction;
616 } 617 }
618 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
619 XEN_IO_PROTO_ABI_NATIVE);
620 if (err) {
621 message = "writing protocol";
622 goto abort_transaction;
623 }
617 624
618 err = xenbus_transaction_end(xbt, 0); 625 err = xenbus_transaction_end(xbt, 0);
619 if (err) { 626 if (err) {
@@ -833,6 +840,8 @@ static void blkfront_connect(struct blkfront_info *info)
833 spin_unlock_irq(&blkif_io_lock); 840 spin_unlock_irq(&blkif_io_lock);
834 841
835 add_disk(info->gd); 842 add_disk(info->gd);
843
844 info->is_ready = 1;
836} 845}
837 846
838/** 847/**
@@ -896,7 +905,7 @@ static void backend_changed(struct xenbus_device *dev,
896 break; 905 break;
897 906
898 case XenbusStateClosing: 907 case XenbusStateClosing:
899 bd = bdget(info->dev); 908 bd = bdget_disk(info->gd, 0);
900 if (bd == NULL) 909 if (bd == NULL)
901 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 910 xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
902 911
@@ -925,6 +934,13 @@ static int blkfront_remove(struct xenbus_device *dev)
925 return 0; 934 return 0;
926} 935}
927 936
937static int blkfront_is_ready(struct xenbus_device *dev)
938{
939 struct blkfront_info *info = dev->dev.driver_data;
940
941 return info->is_ready;
942}
943
928static int blkif_open(struct inode *inode, struct file *filep) 944static int blkif_open(struct inode *inode, struct file *filep)
929{ 945{
930 struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; 946 struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
@@ -971,6 +987,7 @@ static struct xenbus_driver blkfront = {
971 .remove = blkfront_remove, 987 .remove = blkfront_remove,
972 .resume = blkfront_resume, 988 .resume = blkfront_resume,
973 .otherend_changed = backend_changed, 989 .otherend_changed = backend_changed,
990 .is_ready = blkfront_is_ready,
974}; 991};
975 992
976static int __init xlblk_init(void) 993static int __init xlblk_init(void)
@@ -998,3 +1015,5 @@ module_exit(xlblk_exit);
998MODULE_DESCRIPTION("Xen virtual block device frontend"); 1015MODULE_DESCRIPTION("Xen virtual block device frontend");
999MODULE_LICENSE("GPL"); 1016MODULE_LICENSE("GPL");
1000MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); 1017MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
1018MODULE_ALIAS("xen:vbd");
1019MODULE_ALIAS("xenblk");
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 9dea14db724c..5f9d860925a1 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -149,6 +149,15 @@ config INPUT_APMPOWER
149 To compile this driver as a module, choose M here: the 149 To compile this driver as a module, choose M here: the
150 module will be called apm-power. 150 module will be called apm-power.
151 151
152config XEN_KBDDEV_FRONTEND
153 tristate "Xen virtual keyboard and mouse support"
154 depends on XEN_FBDEV_FRONTEND
155 default y
156 help
157 This driver implements the front-end of the Xen virtual
158 keyboard and mouse device driver. It communicates with a back-end
159 in another domain.
160
152comment "Input Device Drivers" 161comment "Input Device Drivers"
153 162
154source "drivers/input/keyboard/Kconfig" 163source "drivers/input/keyboard/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 2ae87b19caa8..98c4f9a77876 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -23,3 +23,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/
23obj-$(CONFIG_INPUT_MISC) += misc/ 23obj-$(CONFIG_INPUT_MISC) += misc/
24 24
25obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o 25obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o
26
27obj-$(CONFIG_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
new file mode 100644
index 000000000000..0f47f4697cdf
--- /dev/null
+++ b/drivers/input/xen-kbdfront.c
@@ -0,0 +1,340 @@
1/*
2 * Xen para-virtual input device
3 *
4 * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
5 * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
6 *
7 * Based on linux/drivers/input/mouse/sermouse.c
8 *
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file COPYING in the main directory of this archive for
11 * more details.
12 */
13
14/*
15 * TODO:
16 *
17 * Switch to grant tables together with xen-fbfront.c.
18 */
19
20#include <linux/kernel.h>
21#include <linux/errno.h>
22#include <linux/module.h>
23#include <linux/input.h>
24#include <asm/xen/hypervisor.h>
25#include <xen/events.h>
26#include <xen/page.h>
27#include <xen/interface/io/fbif.h>
28#include <xen/interface/io/kbdif.h>
29#include <xen/xenbus.h>
30
31struct xenkbd_info {
32 struct input_dev *kbd;
33 struct input_dev *ptr;
34 struct xenkbd_page *page;
35 int irq;
36 struct xenbus_device *xbdev;
37 char phys[32];
38};
39
40static int xenkbd_remove(struct xenbus_device *);
41static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
42static void xenkbd_disconnect_backend(struct xenkbd_info *);
43
44/*
45 * Note: if you need to send out events, see xenfb_do_update() for how
46 * to do that.
47 */
48
49static irqreturn_t input_handler(int irq, void *dev_id)
50{
51 struct xenkbd_info *info = dev_id;
52 struct xenkbd_page *page = info->page;
53 __u32 cons, prod;
54
55 prod = page->in_prod;
56 if (prod == page->in_cons)
57 return IRQ_HANDLED;
58 rmb(); /* ensure we see ring contents up to prod */
59 for (cons = page->in_cons; cons != prod; cons++) {
60 union xenkbd_in_event *event;
61 struct input_dev *dev;
62 event = &XENKBD_IN_RING_REF(page, cons);
63
64 dev = info->ptr;
65 switch (event->type) {
66 case XENKBD_TYPE_MOTION:
67 input_report_rel(dev, REL_X, event->motion.rel_x);
68 input_report_rel(dev, REL_Y, event->motion.rel_y);
69 break;
70 case XENKBD_TYPE_KEY:
71 dev = NULL;
72 if (test_bit(event->key.keycode, info->kbd->keybit))
73 dev = info->kbd;
74 if (test_bit(event->key.keycode, info->ptr->keybit))
75 dev = info->ptr;
76 if (dev)
77 input_report_key(dev, event->key.keycode,
78 event->key.pressed);
79 else
80 printk(KERN_WARNING
81 "xenkbd: unhandled keycode 0x%x\n",
82 event->key.keycode);
83 break;
84 case XENKBD_TYPE_POS:
85 input_report_abs(dev, ABS_X, event->pos.abs_x);
86 input_report_abs(dev, ABS_Y, event->pos.abs_y);
87 break;
88 }
89 if (dev)
90 input_sync(dev);
91 }
92 mb(); /* ensure we got ring contents */
93 page->in_cons = cons;
94 notify_remote_via_irq(info->irq);
95
96 return IRQ_HANDLED;
97}
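
The note above defers the send path to xenfb_do_update(). For orientation, a
minimal sketch of how an out-event could be queued on the same shared page,
assuming the XENKBD_OUT_RING_REF()/XENKBD_OUT_RING_LEN macros from
xen/interface/io/kbdif.h and a caller that has already checked for ring space:

    static void xenkbd_queue_out_event(struct xenkbd_info *info,
                                       const union xenkbd_out_event *event)
    {
            struct xenkbd_page *page = info->page;
            __u32 prod = page->out_prod;

            /* caller must ensure prod - page->out_cons < XENKBD_OUT_RING_LEN */
            XENKBD_OUT_RING_REF(page, prod) = *event;
            wmb();          /* publish the event before moving the producer */
            page->out_prod = prod + 1;
            notify_remote_via_irq(info->irq);
    }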
98
99static int __devinit xenkbd_probe(struct xenbus_device *dev,
100 const struct xenbus_device_id *id)
101{
102 int ret, i;
103 struct xenkbd_info *info;
104 struct input_dev *kbd, *ptr;
105
106 info = kzalloc(sizeof(*info), GFP_KERNEL);
107 if (!info) {
108 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
109 return -ENOMEM;
110 }
111 dev->dev.driver_data = info;
112 info->xbdev = dev;
113 info->irq = -1;
114 snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
115
116 info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
117 if (!info->page)
118 goto error_nomem;
119
120 /* keyboard */
121 kbd = input_allocate_device();
122 if (!kbd)
123 goto error_nomem;
124 kbd->name = "Xen Virtual Keyboard";
125 kbd->phys = info->phys;
126 kbd->id.bustype = BUS_PCI;
127 kbd->id.vendor = 0x5853;
128 kbd->id.product = 0xffff;
129 kbd->evbit[0] = BIT(EV_KEY);
130 for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
131 set_bit(i, kbd->keybit);
132 for (i = KEY_OK; i < KEY_MAX; i++)
133 set_bit(i, kbd->keybit);
134
135 ret = input_register_device(kbd);
136 if (ret) {
137 input_free_device(kbd);
138 xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
139 goto error;
140 }
141 info->kbd = kbd;
142
143 /* pointing device */
144 ptr = input_allocate_device();
145 if (!ptr)
146 goto error_nomem;
147 ptr->name = "Xen Virtual Pointer";
148 ptr->phys = info->phys;
149 ptr->id.bustype = BUS_PCI;
150 ptr->id.vendor = 0x5853;
151 ptr->id.product = 0xfffe;
152 ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
153 for (i = BTN_LEFT; i <= BTN_TASK; i++)
154 set_bit(i, ptr->keybit);
155 ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y);
156 input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
157 input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
158
159 ret = input_register_device(ptr);
160 if (ret) {
161 input_free_device(ptr);
162 xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
163 goto error;
164 }
165 info->ptr = ptr;
166
167 ret = xenkbd_connect_backend(dev, info);
168 if (ret < 0)
169 goto error;
170
171 return 0;
172
173 error_nomem:
174 ret = -ENOMEM;
175 xenbus_dev_fatal(dev, ret, "allocating device memory");
176 error:
177 xenkbd_remove(dev);
178 return ret;
179}
180
181static int xenkbd_resume(struct xenbus_device *dev)
182{
183 struct xenkbd_info *info = dev->dev.driver_data;
184
185 xenkbd_disconnect_backend(info);
186 memset(info->page, 0, PAGE_SIZE);
187 return xenkbd_connect_backend(dev, info);
188}
189
190static int xenkbd_remove(struct xenbus_device *dev)
191{
192 struct xenkbd_info *info = dev->dev.driver_data;
193
194 xenkbd_disconnect_backend(info);
195 if (info->kbd)
196 input_unregister_device(info->kbd);
197 if (info->ptr)
198 input_unregister_device(info->ptr);
199 free_page((unsigned long)info->page);
200 kfree(info);
201 return 0;
202}
203
204static int xenkbd_connect_backend(struct xenbus_device *dev,
205 struct xenkbd_info *info)
206{
207 int ret, evtchn;
208 struct xenbus_transaction xbt;
209
210 ret = xenbus_alloc_evtchn(dev, &evtchn);
211 if (ret)
212 return ret;
213 ret = bind_evtchn_to_irqhandler(evtchn, input_handler,
214 0, dev->devicetype, info);
215 if (ret < 0) {
216 xenbus_free_evtchn(dev, evtchn);
217 xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler");
218 return ret;
219 }
220 info->irq = ret;
221
222 again:
223 ret = xenbus_transaction_start(&xbt);
224 if (ret) {
225 xenbus_dev_fatal(dev, ret, "starting transaction");
226 return ret;
227 }
228 ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
229 virt_to_mfn(info->page));
230 if (ret)
231 goto error_xenbus;
232 ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
233 evtchn);
234 if (ret)
235 goto error_xenbus;
236 ret = xenbus_transaction_end(xbt, 0);
237 if (ret) {
238 if (ret == -EAGAIN)
239 goto again;
240 xenbus_dev_fatal(dev, ret, "completing transaction");
241 return ret;
242 }
243
244 xenbus_switch_state(dev, XenbusStateInitialised);
245 return 0;
246
247 error_xenbus:
248 xenbus_transaction_end(xbt, 1);
249 xenbus_dev_fatal(dev, ret, "writing xenstore");
250 return ret;
251}
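
If another writer races with this transaction, xenbus_transaction_end() fails
with -EAGAIN and the whole transaction is replayed via the again: label.
Condensed to its skeleton (the device-specific xenbus_printf() calls and the
abort-on-error path that calls xenbus_transaction_end(xbt, 1) are elided),
the idiom is:

    struct xenbus_transaction xbt;
    int ret;

    do {
            ret = xenbus_transaction_start(&xbt);
            if (ret)
                    return ret;
            /* ... write this device's xenstore keys ... */
            ret = xenbus_transaction_end(xbt, 0);   /* 0 == commit */
    } while (ret == -EAGAIN);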
252
253static void xenkbd_disconnect_backend(struct xenkbd_info *info)
254{
255 if (info->irq >= 0)
256 unbind_from_irqhandler(info->irq, info);
257 info->irq = -1;
258}
259
260static void xenkbd_backend_changed(struct xenbus_device *dev,
261 enum xenbus_state backend_state)
262{
263 struct xenkbd_info *info = dev->dev.driver_data;
264 int ret, val;
265
266 switch (backend_state) {
267 case XenbusStateInitialising:
268 case XenbusStateInitialised:
269 case XenbusStateUnknown:
270 case XenbusStateClosed:
271 break;
272
273 case XenbusStateInitWait:
274InitWait:
275 ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
276 "feature-abs-pointer", "%d", &val);
277 if (ret < 0)
278 val = 0;
279 if (val) {
280 ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
281 "request-abs-pointer", "1");
282 if (ret)
283 printk(KERN_WARNING
284 "xenkbd: can't request abs-pointer");
285 }
286 xenbus_switch_state(dev, XenbusStateConnected);
287 break;
288
289 case XenbusStateConnected:
290 /*
291 * Work around xenbus race condition: If backend goes
292 * through InitWait to Connected fast enough, we can
293 * get Connected twice here.
294 */
295 if (dev->state != XenbusStateConnected)
296 goto InitWait; /* no InitWait seen yet, fudge it */
297 break;
298
299 case XenbusStateClosing:
300 xenbus_frontend_closed(dev);
301 break;
302 }
303}
304
305static struct xenbus_device_id xenkbd_ids[] = {
306 { "vkbd" },
307 { "" }
308};
309
310static struct xenbus_driver xenkbd = {
311 .name = "vkbd",
312 .owner = THIS_MODULE,
313 .ids = xenkbd_ids,
314 .probe = xenkbd_probe,
315 .remove = xenkbd_remove,
316 .resume = xenkbd_resume,
317 .otherend_changed = xenkbd_backend_changed,
318};
319
320static int __init xenkbd_init(void)
321{
322 if (!is_running_on_xen())
323 return -ENODEV;
324
325 /* Nothing to do if running in dom0. */
326 if (is_initial_xendomain())
327 return -ENODEV;
328
329 return xenbus_register_frontend(&xenkbd);
330}
331
332static void __exit xenkbd_cleanup(void)
333{
334 xenbus_unregister_driver(&xenkbd);
335}
336
337module_init(xenkbd_init);
338module_exit(xenkbd_cleanup);
339
340MODULE_LICENSE("GPL");
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 7483d45bc5bc..e62018a36133 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1809,3 +1809,5 @@ module_exit(netif_exit);
1809 1809
1810MODULE_DESCRIPTION("Xen virtual network device frontend"); 1810MODULE_DESCRIPTION("Xen virtual network device frontend");
1811MODULE_LICENSE("GPL"); 1811MODULE_LICENSE("GPL");
1812MODULE_ALIAS("xen:vif");
1813MODULE_ALIAS("xennet");
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 1bd5fb30237d..e3dc8f8d0c3e 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1930,6 +1930,20 @@ config FB_VIRTUAL
1930 1930
1931 If unsure, say N. 1931 If unsure, say N.
1932 1932
1933config XEN_FBDEV_FRONTEND
1934 tristate "Xen virtual frame buffer support"
1935 depends on FB && XEN
1936 select FB_SYS_FILLRECT
1937 select FB_SYS_COPYAREA
1938 select FB_SYS_IMAGEBLIT
1939 select FB_SYS_FOPS
1940 select FB_DEFERRED_IO
1941 default y
1942 help
1943 This driver implements the front-end of the Xen virtual
1944 frame buffer driver. It communicates with a back-end
1945 in another domain.
1946
1933source "drivers/video/omap/Kconfig" 1947source "drivers/video/omap/Kconfig"
1934 1948
1935source "drivers/video/backlight/Kconfig" 1949source "drivers/video/backlight/Kconfig"
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 11c0e5e05f21..f172b9b73314 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o
114obj-$(CONFIG_FB_SM501) += sm501fb.o 114obj-$(CONFIG_FB_SM501) += sm501fb.o
115obj-$(CONFIG_FB_XILINX) += xilinxfb.o 115obj-$(CONFIG_FB_XILINX) += xilinxfb.o
116obj-$(CONFIG_FB_OMAP) += omap/ 116obj-$(CONFIG_FB_OMAP) += omap/
117obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o
117 118
118# Platform or fallback drivers go here 119# Platform or fallback drivers go here
119obj-$(CONFIG_FB_UVESA) += uvesafb.o 120obj-$(CONFIG_FB_UVESA) += uvesafb.o
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
new file mode 100644
index 000000000000..619a6f8d65a2
--- /dev/null
+++ b/drivers/video/xen-fbfront.c
@@ -0,0 +1,550 @@
1/*
2 * Xen para-virtual frame buffer device
3 *
4 * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com>
5 * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
6 *
7 * Based on linux/drivers/video/q40fb.c
8 *
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file COPYING in the main directory of this archive for
11 * more details.
12 */
13
14/*
15 * TODO:
16 *
17 * Switch to grant tables when they become capable of dealing with the
18 * frame buffer.
19 */
20
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/fb.h>
24#include <linux/module.h>
25#include <linux/vmalloc.h>
26#include <linux/mm.h>
27#include <asm/xen/hypervisor.h>
28#include <xen/events.h>
29#include <xen/page.h>
30#include <xen/interface/io/fbif.h>
31#include <xen/interface/io/protocols.h>
32#include <xen/xenbus.h>
33
34struct xenfb_info {
35 unsigned char *fb;
36 struct fb_info *fb_info;
37 int x1, y1, x2, y2; /* dirty rectangle,
38 protected by dirty_lock */
39 spinlock_t dirty_lock;
40 int nr_pages;
41 int irq;
42 struct xenfb_page *page;
43 unsigned long *mfns;
44 int update_wanted; /* XENFB_TYPE_UPDATE wanted */
45
46 struct xenbus_device *xbdev;
47};
48
49static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8;
50
51static int xenfb_remove(struct xenbus_device *);
52static void xenfb_init_shared_page(struct xenfb_info *);
53static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
54static void xenfb_disconnect_backend(struct xenfb_info *);
55
56static void xenfb_do_update(struct xenfb_info *info,
57 int x, int y, int w, int h)
58{
59 union xenfb_out_event event;
60 u32 prod;
61
62 event.type = XENFB_TYPE_UPDATE;
63 event.update.x = x;
64 event.update.y = y;
65 event.update.width = w;
66 event.update.height = h;
67
68 prod = info->page->out_prod;
69 /* caller ensures !xenfb_queue_full() */
70 mb(); /* ensure ring space available */
71 XENFB_OUT_RING_REF(info->page, prod) = event;
72 wmb(); /* ensure ring contents visible */
73 info->page->out_prod = prod + 1;
74
75 notify_remote_via_irq(info->irq);
76}
77
78static int xenfb_queue_full(struct xenfb_info *info)
79{
80 u32 cons, prod;
81
82 prod = info->page->out_prod;
83 cons = info->page->out_cons;
84 return prod - cons == XENFB_OUT_RING_LEN;
85}
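
out_prod and out_cons are free-running 32-bit counters, so prod - cons is the
number of queued events even after either counter wraps, provided the producer
never runs more than XENFB_OUT_RING_LEN ahead. A small illustration of the
unsigned arithmetic:

    u32 prod = 0x00000001;      /* producer has wrapped past 2^32 */
    u32 cons = 0xffffffff;      /* consumer has not wrapped yet */
    u32 queued = prod - cons;   /* == 2: the subtraction wraps the same way */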
86
87static void xenfb_refresh(struct xenfb_info *info,
88 int x1, int y1, int w, int h)
89{
90 unsigned long flags;
91 int y2 = y1 + h - 1;
92 int x2 = x1 + w - 1;
93
94 if (!info->update_wanted)
95 return;
96
97 spin_lock_irqsave(&info->dirty_lock, flags);
98
99 /* Combine with dirty rectangle: */
100 if (info->y1 < y1)
101 y1 = info->y1;
102 if (info->y2 > y2)
103 y2 = info->y2;
104 if (info->x1 < x1)
105 x1 = info->x1;
106 if (info->x2 > x2)
107 x2 = info->x2;
108
109 if (xenfb_queue_full(info)) {
110 /* Can't send right now, stash it in the dirty rectangle */
111 info->x1 = x1;
112 info->x2 = x2;
113 info->y1 = y1;
114 info->y2 = y2;
115 spin_unlock_irqrestore(&info->dirty_lock, flags);
116 return;
117 }
118
119 /* Clear dirty rectangle: */
120 info->x1 = info->y1 = INT_MAX;
121 info->x2 = info->y2 = 0;
122
123 spin_unlock_irqrestore(&info->dirty_lock, flags);
124
125 if (x1 <= x2 && y1 <= y2)
126 xenfb_do_update(info, x1, y1, x2 - x1 + 1, y2 - y1 + 1);
127}
128
129static void xenfb_deferred_io(struct fb_info *fb_info,
130 struct list_head *pagelist)
131{
132 struct xenfb_info *info = fb_info->par;
133 struct page *page;
134 unsigned long beg, end;
135 int y1, y2, miny, maxy;
136
137 miny = INT_MAX;
138 maxy = 0;
139 list_for_each_entry(page, pagelist, lru) {
140 beg = page->index << PAGE_SHIFT;
141 end = beg + PAGE_SIZE - 1;
142 y1 = beg / fb_info->fix.line_length;
143 y2 = end / fb_info->fix.line_length;
144 if (y2 >= fb_info->var.yres)
145 y2 = fb_info->var.yres - 1;
146 if (miny > y1)
147 miny = y1;
148 if (maxy < y2)
149 maxy = y2;
150 }
151 xenfb_refresh(info, 0, miny, fb_info->var.xres, maxy - miny + 1);
152}
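
The dirty-page-to-scanline conversion uses only fix.line_length. A worked
example with assumed numbers, taking the default 800x600 visual at 32 bpp
(line_length = 3200 bytes) and 4 KiB pages:

    unsigned long beg = 2UL << PAGE_SHIFT;   /* page index 2 -> byte 8192 */
    unsigned long end = beg + PAGE_SIZE - 1; /* byte 12287 */
    int y1 = beg / 3200;                     /* 8192 / 3200  -> scanline 2 */
    int y2 = end / 3200;                     /* 12287 / 3200 -> scanline 3 */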
153
154static struct fb_deferred_io xenfb_defio = {
155 .delay = HZ / 20,
156 .deferred_io = xenfb_deferred_io,
157};
158
159static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green,
160 unsigned blue, unsigned transp,
161 struct fb_info *info)
162{
163 u32 v;
164
165 if (regno > info->cmap.len)
166 return 1;
167
168#define CNVT_TOHW(val, width) ((((val)<<(width))+0x7FFF-(val))>>16)
169 red = CNVT_TOHW(red, info->var.red.length);
170 green = CNVT_TOHW(green, info->var.green.length);
171 blue = CNVT_TOHW(blue, info->var.blue.length);
172 transp = CNVT_TOHW(transp, info->var.transp.length);
173#undef CNVT_TOHW
174
175 v = (red << info->var.red.offset) |
176 (green << info->var.green.offset) |
177 (blue << info->var.blue.offset);
178
179 switch (info->var.bits_per_pixel) {
180 case 16:
181 case 24:
182 case 32:
183 ((u32 *)info->pseudo_palette)[regno] = v;
184 break;
185 }
186
187 return 0;
188}
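
CNVT_TOHW() rescales a 16-bit colour component to a width-bit one:
((val << width) + 0x7FFF - val) >> 16 equals (val * (2^width - 1) + 0x7FFF) >> 16,
i.e. a rounded multiply by (2^width - 1) / 2^16. Spot checks for width = 8:

    ((0xFFFF << 8) + 0x7FFF - 0xFFFF) >> 16;   /* == 255: full scale -> full scale */
    ((0x8000 << 8) + 0x7FFF - 0x8000) >> 16;   /* == 127: mid scale  -> mid scale */
    ((0x0000 << 8) + 0x7FFF - 0x0000) >> 16;   /* == 0 */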
189
190static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
191{
192 struct xenfb_info *info = p->par;
193
194 sys_fillrect(p, rect);
195 xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height);
196}
197
198static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image)
199{
200 struct xenfb_info *info = p->par;
201
202 sys_imageblit(p, image);
203 xenfb_refresh(info, image->dx, image->dy, image->width, image->height);
204}
205
206static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
207{
208 struct xenfb_info *info = p->par;
209
210 sys_copyarea(p, area);
211 xenfb_refresh(info, area->dx, area->dy, area->width, area->height);
212}
213
214static ssize_t xenfb_write(struct fb_info *p, const char __user *buf,
215 size_t count, loff_t *ppos)
216{
217 struct xenfb_info *info = p->par;
218 ssize_t res;
219
220 res = fb_sys_write(p, buf, count, ppos);
221 xenfb_refresh(info, 0, 0, info->page->width, info->page->height);
222 return res;
223}
224
225static struct fb_ops xenfb_fb_ops = {
226 .owner = THIS_MODULE,
227 .fb_read = fb_sys_read,
228 .fb_write = xenfb_write,
229 .fb_setcolreg = xenfb_setcolreg,
230 .fb_fillrect = xenfb_fillrect,
231 .fb_copyarea = xenfb_copyarea,
232 .fb_imageblit = xenfb_imageblit,
233};
234
235static irqreturn_t xenfb_event_handler(int irq, void *dev_id)
236{
237 /*
238 * No in-events are recognized; simply ignore them all.
239 * If you need to recognize some, see xen-kbdfront's
240 * input_handler() for how to do that.
241 */
242 struct xenfb_info *info = dev_id;
243 struct xenfb_page *page = info->page;
244
245 if (page->in_cons != page->in_prod) {
246 info->page->in_cons = info->page->in_prod;
247 notify_remote_via_irq(info->irq);
248 }
249
250 /* Flush dirty rectangle: */
251 xenfb_refresh(info, INT_MAX, INT_MAX, -INT_MAX, -INT_MAX);
252
253 return IRQ_HANDLED;
254}
255
256static int __devinit xenfb_probe(struct xenbus_device *dev,
257 const struct xenbus_device_id *id)
258{
259 struct xenfb_info *info;
260 struct fb_info *fb_info;
261 int ret;
262
263 info = kzalloc(sizeof(*info), GFP_KERNEL);
264 if (info == NULL) {
265 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
266 return -ENOMEM;
267 }
268 dev->dev.driver_data = info;
269 info->xbdev = dev;
270 info->irq = -1;
271 info->x1 = info->y1 = INT_MAX;
272 spin_lock_init(&info->dirty_lock);
273
274 info->fb = vmalloc(xenfb_mem_len);
275 if (info->fb == NULL)
276 goto error_nomem;
277 memset(info->fb, 0, xenfb_mem_len);
278
279 info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
280
281 info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
282 if (!info->mfns)
283 goto error_nomem;
284
285 /* set up shared page */
286 info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
287 if (!info->page)
288 goto error_nomem;
289
290 xenfb_init_shared_page(info);
291
292 /* abusing framebuffer_alloc() to allocate pseudo_palette */
293 fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
294 if (fb_info == NULL)
295 goto error_nomem;
296
297 /* complete the abuse: */
298 fb_info->pseudo_palette = fb_info->par;
299 fb_info->par = info;
300
301 fb_info->screen_base = info->fb;
302
303 fb_info->fbops = &xenfb_fb_ops;
304 fb_info->var.xres_virtual = fb_info->var.xres = info->page->width;
305 fb_info->var.yres_virtual = fb_info->var.yres = info->page->height;
306 fb_info->var.bits_per_pixel = info->page->depth;
307
308 fb_info->var.red = (struct fb_bitfield){16, 8, 0};
309 fb_info->var.green = (struct fb_bitfield){8, 8, 0};
310 fb_info->var.blue = (struct fb_bitfield){0, 8, 0};
311
312 fb_info->var.activate = FB_ACTIVATE_NOW;
313 fb_info->var.height = -1;
314 fb_info->var.width = -1;
315 fb_info->var.vmode = FB_VMODE_NONINTERLACED;
316
317 fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
318 fb_info->fix.line_length = info->page->line_length;
319 fb_info->fix.smem_start = 0;
320 fb_info->fix.smem_len = xenfb_mem_len;
321 strcpy(fb_info->fix.id, "xen");
322 fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
323 fb_info->fix.accel = FB_ACCEL_NONE;
324
325 fb_info->flags = FBINFO_FLAG_DEFAULT;
326
327 ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
328 if (ret < 0) {
329 framebuffer_release(fb_info);
330 xenbus_dev_fatal(dev, ret, "fb_alloc_cmap");
331 goto error;
332 }
333
334 fb_info->fbdefio = &xenfb_defio;
335 fb_deferred_io_init(fb_info);
336
337 ret = register_framebuffer(fb_info);
338 if (ret) {
339 fb_deferred_io_cleanup(fb_info);
340 fb_dealloc_cmap(&fb_info->cmap);
341 framebuffer_release(fb_info);
342 xenbus_dev_fatal(dev, ret, "register_framebuffer");
343 goto error;
344 }
345 info->fb_info = fb_info;
346
347 ret = xenfb_connect_backend(dev, info);
348 if (ret < 0)
349 goto error;
350
351 return 0;
352
353 error_nomem:
354 ret = -ENOMEM;
355 xenbus_dev_fatal(dev, ret, "allocating device memory");
356 error:
357 xenfb_remove(dev);
358 return ret;
359}
360
361static int xenfb_resume(struct xenbus_device *dev)
362{
363 struct xenfb_info *info = dev->dev.driver_data;
364
365 xenfb_disconnect_backend(info);
366 xenfb_init_shared_page(info);
367 return xenfb_connect_backend(dev, info);
368}
369
370static int xenfb_remove(struct xenbus_device *dev)
371{
372 struct xenfb_info *info = dev->dev.driver_data;
373
374 xenfb_disconnect_backend(info);
375 if (info->fb_info) {
376 fb_deferred_io_cleanup(info->fb_info);
377 unregister_framebuffer(info->fb_info);
378 fb_dealloc_cmap(&info->fb_info->cmap);
379 framebuffer_release(info->fb_info);
380 }
381 free_page((unsigned long)info->page);
382 vfree(info->mfns);
383 vfree(info->fb);
384 kfree(info);
385
386 return 0;
387}
388
389static unsigned long vmalloc_to_mfn(void *address)
390{
391 return pfn_to_mfn(vmalloc_to_pfn(address));
392}
393
394static void xenfb_init_shared_page(struct xenfb_info *info)
395{
396 int i;
397
398 for (i = 0; i < info->nr_pages; i++)
399 info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
400
401 info->page->pd[0] = vmalloc_to_mfn(info->mfns);
402 info->page->pd[1] = 0;
403 info->page->width = XENFB_WIDTH;
404 info->page->height = XENFB_HEIGHT;
405 info->page->depth = XENFB_DEPTH;
406 info->page->line_length = (info->page->depth / 8) * info->page->width;
407 info->page->mem_length = xenfb_mem_len;
408 info->page->in_cons = info->page->in_prod = 0;
409 info->page->out_cons = info->page->out_prod = 0;
410}
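
The shared page describes the framebuffer through a small two-level structure:
each pd[] slot holds the MFN of a page that is itself an array of MFNs naming
the framebuffer pages. At the default 800x600x32 mode that is about 469 pages,
so everything fits under pd[0] and pd[1] stays zero. A backend would walk it
roughly as follows; this is a sketch only, and map_foreign_mfn() is a
hypothetical stand-in for whatever foreign-mapping primitive the backend has:

    /* hypothetical backend-side walk; map_foreign_mfn() is illustrative */
    unsigned long *mfns = map_foreign_mfn(page->pd[0]);
    int i;

    for (i = 0; i < nr_fb_pages; i++) {
            void *fb_page = map_foreign_mfn(mfns[i]);
            /* fb_page holds framebuffer bytes [i*PAGE_SIZE, (i+1)*PAGE_SIZE) */
    }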
411
412static int xenfb_connect_backend(struct xenbus_device *dev,
413 struct xenfb_info *info)
414{
415 int ret, evtchn;
416 struct xenbus_transaction xbt;
417
418 ret = xenbus_alloc_evtchn(dev, &evtchn);
419 if (ret)
420 return ret;
421 ret = bind_evtchn_to_irqhandler(evtchn, xenfb_event_handler,
422 0, dev->devicetype, info);
423 if (ret < 0) {
424 xenbus_free_evtchn(dev, evtchn);
425 xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler");
426 return ret;
427 }
428 info->irq = ret;
429
430 again:
431 ret = xenbus_transaction_start(&xbt);
432 if (ret) {
433 xenbus_dev_fatal(dev, ret, "starting transaction");
434 return ret;
435 }
436 ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
437 virt_to_mfn(info->page));
438 if (ret)
439 goto error_xenbus;
440 ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
441 evtchn);
442 if (ret)
443 goto error_xenbus;
444 ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
445 XEN_IO_PROTO_ABI_NATIVE);
446 if (ret)
447 goto error_xenbus;
448 ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1");
449 if (ret)
450 goto error_xenbus;
451 ret = xenbus_transaction_end(xbt, 0);
452 if (ret) {
453 if (ret == -EAGAIN)
454 goto again;
455 xenbus_dev_fatal(dev, ret, "completing transaction");
456 return ret;
457 }
458
459 xenbus_switch_state(dev, XenbusStateInitialised);
460 return 0;
461
462 error_xenbus:
463 xenbus_transaction_end(xbt, 1);
464 xenbus_dev_fatal(dev, ret, "writing xenstore");
465 return ret;
466}
467
468static void xenfb_disconnect_backend(struct xenfb_info *info)
469{
470 if (info->irq >= 0)
471 unbind_from_irqhandler(info->irq, info);
472 info->irq = -1;
473}
474
475static void xenfb_backend_changed(struct xenbus_device *dev,
476 enum xenbus_state backend_state)
477{
478 struct xenfb_info *info = dev->dev.driver_data;
479 int val;
480
481 switch (backend_state) {
482 case XenbusStateInitialising:
483 case XenbusStateInitialised:
484 case XenbusStateUnknown:
485 case XenbusStateClosed:
486 break;
487
488 case XenbusStateInitWait:
489InitWait:
490 xenbus_switch_state(dev, XenbusStateConnected);
491 break;
492
493 case XenbusStateConnected:
494 /*
495 * Work around xenbus race condition: If backend goes
496 * through InitWait to Connected fast enough, we can
497 * get Connected twice here.
498 */
499 if (dev->state != XenbusStateConnected)
500 goto InitWait; /* no InitWait seen yet, fudge it */
501
502 if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
503 "request-update", "%d", &val) < 0)
504 val = 0;
505 if (val)
506 info->update_wanted = 1;
507 break;
508
509 case XenbusStateClosing:
510 xenbus_frontend_closed(dev);
511 break;
512 }
513}
514
515static struct xenbus_device_id xenfb_ids[] = {
516 { "vfb" },
517 { "" }
518};
519
520static struct xenbus_driver xenfb = {
521 .name = "vfb",
522 .owner = THIS_MODULE,
523 .ids = xenfb_ids,
524 .probe = xenfb_probe,
525 .remove = xenfb_remove,
526 .resume = xenfb_resume,
527 .otherend_changed = xenfb_backend_changed,
528};
529
530static int __init xenfb_init(void)
531{
532 if (!is_running_on_xen())
533 return -ENODEV;
534
535 /* Nothing to do if running in dom0. */
536 if (is_initial_xendomain())
537 return -ENODEV;
538
539 return xenbus_register_frontend(&xenfb);
540}
541
542static void __exit xenfb_cleanup(void)
543{
544 xenbus_unregister_driver(&xenfb);
545}
546
547module_init(xenfb_init);
548module_exit(xenfb_cleanup);
549
550MODULE_LICENSE("GPL");
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
new file mode 100644
index 000000000000..4b75a16de009
--- /dev/null
+++ b/drivers/xen/Kconfig
@@ -0,0 +1,19 @@
1config XEN_BALLOON
2 bool "Xen memory balloon driver"
3 depends on XEN
4 default y
5 help
6 The balloon driver allows the Xen domain to request more memory from
7 the system to expand the domain's memory allocation, or alternatively
8 return unneeded memory to the system.
9
10config XEN_SCRUB_PAGES
11 bool "Scrub pages before returning them to system"
12 depends on XEN_BALLOON
13 default y
14 help
15 Scrub pages before returning them to the system for reuse by
16 other domains. This makes sure that any confidential data
17 is not accidentally visible to other domains. It is more
18 secure, but slightly less efficient.
19 If in doubt, say yes.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 56592f0d6cef..37af04f1ffd9 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,2 +1,4 @@
1obj-y += grant-table.o 1obj-y += grant-table.o features.o events.o
2obj-y += xenbus/ 2obj-y += xenbus/
3obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
4obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
new file mode 100644
index 000000000000..ab25ba6cbbb9
--- /dev/null
+++ b/drivers/xen/balloon.c
@@ -0,0 +1,712 @@
1/******************************************************************************
2 * balloon.c
3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 *
6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include <linux/kernel.h>
36#include <linux/module.h>
37#include <linux/sched.h>
38#include <linux/errno.h>
39#include <linux/mm.h>
40#include <linux/bootmem.h>
41#include <linux/pagemap.h>
42#include <linux/highmem.h>
43#include <linux/mutex.h>
45#include <linux/list.h>
46#include <linux/sysdev.h>
47
48#include <asm/xen/hypervisor.h>
49#include <asm/page.h>
50#include <asm/pgalloc.h>
51#include <asm/pgtable.h>
52#include <asm/uaccess.h>
53#include <asm/tlb.h>
54
55#include <xen/interface/memory.h>
56#include <xen/balloon.h>
57#include <xen/xenbus.h>
58#include <xen/features.h>
59#include <xen/page.h>
60
61#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
62
63#define BALLOON_CLASS_NAME "memory"
64
65struct balloon_stats {
66 /* We aim for 'current allocation' == 'target allocation'. */
67 unsigned long current_pages;
68 unsigned long target_pages;
69 /* We may hit the hard limit in Xen. If we do then we remember it. */
70 unsigned long hard_limit;
71 /*
72 * Drivers may alter the memory reservation independently, but they
73 * must inform the balloon driver so we avoid hitting the hard limit.
74 */
75 unsigned long driver_pages;
76 /* Number of pages in high- and low-memory balloons. */
77 unsigned long balloon_low;
78 unsigned long balloon_high;
79};
80
81static DEFINE_MUTEX(balloon_mutex);
82
83static struct sys_device balloon_sysdev;
84
85static int register_balloon(struct sys_device *sysdev);
86
87/*
88 * Protects atomic reservation decrease/increase against concurrent increases.
89 * Also protects non-atomic updates of current_pages and driver_pages, and
90 * balloon lists.
91 */
92static DEFINE_SPINLOCK(balloon_lock);
93
94static struct balloon_stats balloon_stats;
95
96/* We increase/decrease in batches which fit in a page */
97static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
98
99/* VM /proc information for memory */
100extern unsigned long totalram_pages;
101
102#ifdef CONFIG_HIGHMEM
103extern unsigned long totalhigh_pages;
104#define inc_totalhigh_pages() (totalhigh_pages++)
105#define dec_totalhigh_pages() (totalhigh_pages--)
106#else
107#define inc_totalhigh_pages() do {} while(0)
108#define dec_totalhigh_pages() do {} while(0)
109#endif
110
111/* List of ballooned pages, threaded through the mem_map array. */
112static LIST_HEAD(ballooned_pages);
113
114/* Main work function, always executed in process context. */
115static void balloon_process(struct work_struct *work);
116static DECLARE_WORK(balloon_worker, balloon_process);
117static struct timer_list balloon_timer;
118
119/* When ballooning out (allocating memory to return to Xen) we don't really
120 want the kernel to try too hard since that can trigger the oom killer. */
121#define GFP_BALLOON \
122 (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
123
124static void scrub_page(struct page *page)
125{
126#ifdef CONFIG_XEN_SCRUB_PAGES
127 if (PageHighMem(page)) {
128 void *v = kmap(page);
129 clear_page(v);
130 kunmap(page);
131 } else {
132 void *v = page_address(page);
133 clear_page(v);
134 }
135#endif
136}
137
138/* balloon_append: add the given page to the balloon. */
139static void balloon_append(struct page *page)
140{
141 /* Lowmem is re-populated first, so highmem pages go at list tail. */
142 if (PageHighMem(page)) {
143 list_add_tail(&page->lru, &ballooned_pages);
144 balloon_stats.balloon_high++;
145 dec_totalhigh_pages();
146 } else {
147 list_add(&page->lru, &ballooned_pages);
148 balloon_stats.balloon_low++;
149 }
150}
151
152/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
153static struct page *balloon_retrieve(void)
154{
155 struct page *page;
156
157 if (list_empty(&ballooned_pages))
158 return NULL;
159
160 page = list_entry(ballooned_pages.next, struct page, lru);
161 list_del(&page->lru);
162
163 if (PageHighMem(page)) {
164 balloon_stats.balloon_high--;
165 inc_totalhigh_pages();
166 }
167 else
168 balloon_stats.balloon_low--;
169
170 return page;
171}
172
173static struct page *balloon_first_page(void)
174{
175 if (list_empty(&ballooned_pages))
176 return NULL;
177 return list_entry(ballooned_pages.next, struct page, lru);
178}
179
180static struct page *balloon_next_page(struct page *page)
181{
182 struct list_head *next = page->lru.next;
183 if (next == &ballooned_pages)
184 return NULL;
185 return list_entry(next, struct page, lru);
186}
187
188static void balloon_alarm(unsigned long unused)
189{
190 schedule_work(&balloon_worker);
191}
192
193static unsigned long current_target(void)
194{
195 unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
196
197 target = min(target,
198 balloon_stats.current_pages +
199 balloon_stats.balloon_low +
200 balloon_stats.balloon_high);
201
202 return target;
203}
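
As a worked example with assumed numbers: if the toolstack sets target_pages =
300000 while current_pages = 250000, the balloon holds balloon_low +
balloon_high = 20000 pages, and no hard limit has been recorded (hard_limit =
~0UL), then current_target() returns min(300000, 250000 + 20000) = 270000; the
domain can only grow by pages it has previously ballooned out.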
204
205static int increase_reservation(unsigned long nr_pages)
206{
207 unsigned long pfn, i, flags;
208 struct page *page;
209 long rc;
210 struct xen_memory_reservation reservation = {
211 .address_bits = 0,
212 .extent_order = 0,
213 .domid = DOMID_SELF
214 };
215
216 if (nr_pages > ARRAY_SIZE(frame_list))
217 nr_pages = ARRAY_SIZE(frame_list);
218
219 spin_lock_irqsave(&balloon_lock, flags);
220
221 page = balloon_first_page();
222 for (i = 0; i < nr_pages; i++) {
223 BUG_ON(page == NULL);
224 frame_list[i] = page_to_pfn(page);
225 page = balloon_next_page(page);
226 }
227
228 reservation.extent_start = (unsigned long)frame_list;
229 reservation.nr_extents = nr_pages;
230 rc = HYPERVISOR_memory_op(
231 XENMEM_populate_physmap, &reservation);
232 if (rc < nr_pages) {
233 if (rc > 0) {
234 int ret;
235
236 /* We hit the Xen hard limit: reprobe. */
237 reservation.nr_extents = rc;
238 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
239 &reservation);
240 BUG_ON(ret != rc);
241 }
242 if (rc >= 0)
243 balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
244 balloon_stats.driver_pages);
245 goto out;
246 }
247
248 for (i = 0; i < nr_pages; i++) {
249 page = balloon_retrieve();
250 BUG_ON(page == NULL);
251
252 pfn = page_to_pfn(page);
253 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
254 phys_to_machine_mapping_valid(pfn));
255
256 set_phys_to_machine(pfn, frame_list[i]);
257
258 /* Link back into the page tables if not highmem. */
259 if (pfn < max_low_pfn) {
260 int ret;
261 ret = HYPERVISOR_update_va_mapping(
262 (unsigned long)__va(pfn << PAGE_SHIFT),
263 mfn_pte(frame_list[i], PAGE_KERNEL),
264 0);
265 BUG_ON(ret);
266 }
267
268 /* Relinquish the page back to the allocator. */
269 ClearPageReserved(page);
270 init_page_count(page);
271 __free_page(page);
272 }
273
274 balloon_stats.current_pages += nr_pages;
275 totalram_pages = balloon_stats.current_pages;
276
277 out:
278 spin_unlock_irqrestore(&balloon_lock, flags);
279
280 return 0;
281}
282
283static int decrease_reservation(unsigned long nr_pages)
284{
285 unsigned long pfn, i, flags;
286 struct page *page;
287 int need_sleep = 0;
288 int ret;
289 struct xen_memory_reservation reservation = {
290 .address_bits = 0,
291 .extent_order = 0,
292 .domid = DOMID_SELF
293 };
294
295 if (nr_pages > ARRAY_SIZE(frame_list))
296 nr_pages = ARRAY_SIZE(frame_list);
297
298 for (i = 0; i < nr_pages; i++) {
299 if ((page = alloc_page(GFP_BALLOON)) == NULL) {
300 nr_pages = i;
301 need_sleep = 1;
302 break;
303 }
304
305 pfn = page_to_pfn(page);
306 frame_list[i] = pfn_to_mfn(pfn);
307
308 scrub_page(page);
309 }
310
311 /* Ensure that ballooned highmem pages don't have kmaps. */
312 kmap_flush_unused();
313 flush_tlb_all();
314
315 spin_lock_irqsave(&balloon_lock, flags);
316
317 /* No more mappings: invalidate P2M and add to balloon. */
318 for (i = 0; i < nr_pages; i++) {
319 pfn = mfn_to_pfn(frame_list[i]);
320 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
321 balloon_append(pfn_to_page(pfn));
322 }
323
324 reservation.extent_start = (unsigned long)frame_list;
325 reservation.nr_extents = nr_pages;
326 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
327 BUG_ON(ret != nr_pages);
328
329 balloon_stats.current_pages -= nr_pages;
330 totalram_pages = balloon_stats.current_pages;
331
332 spin_unlock_irqrestore(&balloon_lock, flags);
333
334 return need_sleep;
335}
336
337/*
338 * We avoid multiple worker processes conflicting via the balloon mutex.
339 * We may of course race updates of the target counts (which are protected
340 * by the balloon lock), or with changes to the Xen hard limit, but we will
341 * recover from these in time.
342 */
343static void balloon_process(struct work_struct *work)
344{
345 int need_sleep = 0;
346 long credit;
347
348 mutex_lock(&balloon_mutex);
349
350 do {
351 credit = current_target() - balloon_stats.current_pages;
352 if (credit > 0)
353 need_sleep = (increase_reservation(credit) != 0);
354 if (credit < 0)
355 need_sleep = (decrease_reservation(-credit) != 0);
356
357#ifndef CONFIG_PREEMPT
358 if (need_resched())
359 schedule();
360#endif
361 } while ((credit != 0) && !need_sleep);
362
363 /* Schedule more work if there is some still to be done. */
364 if (current_target() != balloon_stats.current_pages)
365 mod_timer(&balloon_timer, jiffies + HZ);
366
367 mutex_unlock(&balloon_mutex);
368}
369
370/* Resets the Xen limit, sets new target, and kicks off processing. */
371void balloon_set_new_target(unsigned long target)
372{
373 /* No need for lock. Not read-modify-write updates. */
374 balloon_stats.hard_limit = ~0UL;
375 balloon_stats.target_pages = target;
376 schedule_work(&balloon_worker);
377}
378
379static struct xenbus_watch target_watch =
380{
381 .node = "memory/target"
382};
383
384/* React to a change in the target key */
385static void watch_target(struct xenbus_watch *watch,
386 const char **vec, unsigned int len)
387{
388 unsigned long long new_target;
389 int err;
390
391 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
392 if (err != 1) {
393 /* This is ok (for domain0 at least) - so just return */
394 return;
395 }
396
397 /* The given memory/target value is in KiB, so it needs converting to
398 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
399 */
400 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
401}
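
For example, with 4 KiB pages (PAGE_SHIFT = 12) a memory/target value of
524288 KiB is shifted right by 12 - 10 = 2, giving 131072 pages, i.e. 512 MiB.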
402
403static int balloon_init_watcher(struct notifier_block *notifier,
404 unsigned long event,
405 void *data)
406{
407 int err;
408
409 err = register_xenbus_watch(&target_watch);
410 if (err)
411 printk(KERN_ERR "Failed to set balloon watcher\n");
412
413 return NOTIFY_DONE;
414}
415
416static struct notifier_block xenstore_notifier;
417
418static int __init balloon_init(void)
419{
420 unsigned long pfn;
421 struct page *page;
422
423 if (!is_running_on_xen())
424 return -ENODEV;
425
426 pr_info("xen_balloon: Initialising balloon driver.\n");
427
428 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
429 totalram_pages = balloon_stats.current_pages;
430 balloon_stats.target_pages = balloon_stats.current_pages;
431 balloon_stats.balloon_low = 0;
432 balloon_stats.balloon_high = 0;
433 balloon_stats.driver_pages = 0UL;
434 balloon_stats.hard_limit = ~0UL;
435
436 init_timer(&balloon_timer);
437 balloon_timer.data = 0;
438 balloon_timer.function = balloon_alarm;
439
440 register_balloon(&balloon_sysdev);
441
442 /* Initialise the balloon with excess memory space. */
443 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
444 page = pfn_to_page(pfn);
445 if (!PageReserved(page))
446 balloon_append(page);
447 }
448
449 target_watch.callback = watch_target;
450 xenstore_notifier.notifier_call = balloon_init_watcher;
451
452 register_xenstore_notifier(&xenstore_notifier);
453
454 return 0;
455}
456
457subsys_initcall(balloon_init);
458
459static void balloon_exit(void)
460{
461 /* XXX - release balloon here */
462 return;
463}
464
465module_exit(balloon_exit);
466
467static void balloon_update_driver_allowance(long delta)
468{
469 unsigned long flags;
470
471 spin_lock_irqsave(&balloon_lock, flags);
472 balloon_stats.driver_pages += delta;
473 spin_unlock_irqrestore(&balloon_lock, flags);
474}
475
476static int dealloc_pte_fn(
477 pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
478{
479 unsigned long mfn = pte_mfn(*pte);
480 int ret;
481 struct xen_memory_reservation reservation = {
482 .nr_extents = 1,
483 .extent_order = 0,
484 .domid = DOMID_SELF
485 };
486 reservation.extent_start = (unsigned long)&mfn;
487 set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
488 set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
489 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
490 BUG_ON(ret != 1);
491 return 0;
492}
493
494static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
495{
496 unsigned long vaddr, flags;
497 struct page *page, **pagevec;
498 int i, ret;
499
500 pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
501 if (pagevec == NULL)
502 return NULL;
503
504 for (i = 0; i < nr_pages; i++) {
505 page = pagevec[i] = alloc_page(GFP_KERNEL);
506 if (page == NULL)
507 goto err;
508
509 vaddr = (unsigned long)page_address(page);
510
511 scrub_page(page);
512
513 spin_lock_irqsave(&balloon_lock, flags);
514
515 if (xen_feature(XENFEAT_auto_translated_physmap)) {
516 unsigned long gmfn = page_to_pfn(page);
517 struct xen_memory_reservation reservation = {
518 .nr_extents = 1,
519 .extent_order = 0,
520 .domid = DOMID_SELF
521 };
522 reservation.extent_start = (unsigned long)&gmfn;
523 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
524 &reservation);
525 if (ret == 1)
526 ret = 0; /* success */
527 } else {
528 ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
529 dealloc_pte_fn, NULL);
530 }
531
532 if (ret != 0) {
533 spin_unlock_irqrestore(&balloon_lock, flags);
534 __free_page(page);
535 goto err;
536 }
537
538 totalram_pages = --balloon_stats.current_pages;
539
540 spin_unlock_irqrestore(&balloon_lock, flags);
541 }
542
543 out:
544 schedule_work(&balloon_worker);
545 flush_tlb_all();
546 return pagevec;
547
548 err:
549 spin_lock_irqsave(&balloon_lock, flags);
550 while (--i >= 0)
551 balloon_append(pagevec[i]);
552 spin_unlock_irqrestore(&balloon_lock, flags);
553 kfree(pagevec);
554 pagevec = NULL;
555 goto out;
556}
557
558static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
559{
560 unsigned long flags;
561 int i;
562
563 if (pagevec == NULL)
564 return;
565
566 spin_lock_irqsave(&balloon_lock, flags);
567 for (i = 0; i < nr_pages; i++) {
568 BUG_ON(page_count(pagevec[i]) != 1);
569 balloon_append(pagevec[i]);
570 }
571 spin_unlock_irqrestore(&balloon_lock, flags);
572
573 kfree(pagevec);
574
575 schedule_work(&balloon_worker);
576}
577
578static void balloon_release_driver_page(struct page *page)
579{
580 unsigned long flags;
581
582 spin_lock_irqsave(&balloon_lock, flags);
583 balloon_append(page);
584 balloon_stats.driver_pages--;
585 spin_unlock_irqrestore(&balloon_lock, flags);
586
587 schedule_work(&balloon_worker);
588}
589
590
591#define BALLOON_SHOW(name, format, args...) \
592 static ssize_t show_##name(struct sys_device *dev, \
593 char *buf) \
594 { \
595 return sprintf(buf, format, ##args); \
596 } \
597 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
598
599BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
600BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
601BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
602BALLOON_SHOW(hard_limit_kb,
603 (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
604 (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
605BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
606
607static ssize_t show_target_kb(struct sys_device *dev, char *buf)
608{
609 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
610}
611
612static ssize_t store_target_kb(struct sys_device *dev,
613 const char *buf,
614 size_t count)
615{
616 char memstring[64], *endchar;
617 unsigned long long target_bytes;
618
619 if (!capable(CAP_SYS_ADMIN))
620 return -EPERM;
621
622 if (count <= 1)
623 return -EBADMSG; /* runt */
624 if (count > sizeof(memstring))
625 return -EFBIG; /* too long */
626 strcpy(memstring, buf);
627
628 target_bytes = memparse(memstring, &endchar);
629 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
630
631 return count;
632}
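
Note that memparse() accepts an optional K/M/G suffix and otherwise treats the
value as plain bytes, so despite the attribute's _kb name, writing "512M"
yields 536870912 >> PAGE_SHIFT = 131072 pages with 4 KiB pages, while a bare
"1024" means 1024 bytes, not 1024 KiB.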
633
634static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
635 show_target_kb, store_target_kb);
636
637static struct sysdev_attribute *balloon_attrs[] = {
638 &attr_target_kb,
639};
640
641static struct attribute *balloon_info_attrs[] = {
642 &attr_current_kb.attr,
643 &attr_low_kb.attr,
644 &attr_high_kb.attr,
645 &attr_hard_limit_kb.attr,
646 &attr_driver_kb.attr,
647 NULL
648};
649
650static struct attribute_group balloon_info_group = {
651 .name = "info",
652 .attrs = balloon_info_attrs,
653};
654
655static struct sysdev_class balloon_sysdev_class = {
656 .name = BALLOON_CLASS_NAME,
657};
658
659static int register_balloon(struct sys_device *sysdev)
660{
661 int i, error;
662
663 error = sysdev_class_register(&balloon_sysdev_class);
664 if (error)
665 return error;
666
667 sysdev->id = 0;
668 sysdev->cls = &balloon_sysdev_class;
669
670 error = sysdev_register(sysdev);
671 if (error) {
672 sysdev_class_unregister(&balloon_sysdev_class);
673 return error;
674 }
675
676 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
677 error = sysdev_create_file(sysdev, balloon_attrs[i]);
678 if (error)
679 goto fail;
680 }
681
682 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
683 if (error)
684 goto fail;
685
686 return 0;
687
688 fail:
689 while (--i >= 0)
690 sysdev_remove_file(sysdev, balloon_attrs[i]);
691 sysdev_unregister(sysdev);
692 sysdev_class_unregister(&balloon_sysdev_class);
693 return error;
694}
695
696static void unregister_balloon(struct sys_device *sysdev)
697{
698 int i;
699
700 sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
701 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
702 sysdev_remove_file(sysdev, balloon_attrs[i]);
703 sysdev_unregister(sysdev);
704 sysdev_class_unregister(&balloon_sysdev_class);
705}
706
707static void balloon_sysfs_exit(void)
708{
709 unregister_balloon(&balloon_sysdev);
710}
711
712MODULE_LICENSE("GPL");
diff --git a/arch/x86/xen/events.c b/drivers/xen/events.c
index dcf613e17581..4f0f22b020ea 100644
--- a/arch/x86/xen/events.c
+++ b/drivers/xen/events.c
@@ -33,12 +33,11 @@
33#include <asm/xen/hypercall.h> 33#include <asm/xen/hypercall.h>
34#include <asm/xen/hypervisor.h> 34#include <asm/xen/hypervisor.h>
35 35
36#include <xen/xen-ops.h>
36#include <xen/events.h> 37#include <xen/events.h>
37#include <xen/interface/xen.h> 38#include <xen/interface/xen.h>
38#include <xen/interface/event_channel.h> 39#include <xen/interface/event_channel.h>
39 40
40#include "xen-ops.h"
41
42/* 41/*
43 * This lock protects updates to the following mapping and reference-count 42 * This lock protects updates to the following mapping and reference-count
44 * arrays. The lock does not need to be acquired to read the mapping tables. 43 * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -455,6 +454,53 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
455 notify_remote_via_irq(irq); 454 notify_remote_via_irq(irq);
456} 455}
457 456
457irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
458{
459 struct shared_info *sh = HYPERVISOR_shared_info;
460 int cpu = smp_processor_id();
461 int i;
462 unsigned long flags;
463 static DEFINE_SPINLOCK(debug_lock);
464
465 spin_lock_irqsave(&debug_lock, flags);
466
467 printk("vcpu %d\n ", cpu);
468
469 for_each_online_cpu(i) {
470 struct vcpu_info *v = per_cpu(xen_vcpu, i);
471 printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
472 (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
473 v->evtchn_upcall_pending,
474 v->evtchn_pending_sel);
475 }
476 printk("pending:\n ");
477 for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
478 printk("%08lx%s", sh->evtchn_pending[i],
479 i % 8 == 0 ? "\n " : " ");
480 printk("\nmasks:\n ");
481 for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
482 printk("%08lx%s", sh->evtchn_mask[i],
483 i % 8 == 0 ? "\n " : " ");
484
485 printk("\nunmasked:\n ");
486 for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
487 printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
488 i % 8 == 0 ? "\n " : " ");
489
490 printk("\npending list:\n");
491 for(i = 0; i < NR_EVENT_CHANNELS; i++) {
492 if (sync_test_bit(i, sh->evtchn_pending)) {
493 printk(" %d: event %d -> irq %d\n",
494 cpu_evtchn[i], i,
495 evtchn_to_irq[i]);
496 }
497 }
498
499 spin_unlock_irqrestore(&debug_lock, flags);
500
501 return IRQ_HANDLED;
502}
503
458 504
459/* 505/*
460 * Search the CPUs pending events bitmasks. For each one found, map 506 * Search the CPUs pending events bitmasks. For each one found, map
@@ -470,29 +516,44 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
470 int cpu = get_cpu(); 516 int cpu = get_cpu();
471 struct shared_info *s = HYPERVISOR_shared_info; 517 struct shared_info *s = HYPERVISOR_shared_info;
472 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); 518 struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
473 unsigned long pending_words; 519 static DEFINE_PER_CPU(unsigned, nesting_count);
520 unsigned count;
474 521
475 vcpu_info->evtchn_upcall_pending = 0; 522 do {
523 unsigned long pending_words;
476 524
477 /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ 525 vcpu_info->evtchn_upcall_pending = 0;
478 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
479 while (pending_words != 0) {
480 unsigned long pending_bits;
481 int word_idx = __ffs(pending_words);
482 pending_words &= ~(1UL << word_idx);
483 526
484 while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { 527 if (__get_cpu_var(nesting_count)++)
485 int bit_idx = __ffs(pending_bits); 528 goto out;
486 int port = (word_idx * BITS_PER_LONG) + bit_idx;
487 int irq = evtchn_to_irq[port];
488 529
489 if (irq != -1) { 530#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
490 regs->orig_ax = ~irq; 531 /* Clear master flag /before/ clearing selector flag. */
491 do_IRQ(regs); 532 rmb();
533#endif
534 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
535 while (pending_words != 0) {
536 unsigned long pending_bits;
537 int word_idx = __ffs(pending_words);
538 pending_words &= ~(1UL << word_idx);
539
540 while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
541 int bit_idx = __ffs(pending_bits);
542 int port = (word_idx * BITS_PER_LONG) + bit_idx;
543 int irq = evtchn_to_irq[port];
544
545 if (irq != -1)
546 xen_do_IRQ(irq, regs);
492 } 547 }
493 } 548 }
494 }
495 549
550 BUG_ON(!irqs_disabled());
551
552 count = __get_cpu_var(nesting_count);
553 __get_cpu_var(nesting_count) = 0;
554 } while(count != 1);
555
556out:
496 put_cpu(); 557 put_cpu();
497} 558}
498 559
@@ -525,6 +586,22 @@ static void set_affinity_irq(unsigned irq, cpumask_t dest)
525 rebind_irq_to_cpu(irq, tcpu); 586 rebind_irq_to_cpu(irq, tcpu);
526} 587}
527 588
589int resend_irq_on_evtchn(unsigned int irq)
590{
591 int masked, evtchn = evtchn_from_irq(irq);
592 struct shared_info *s = HYPERVISOR_shared_info;
593
594 if (!VALID_EVTCHN(evtchn))
595 return 1;
596
597 masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
598 sync_set_bit(evtchn, s->evtchn_pending);
599 if (!masked)
600 unmask_evtchn(evtchn);
601
602 return 1;
603}
604
528static void enable_dynirq(unsigned int irq) 605static void enable_dynirq(unsigned int irq)
529{ 606{
530 int evtchn = evtchn_from_irq(irq); 607 int evtchn = evtchn_from_irq(irq);
@@ -554,10 +631,16 @@ static void ack_dynirq(unsigned int irq)
554static int retrigger_dynirq(unsigned int irq) 631static int retrigger_dynirq(unsigned int irq)
555{ 632{
556 int evtchn = evtchn_from_irq(irq); 633 int evtchn = evtchn_from_irq(irq);
634 struct shared_info *sh = HYPERVISOR_shared_info;
557 int ret = 0; 635 int ret = 0;
558 636
559 if (VALID_EVTCHN(evtchn)) { 637 if (VALID_EVTCHN(evtchn)) {
560 set_evtchn(evtchn); 638 int masked;
639
640 masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
641 sync_set_bit(evtchn, sh->evtchn_pending);
642 if (!masked)
643 unmask_evtchn(evtchn);
561 ret = 1; 644 ret = 1;
562 } 645 }
563 646
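
For reference, the usual consumer of this layer is a frontend driver that binds its event channel to a dynamic IRQ and kicks the remote end; resend_irq_on_evtchn() and the reworked retrigger_dynirq() above both replay a lost event by setting the pending bit while the channel is masked. A minimal usage sketch (the handler body and all example_* names are illustrative, not part of this patch):

#include <linux/interrupt.h>
#include <xen/events.h>

static irqreturn_t example_interrupt(int irq, void *dev_id)
{
	/* consume ring entries here; the evtchn is re-enabled by the core */
	return IRQ_HANDLED;
}

static int example_connect(unsigned int evtchn, void *priv)
{
	int irq;

	/* bind the event channel to a dynamic IRQ and install the handler */
	irq = bind_evtchn_to_irqhandler(evtchn, example_interrupt, 0,
					"example", priv);
	if (irq < 0)
		return irq;

	notify_remote_via_irq(irq);	/* kick the other end once */
	return irq;
}
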
diff --git a/arch/x86/xen/features.c b/drivers/xen/features.c
index 0707714e40d6..0707714e40d6 100644
--- a/arch/x86/xen/features.c
+++ b/drivers/xen/features.c
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index d85dc6d41c2a..52b6b41b909d 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void)
439 return xen_max; 439 return xen_max;
440} 440}
441 441
442static int map_pte_fn(pte_t *pte, struct page *pmd_page,
443 unsigned long addr, void *data)
444{
445 unsigned long **frames = (unsigned long **)data;
446
447 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
448 (*frames)++;
449 return 0;
450}
451
452static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
453 unsigned long addr, void *data)
454{
455
456 set_pte_at(&init_mm, addr, pte, __pte(0));
457 return 0;
458}
459
460static int gnttab_map(unsigned int start_idx, unsigned int end_idx) 442static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
461{ 443{
462 struct gnttab_setup_table setup; 444 struct gnttab_setup_table setup;
@@ -470,7 +452,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
470 452
471 setup.dom = DOMID_SELF; 453 setup.dom = DOMID_SELF;
472 setup.nr_frames = nr_gframes; 454 setup.nr_frames = nr_gframes;
473 setup.frame_list = frames; 455 set_xen_guest_handle(setup.frame_list, frames);
474 456
475 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); 457 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
476 if (rc == -ENOSYS) { 458 if (rc == -ENOSYS) {
@@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
480 462
481 BUG_ON(rc || setup.status); 463 BUG_ON(rc || setup.status);
482 464
483 if (shared == NULL) { 465 rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
484 struct vm_struct *area; 466 &shared);
485 area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
486 BUG_ON(area == NULL);
487 shared = area->addr;
488 }
489 rc = apply_to_page_range(&init_mm, (unsigned long)shared,
490 PAGE_SIZE * nr_gframes,
491 map_pte_fn, &frames);
492 BUG_ON(rc); 467 BUG_ON(rc);
493 frames -= nr_gframes; /* adjust after map_pte_fn() */
494 468
495 kfree(frames); 469 kfree(frames);
496 470
@@ -506,10 +480,7 @@ static int gnttab_resume(void)
506 480
507static int gnttab_suspend(void) 481static int gnttab_suspend(void)
508{ 482{
509 apply_to_page_range(&init_mm, (unsigned long)shared, 483 arch_gnttab_unmap_shared(shared, nr_grant_frames);
510 PAGE_SIZE * nr_grant_frames,
511 unmap_pte_fn, NULL);
512
513 return 0; 484 return 0;
514} 485}
515 486
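
The deleted map_pte_fn()/alloc_vm_area() logic is not gone; it moves behind the new arch_gnttab_map_shared() hook so non-x86 ports can substitute their own mapping scheme. A condensed sketch of what an x86-style implementation of the hook amounts to (this paraphrases the code removed above; it is not the verbatim arch file):

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/xen/page.h>
#include <xen/grant_table.h>

static int map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **frames = (unsigned long **)data;

	/* point this kernel pte at the machine frame Xen handed back */
	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
	(*frames)++;
	return 0;
}

int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
			   unsigned long max_nr_gframes,
			   struct grant_entry **__shared)
{
	struct grant_entry *shared = *__shared;

	if (shared == NULL) {
		struct vm_struct *area;

		area = xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
		BUG_ON(area == NULL);
		*__shared = shared = area->addr;
	}

	return apply_to_page_range(&init_mm, (unsigned long)shared,
				   PAGE_SIZE * nr_gframes,
				   map_pte_fn, &frames);
}
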
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 9fd2f70ab46d..0f86b0ff7879 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
399 399
400 *vaddr = NULL; 400 *vaddr = NULL;
401 401
402 area = alloc_vm_area(PAGE_SIZE); 402 area = xen_alloc_vm_area(PAGE_SIZE);
403 if (!area) 403 if (!area)
404 return -ENOMEM; 404 return -ENOMEM;
405 405
@@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
409 BUG(); 409 BUG();
410 410
411 if (op.status != GNTST_okay) { 411 if (op.status != GNTST_okay) {
412 free_vm_area(area); 412 xen_free_vm_area(area);
413 xenbus_dev_fatal(dev, op.status, 413 xenbus_dev_fatal(dev, op.status,
414 "mapping in shared page %d from domain %d", 414 "mapping in shared page %d from domain %d",
415 gnt_ref, dev->otherend_id); 415 gnt_ref, dev->otherend_id);
@@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
508 BUG(); 508 BUG();
509 509
510 if (op.status == GNTST_okay) 510 if (op.status == GNTST_okay)
511 free_vm_area(area); 511 xen_free_vm_area(area);
512 else 512 else
513 xenbus_dev_error(dev, op.status, 513 xenbus_dev_error(dev, op.status,
514 "unmapping page at handle %d error %d", 514 "unmapping page at handle %d error %d",
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 4750de316ad3..57ceb5346b74 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -88,6 +88,16 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
88 return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; 88 return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
89} 89}
90 90
91static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
92{
93 struct xenbus_device *dev = to_xenbus_device(_dev);
94
95 if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
96 return -ENOMEM;
97
98 return 0;
99}
100
91/* device/<type>/<id> => <type>-<id> */ 101/* device/<type>/<id> => <type>-<id> */
92static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) 102static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
93{ 103{
@@ -166,6 +176,7 @@ static struct xen_bus_type xenbus_frontend = {
166 .bus = { 176 .bus = {
167 .name = "xen", 177 .name = "xen",
168 .match = xenbus_match, 178 .match = xenbus_match,
179 .uevent = xenbus_uevent,
169 .probe = xenbus_dev_probe, 180 .probe = xenbus_dev_probe,
170 .remove = xenbus_dev_remove, 181 .remove = xenbus_dev_remove,
171 .shutdown = xenbus_dev_shutdown, 182 .shutdown = xenbus_dev_shutdown,
@@ -438,6 +449,12 @@ static ssize_t xendev_show_devtype(struct device *dev,
438} 449}
439DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); 450DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
440 451
452static ssize_t xendev_show_modalias(struct device *dev,
453 struct device_attribute *attr, char *buf)
454{
455 return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
456}
457DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
441 458
442int xenbus_probe_node(struct xen_bus_type *bus, 459int xenbus_probe_node(struct xen_bus_type *bus,
443 const char *type, 460 const char *type,
@@ -492,10 +509,16 @@ int xenbus_probe_node(struct xen_bus_type *bus,
492 509
493 err = device_create_file(&xendev->dev, &dev_attr_devtype); 510 err = device_create_file(&xendev->dev, &dev_attr_devtype);
494 if (err) 511 if (err)
495 goto fail_remove_file; 512 goto fail_remove_nodename;
513
514 err = device_create_file(&xendev->dev, &dev_attr_modalias);
515 if (err)
516 goto fail_remove_devtype;
496 517
497 return 0; 518 return 0;
498fail_remove_file: 519fail_remove_devtype:
520 device_remove_file(&xendev->dev, &dev_attr_devtype);
521fail_remove_nodename:
499 device_remove_file(&xendev->dev, &dev_attr_nodename); 522 device_remove_file(&xendev->dev, &dev_attr_nodename);
500fail_unregister: 523fail_unregister:
501 device_unregister(&xendev->dev); 524 device_unregister(&xendev->dev);
@@ -846,6 +869,7 @@ static int is_disconnected_device(struct device *dev, void *data)
846{ 869{
847 struct xenbus_device *xendev = to_xenbus_device(dev); 870 struct xenbus_device *xendev = to_xenbus_device(dev);
848 struct device_driver *drv = data; 871 struct device_driver *drv = data;
872 struct xenbus_driver *xendrv;
849 873
850 /* 874 /*
851 * A device with no driver will never connect. We care only about 875 * A device with no driver will never connect. We care only about
@@ -858,7 +882,9 @@ static int is_disconnected_device(struct device *dev, void *data)
858 if (drv && (dev->driver != drv)) 882 if (drv && (dev->driver != drv))
859 return 0; 883 return 0;
860 884
861 return (xendev->state != XenbusStateConnected); 885 xendrv = to_xenbus_driver(dev->driver);
886 return (xendev->state != XenbusStateConnected ||
887 (xendrv->is_ready && !xendrv->is_ready(xendev)));
862} 888}
863 889
864static int exists_disconnected_device(struct device_driver *drv) 890static int exists_disconnected_device(struct device_driver *drv)
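
The uevent/modalias pair above is what makes frontend module autoprobing work: xenbus emits MODALIAS=xen:<devicetype> on hotplug, and udev's modprobe resolves it against an alias the driver declares. Separately, the is_ready() hook added to is_disconnected_device() lets a driver report that a nominally connected device is not yet usable, so the boot-time wait keeps polling it. A minimal sketch for a hypothetical vkbd frontend (names illustrative):

#include <linux/module.h>
#include <xen/xenbus.h>

static const struct xenbus_device_id example_kbd_ids[] = {
	{ "vkbd" },
	{ "" },
};

static struct xenbus_driver example_kbd_driver = {
	.name = "vkbd",
	.owner = THIS_MODULE,
	.ids = example_kbd_ids,
	/* .probe, .otherend_changed, .remove, ... */
};

/* matches the MODALIAS=xen:vkbd uevent emitted by xenbus_uevent() */
MODULE_ALIAS("xen:vkbd");
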
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c
new file mode 100644
index 000000000000..797cb4e31f07
--- /dev/null
+++ b/drivers/xen/xencomm.c
@@ -0,0 +1,232 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 *
16 * Copyright (C) IBM Corp. 2006
17 *
18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
19 */
20
21#include <linux/gfp.h>
22#include <linux/mm.h>
23#include <asm/page.h>
24#include <xen/xencomm.h>
25#include <xen/interface/xen.h>
26#ifdef __ia64__
27#include <asm/xen/xencomm.h> /* for is_kern_addr() */
28#endif
29
30#ifdef HAVE_XEN_PLATFORM_COMPAT_H
31#include <xen/platform-compat.h>
32#endif
33
34static int xencomm_init(struct xencomm_desc *desc,
35 void *buffer, unsigned long bytes)
36{
37 unsigned long recorded = 0;
38 int i = 0;
39
40 while ((recorded < bytes) && (i < desc->nr_addrs)) {
41 unsigned long vaddr = (unsigned long)buffer + recorded;
42 unsigned long paddr;
43 int offset;
44 int chunksz;
45
46 offset = vaddr % PAGE_SIZE; /* handle partial pages */
47 chunksz = min(PAGE_SIZE - offset, bytes - recorded);
48
49 paddr = xencomm_vtop(vaddr);
50 if (paddr == ~0UL) {
51 printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
52 __func__, vaddr);
53 return -EINVAL;
54 }
55
56 desc->address[i++] = paddr;
57 recorded += chunksz;
58 }
59
60 if (recorded < bytes) {
61 printk(KERN_DEBUG
62 "%s: could only translate %ld of %ld bytes\n",
63 __func__, recorded, bytes);
64 return -ENOSPC;
65 }
66
67 /* mark remaining addresses invalid (just for safety) */
68 while (i < desc->nr_addrs)
69 desc->address[i++] = XENCOMM_INVALID;
70
71 desc->magic = XENCOMM_MAGIC;
72
73 return 0;
74}
75
76static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
77 void *buffer, unsigned long bytes)
78{
79 struct xencomm_desc *desc;
80 unsigned long buffer_ulong = (unsigned long)buffer;
81 unsigned long start = buffer_ulong & PAGE_MASK;
82 unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
83 unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
84 unsigned long size = sizeof(*desc) +
85 sizeof(desc->address[0]) * nr_addrs;
86
87 /*
 88 * The slab allocator returns a pointer aligned to at least sizeof(void *).
 89 * When sizeof(*desc) > sizeof(void *), a struct xencomm_desc might
 90 * cross a page boundary.
91 */
92 if (sizeof(*desc) > sizeof(void *)) {
93 unsigned long order = get_order(size);
94 desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
95 order);
96 if (desc == NULL)
97 return NULL;
98
99 desc->nr_addrs =
100 ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
101 sizeof(*desc->address);
102 } else {
103 desc = kmalloc(size, gfp_mask);
104 if (desc == NULL)
105 return NULL;
106
107 desc->nr_addrs = nr_addrs;
108 }
109 return desc;
110}
111
112void xencomm_free(struct xencomm_handle *desc)
113{
114 if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
115 struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
116 if (sizeof(*desc__) > sizeof(void *)) {
117 unsigned long size = sizeof(*desc__) +
118 sizeof(desc__->address[0]) * desc__->nr_addrs;
119 unsigned long order = get_order(size);
120 free_pages((unsigned long)__va(desc), order);
121 } else
122 kfree(__va(desc));
123 }
124}
125
126static int xencomm_create(void *buffer, unsigned long bytes,
127 struct xencomm_desc **ret, gfp_t gfp_mask)
128{
129 struct xencomm_desc *desc;
130 int rc;
131
132 pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
133
134 if (bytes == 0) {
135 /* don't create a descriptor; Xen recognizes NULL. */
136 BUG_ON(buffer != NULL);
137 *ret = NULL;
138 return 0;
139 }
140
141 BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
142
143 desc = xencomm_alloc(gfp_mask, buffer, bytes);
144 if (!desc) {
145 printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
146 return -ENOMEM;
147 }
148
149 rc = xencomm_init(desc, buffer, bytes);
150 if (rc) {
151 printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
152 xencomm_free((struct xencomm_handle *)__pa(desc));
153 return rc;
154 }
155
156 *ret = desc;
157 return 0;
158}
159
160/* check that the address is physically contiguous, i.e. not in the VMALLOC region */
161static int is_phys_contiguous(unsigned long addr)
162{
163 if (!is_kernel_addr(addr))
164 return 0;
165
166 return (addr < VMALLOC_START) || (addr >= VMALLOC_END);
167}
168
169static struct xencomm_handle *xencomm_create_inline(void *ptr)
170{
171 unsigned long paddr;
172
173 BUG_ON(!is_phys_contiguous((unsigned long)ptr));
174
175 paddr = (unsigned long)xencomm_pa(ptr);
176 BUG_ON(paddr & XENCOMM_INLINE_FLAG);
177 return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
178}
179
180/* "mini" routine, for stack-based communications: */
181static int xencomm_create_mini(void *buffer,
182 unsigned long bytes, struct xencomm_mini *xc_desc,
183 struct xencomm_desc **ret)
184{
185 int rc = 0;
186 struct xencomm_desc *desc;
187 BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
188
189 desc = (void *)xc_desc;
190
191 desc->nr_addrs = XENCOMM_MINI_ADDRS;
192
193 rc = xencomm_init(desc, buffer, bytes);
194 if (!rc)
195 *ret = desc;
196
197 return rc;
198}
199
200struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
201{
202 int rc;
203 struct xencomm_desc *desc;
204
205 if (is_phys_contiguous((unsigned long)ptr))
206 return xencomm_create_inline(ptr);
207
208 rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
209
210 if (rc || desc == NULL)
211 return NULL;
212
213 return xencomm_pa(desc);
214}
215
216struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
217 struct xencomm_mini *xc_desc)
218{
219 int rc;
220 struct xencomm_desc *desc = NULL;
221
222 if (is_phys_contiguous((unsigned long)ptr))
223 return xencomm_create_inline(ptr);
224
225 rc = xencomm_create_mini(ptr, bytes, xc_desc,
226 &desc);
227
228 if (rc)
229 return NULL;
230
231 return xencomm_pa(desc);
232}
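
Typical use is on an arch (ia64) where hypercall buffers must be pre-translated: the caller wraps the buffer in a handle, issues the hypercall, then frees the handle. A sketch under that assumption; example_hypercall() stands in for an arch-specific hypercall stub and is not part of this patch:

#include <linux/errno.h>
#include <xen/xencomm.h>

extern int example_hypercall(struct xencomm_handle *desc);	/* hypothetical */

static int example_hypercall_with_buffer(void *buf, unsigned long len)
{
	struct xencomm_handle *desc;
	int rc;

	desc = xencomm_map(buf, len);
	if (buf && desc == NULL)	/* NULL is legal when len == 0 */
		return -ENOMEM;

	rc = example_hypercall(desc);

	xencomm_free(desc);
	return rc;
}

For atomic contexts the allocation can be avoided entirely with __xencomm_map_no_alloc(), passing a suitably aligned struct xencomm_mini from the caller's stack.
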
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 3d419398499b..0f13b945e240 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -220,11 +220,13 @@ struct pv_mmu_ops {
220 unsigned long va); 220 unsigned long va);
221 221
222 /* Hooks for allocating/releasing pagetable pages */ 222 /* Hooks for allocating/releasing pagetable pages */
223 void (*alloc_pt)(struct mm_struct *mm, u32 pfn); 223 void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
224 void (*alloc_pd)(struct mm_struct *mm, u32 pfn); 224 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
225 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 225 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
226 void (*release_pt)(u32 pfn); 226 void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
227 void (*release_pd)(u32 pfn); 227 void (*release_pte)(u32 pfn);
228 void (*release_pmd)(u32 pfn);
229 void (*release_pud)(u32 pfn);
228 230
229 /* Pagetable manipulation functions */ 231 /* Pagetable manipulation functions */
230 void (*set_pte)(pte_t *ptep, pte_t pteval); 232 void (*set_pte)(pte_t *ptep, pte_t pteval);
@@ -910,28 +912,37 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
910 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 912 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
911} 913}
912 914
913static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn) 915static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
914{ 916{
915 PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn); 917 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
916} 918}
917static inline void paravirt_release_pt(unsigned pfn) 919static inline void paravirt_release_pte(unsigned pfn)
918{ 920{
919 PVOP_VCALL1(pv_mmu_ops.release_pt, pfn); 921 PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
920} 922}
921 923
922static inline void paravirt_alloc_pd(struct mm_struct *mm, unsigned pfn) 924static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
923{ 925{
924 PVOP_VCALL2(pv_mmu_ops.alloc_pd, mm, pfn); 926 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
925} 927}
926 928
927static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, 929static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
928 unsigned start, unsigned count) 930 unsigned start, unsigned count)
929{ 931{
930 PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count); 932 PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
931} 933}
932static inline void paravirt_release_pd(unsigned pfn) 934static inline void paravirt_release_pmd(unsigned pfn)
933{ 935{
934 PVOP_VCALL1(pv_mmu_ops.release_pd, pfn); 936 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
937}
938
939static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
940{
941 PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
942}
943static inline void paravirt_release_pud(unsigned pfn)
944{
945 PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
935} 946}
936 947
937#ifdef CONFIG_HIGHPTE 948#ifdef CONFIG_HIGHPTE
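
A paravirt backend opts into the renamed hooks by pointing pv_mmu_ops at its own callbacks during platform setup; the new pud pair exists so four-level (64-bit) pagetables get the same notifications as pte and pmd pages. An illustrative fragment (the example_* functions are placeholders, not patch content):

static void example_alloc_pte(struct mm_struct *mm, u32 pfn)
{
	/* e.g. pin the new pte page before it goes live */
}

static void example_release_pte(u32 pfn)
{
	/* e.g. unpin the pte page as it is torn down */
}

/* typically called from the hypervisor's early setup code */
static void example_install_pagetable_hooks(void)
{
	pv_mmu_ops.alloc_pte = example_alloc_pte;
	pv_mmu_ops.release_pte = example_release_pte;
}
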
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index 5886eed05886..91e4641f3f31 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h
@@ -1,5 +1,110 @@
1#ifdef CONFIG_X86_32 1#ifndef _ASM_X86_PGALLOC_H
2# include "pgalloc_32.h" 2#define _ASM_X86_PGALLOC_H
3
4#include <linux/threads.h>
5#include <linux/mm.h> /* for struct page */
6#include <linux/pagemap.h>
7
8#ifdef CONFIG_PARAVIRT
9#include <asm/paravirt.h>
3#else 10#else
4# include "pgalloc_64.h" 11static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
12static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
13static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
14 unsigned long start, unsigned long count) {}
15static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
16static inline void paravirt_release_pte(unsigned long pfn) {}
17static inline void paravirt_release_pmd(unsigned long pfn) {}
18static inline void paravirt_release_pud(unsigned long pfn) {}
5#endif 19#endif
20
21/*
22 * Allocate and free page tables.
23 */
24extern pgd_t *pgd_alloc(struct mm_struct *);
25extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
26
27extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
28extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
29
30/* Should really implement gc for free page table pages. This could be
31 done with a reference count in struct page. */
32
33static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
34{
35 BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
36 free_page((unsigned long)pte);
37}
38
39static inline void pte_free(struct mm_struct *mm, struct page *pte)
40{
41 __free_page(pte);
42}
43
44extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
45
46static inline void pmd_populate_kernel(struct mm_struct *mm,
47 pmd_t *pmd, pte_t *pte)
48{
49 paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
50 set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
51}
52
53static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
54 struct page *pte)
55{
56 unsigned long pfn = page_to_pfn(pte);
57
58 paravirt_alloc_pte(mm, pfn);
59 set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
60}
61
62#define pmd_pgtable(pmd) pmd_page(pmd)
63
64#if PAGETABLE_LEVELS > 2
65static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
66{
67 return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
68}
69
70static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
71{
72 BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
73 free_page((unsigned long)pmd);
74}
75
76extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
77
78#ifdef CONFIG_X86_PAE
79extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
80#else /* !CONFIG_X86_PAE */
81static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
82{
83 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
84 set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
85}
86#endif /* CONFIG_X86_PAE */
87
88#if PAGETABLE_LEVELS > 3
89static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
90{
91 paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
92 set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
93}
94
95static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
96{
97 return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
98}
99
100static inline void pud_free(struct mm_struct *mm, pud_t *pud)
101{
102 BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
103 free_page((unsigned long)pud);
104}
105
106extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
107#endif /* PAGETABLE_LEVELS > 3 */
108#endif /* PAGETABLE_LEVELS > 2 */
109
110#endif /* _ASM_X86_PGALLOC_H */
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h
deleted file mode 100644
index 6bea6e5b5ee5..000000000000
--- a/include/asm-x86/pgalloc_32.h
+++ /dev/null
@@ -1,95 +0,0 @@
1#ifndef _I386_PGALLOC_H
2#define _I386_PGALLOC_H
3
4#include <linux/threads.h>
5#include <linux/mm.h> /* for struct page */
6#include <linux/pagemap.h>
7#include <asm/tlb.h>
8#include <asm-generic/tlb.h>
9
10#ifdef CONFIG_PARAVIRT
11#include <asm/paravirt.h>
12#else
13#define paravirt_alloc_pt(mm, pfn) do { } while (0)
14#define paravirt_alloc_pd(mm, pfn) do { } while (0)
15#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
16#define paravirt_release_pt(pfn) do { } while (0)
17#define paravirt_release_pd(pfn) do { } while (0)
18#endif
19
20static inline void pmd_populate_kernel(struct mm_struct *mm,
21 pmd_t *pmd, pte_t *pte)
22{
23 paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
24 set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
25}
26
27static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
28{
29 unsigned long pfn = page_to_pfn(pte);
30
31 paravirt_alloc_pt(mm, pfn);
32 set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
33}
34#define pmd_pgtable(pmd) pmd_page(pmd)
35
36/*
37 * Allocate and free page tables.
38 */
39extern pgd_t *pgd_alloc(struct mm_struct *);
40extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
41
42extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
43extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
44
45static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
46{
47 free_page((unsigned long)pte);
48}
49
50static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
51{
52 pgtable_page_dtor(pte);
53 __free_page(pte);
54}
55
56
57extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
58
59#ifdef CONFIG_X86_PAE
60/*
61 * In the PAE case we free the pmds as part of the pgd.
62 */
63static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
64{
65 return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
66}
67
68static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
69{
70 BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
71 free_page((unsigned long)pmd);
72}
73
74extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
75
76static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
77{
78 paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
79
80 /* Note: almost everything apart from _PAGE_PRESENT is
81 reserved at the pmd (PDPT) level. */
82 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
83
84 /*
85 * According to Intel App note "TLBs, Paging-Structure Caches,
86 * and Their Invalidation", April 2007, document 317080-001,
87 * section 8.1: in PAE mode we explicitly have to flush the
88 * TLB via cr3 if the top-level pgd is changed...
89 */
90 if (mm == current->active_mm)
91 write_cr3(read_cr3());
92}
93#endif /* CONFIG_X86_PAE */
94
95#endif /* _I386_PGALLOC_H */
diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h
deleted file mode 100644
index 8d6722320dcc..000000000000
--- a/include/asm-x86/pgalloc_64.h
+++ /dev/null
@@ -1,133 +0,0 @@
1#ifndef _X86_64_PGALLOC_H
2#define _X86_64_PGALLOC_H
3
4#include <asm/pda.h>
5#include <linux/threads.h>
6#include <linux/mm.h>
7
8#define pmd_populate_kernel(mm, pmd, pte) \
9 set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
10#define pud_populate(mm, pud, pmd) \
11 set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)))
12#define pgd_populate(mm, pgd, pud) \
13 set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)))
14
15#define pmd_pgtable(pmd) pmd_page(pmd)
16
17static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
18{
19 set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
20}
21
22static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
23{
24 BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
25 free_page((unsigned long)pmd);
26}
27
28static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
29{
30 return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
31}
32
33static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
34{
35 return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
36}
37
38static inline void pud_free(struct mm_struct *mm, pud_t *pud)
39{
40 BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
41 free_page((unsigned long)pud);
42}
43
44static inline void pgd_list_add(pgd_t *pgd)
45{
46 struct page *page = virt_to_page(pgd);
47 unsigned long flags;
48
49 spin_lock_irqsave(&pgd_lock, flags);
50 list_add(&page->lru, &pgd_list);
51 spin_unlock_irqrestore(&pgd_lock, flags);
52}
53
54static inline void pgd_list_del(pgd_t *pgd)
55{
56 struct page *page = virt_to_page(pgd);
57 unsigned long flags;
58
59 spin_lock_irqsave(&pgd_lock, flags);
60 list_del(&page->lru);
61 spin_unlock_irqrestore(&pgd_lock, flags);
62}
63
64static inline pgd_t *pgd_alloc(struct mm_struct *mm)
65{
66 unsigned boundary;
67 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
68 if (!pgd)
69 return NULL;
70 pgd_list_add(pgd);
71 /*
72 * Copy kernel pointers in from init.
73 * Could keep a freelist or slab cache of those because the kernel
74 * part never changes.
75 */
76 boundary = pgd_index(__PAGE_OFFSET);
77 memset(pgd, 0, boundary * sizeof(pgd_t));
78 memcpy(pgd + boundary,
79 init_level4_pgt + boundary,
80 (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
81 return pgd;
82}
83
84static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
85{
86 BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
87 pgd_list_del(pgd);
88 free_page((unsigned long)pgd);
89}
90
91static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
92{
93 return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
94}
95
96static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
97{
98 struct page *page;
99 void *p;
100
101 p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
102 if (!p)
103 return NULL;
104 page = virt_to_page(p);
105 pgtable_page_ctor(page);
106 return page;
107}
108
109/* Should really implement gc for free page table pages. This could be
110 done with a reference count in struct page. */
111
112static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
113{
114 BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
115 free_page((unsigned long)pte);
116}
117
118static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
119{
120 pgtable_page_dtor(pte);
121 __free_page(pte);
122}
123
124#define __pte_free_tlb(tlb,pte) \
125do { \
126 pgtable_page_dtor((pte)); \
127 tlb_remove_page((tlb), (pte)); \
128} while (0)
129
130#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
131#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
132
133#endif /* _X86_64_PGALLOC_H */
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index f1d9f4a03f6f..b8a08bd7bd48 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -1,7 +1,6 @@
1#ifndef _ASM_X86_PGTABLE_H 1#ifndef _ASM_X86_PGTABLE_H
2#define _ASM_X86_PGTABLE_H 2#define _ASM_X86_PGTABLE_H
3 3
4#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
5#define FIRST_USER_ADDRESS 0 4#define FIRST_USER_ADDRESS 0
6 5
7#define _PAGE_BIT_PRESENT 0 /* is present */ 6#define _PAGE_BIT_PRESENT 0 /* is present */
@@ -330,6 +329,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
330# include "pgtable_64.h" 329# include "pgtable_64.h"
331#endif 330#endif
332 331
332#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
333#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
334
333#ifndef __ASSEMBLY__ 335#ifndef __ASSEMBLY__
334 336
335enum { 337enum {
@@ -389,37 +391,17 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
389 * bit at the same time. 391 * bit at the same time.
390 */ 392 */
391#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 393#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
392#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ 394extern int ptep_set_access_flags(struct vm_area_struct *vma,
393({ \ 395 unsigned long address, pte_t *ptep,
394 int __changed = !pte_same(*(ptep), entry); \ 396 pte_t entry, int dirty);
395 if (__changed && dirty) { \
396 *ptep = entry; \
397 pte_update_defer((vma)->vm_mm, (address), (ptep)); \
398 flush_tlb_page(vma, address); \
399 } \
400 __changed; \
401})
402 397
403#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 398#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
404#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ 399extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
405 int __ret = 0; \ 400 unsigned long addr, pte_t *ptep);
406 if (pte_young(*(ptep))) \
407 __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
408 &(ptep)->pte); \
409 if (__ret) \
410 pte_update((vma)->vm_mm, addr, ptep); \
411 __ret; \
412})
413 401
414#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH 402#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
415#define ptep_clear_flush_young(vma, address, ptep) \ 403extern int ptep_clear_flush_young(struct vm_area_struct *vma,
416({ \ 404 unsigned long address, pte_t *ptep);
417 int __young; \
418 __young = ptep_test_and_clear_young((vma), (address), (ptep)); \
419 if (__young) \
420 flush_tlb_page(vma, address); \
421 __young; \
422})
423 405
424#define __HAVE_ARCH_PTEP_GET_AND_CLEAR 406#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
425static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 407static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
@@ -456,6 +438,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
456 pte_update(mm, addr, ptep); 438 pte_update(mm, addr, ptep);
457} 439}
458 440
441/*
442 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
443 *
 444 * dst - pointer to pgd range anywhere on a pgd page
445 * src - ""
446 * count - the number of pgds to copy.
447 *
448 * dst and src can be on the same page, but the range must not overlap,
449 * and must not cross a page boundary.
450 */
451static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
452{
453 memcpy(dst, src, count * sizeof(pgd_t));
454}
455
456
459#include <asm-generic/pgtable.h> 457#include <asm-generic/pgtable.h>
460#endif /* __ASSEMBLY__ */ 458#endif /* __ASSEMBLY__ */
461 459
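
With clone_pgd_range() now living next to KERNEL_PGD_BOUNDARY/KERNEL_PGD_PTRS, the common "copy the kernel half of a pgd" idiom can be written generically. A sketch of that idiom, close to (but not verbatim) what a pgd constructor does:

#include <asm/pgtable.h>

/* seed a freshly allocated pgd with the kernel's own mappings */
static void example_pgd_ctor(pgd_t *pgd)
{
	clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
			KERNEL_PGD_PTRS);
}
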
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index c4a643674458..168b6447cf18 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -48,9 +48,6 @@ void paging_init(void);
48#define PGDIR_SIZE (1UL << PGDIR_SHIFT) 48#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
49#define PGDIR_MASK (~(PGDIR_SIZE - 1)) 49#define PGDIR_MASK (~(PGDIR_SIZE - 1))
50 50
51#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
52#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
53
54/* Just any arbitrary offset to the start of the vmalloc VM area: the 51/* Just any arbitrary offset to the start of the vmalloc VM area: the
55 * current 8MB value just means that there will be a 8MB "hole" after the 52 * current 8MB value just means that there will be a 8MB "hole" after the
56 * physical memory until the kernel virtual memory starts. That means that 53 * physical memory until the kernel virtual memory starts. That means that
@@ -109,21 +106,6 @@ extern int pmd_bad(pmd_t pmd);
109#endif 106#endif
110 107
111/* 108/*
112 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
113 *
 114 * dst - pointer to pgd range anywhere on a pgd page
115 * src - ""
116 * count - the number of pgds to copy.
117 *
118 * dst and src can be on the same page, but the range must not overlap,
119 * and must not cross a page boundary.
120 */
121static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
122{
123 memcpy(dst, src, count * sizeof(pgd_t));
124}
125
126/*
127 * Macro to mark a page protection value as "uncacheable". 109 * Macro to mark a page protection value as "uncacheable".
128 * On processors which do not support it, this is a no-op. 110 * On processors which do not support it, this is a no-op.
129 */ 111 */
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9fd87d0b6477..a3bbf8766c1d 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -24,7 +24,7 @@ extern void paging_init(void);
24 24
25#endif /* !__ASSEMBLY__ */ 25#endif /* !__ASSEMBLY__ */
26 26
27#define SHARED_KERNEL_PMD 1 27#define SHARED_KERNEL_PMD 0
28 28
29/* 29/*
30 * PGDIR_SHIFT determines what a top-level page table entry can map 30 * PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
new file mode 100644
index 000000000000..596312a7bfc9
--- /dev/null
+++ b/include/asm-x86/xen/events.h
@@ -0,0 +1,22 @@
1#ifndef __XEN_EVENTS_H
2#define __XEN_EVENTS_H
3
4enum ipi_vector {
5 XEN_RESCHEDULE_VECTOR,
6 XEN_CALL_FUNCTION_VECTOR,
7
8 XEN_NR_IPIS,
9};
10
11static inline int xen_irqs_disabled(struct pt_regs *regs)
12{
13 return raw_irqs_disabled_flags(regs->flags);
14}
15
16static inline void xen_do_IRQ(int irq, struct pt_regs *regs)
17{
18 regs->orig_ax = ~irq;
19 do_IRQ(regs);
20}
21
22#endif /* __XEN_EVENTS_H */
diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h
new file mode 100644
index 000000000000..2444d4593a3b
--- /dev/null
+++ b/include/asm-x86/xen/grant_table.h
@@ -0,0 +1,7 @@
1#ifndef __XEN_GRANT_TABLE_H
2#define __XEN_GRANT_TABLE_H
3
4#define xen_alloc_vm_area(size) alloc_vm_area(size)
5#define xen_free_vm_area(area) free_vm_area(area)
6
7#endif /* __XEN_GRANT_TABLE_H */
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index bc0ee7d961ca..c2ccd997ed35 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -164,6 +164,12 @@ HYPERVISOR_set_callbacks(unsigned long event_selector,
164} 164}
165 165
166static inline int 166static inline int
167HYPERVISOR_callback_op(int cmd, void *arg)
168{
169 return _hypercall2(int, callback_op, cmd, arg);
170}
171
172static inline int
167HYPERVISOR_fpu_taskswitch(int set) 173HYPERVISOR_fpu_taskswitch(int set)
168{ 174{
169 return _hypercall1(int, fpu_taskswitch, set); 175 return _hypercall1(int, fpu_taskswitch, set);
diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
index 165c3968e138..6227000a1e84 100644
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -22,6 +22,30 @@
22#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) 22#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
23#define GUEST_HANDLE(name) __guest_handle_ ## name 23#define GUEST_HANDLE(name) __guest_handle_ ## name
24 24
25#ifdef __XEN__
26#if defined(__i386__)
27#define set_xen_guest_handle(hnd, val) \
28 do { \
29 if (sizeof(hnd) == 8) \
30 *(uint64_t *)&(hnd) = 0; \
31 (hnd).p = val; \
32 } while (0)
33#elif defined(__x86_64__)
34#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
35#endif
36#else
37#if defined(__i386__)
38#define set_xen_guest_handle(hnd, val) \
39 do { \
40 if (sizeof(hnd) == 8) \
41 *(uint64_t *)&(hnd) = 0; \
42 (hnd) = val; \
43 } while (0)
44#elif defined(__x86_64__)
45#define set_xen_guest_handle(hnd, val) do { (hnd) = val; } while (0)
46#endif
47#endif
48
25#ifndef __ASSEMBLY__ 49#ifndef __ASSEMBLY__
26/* Guest handles for primitive C types. */ 50/* Guest handles for primitive C types. */
27__DEFINE_GUEST_HANDLE(uchar, unsigned char); 51__DEFINE_GUEST_HANDLE(uchar, unsigned char);
@@ -171,6 +195,10 @@ struct arch_vcpu_info {
171 unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ 195 unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
172}; 196};
173 197
198struct xen_callback {
199 unsigned long cs;
200 unsigned long eip;
201};
174#endif /* !__ASSEMBLY__ */ 202#endif /* !__ASSEMBLY__ */
175 203
176/* 204/*
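
Guest code now fills handles through set_xen_guest_handle() instead of assigning raw pointers, which keeps the upper half of a 64-bit handle zeroed on 32-bit builds. The grant-table setup change earlier in this series is the canonical user; restated as a self-contained sketch:

#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <asm/xen/hypercall.h>

static int example_setup_grant_table(unsigned long *frames, u32 nr_gframes)
{
	struct gnttab_setup_table setup;

	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_gframes;
	/* clears the handle's padding on 32-bit before storing the pointer */
	set_xen_guest_handle(setup.frame_list, frames);

	return HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
}
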
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
new file mode 100644
index 000000000000..01799305f02a
--- /dev/null
+++ b/include/asm-x86/xen/page.h
@@ -0,0 +1,168 @@
1#ifndef __XEN_PAGE_H
2#define __XEN_PAGE_H
3
4#include <linux/pfn.h>
5
6#include <asm/uaccess.h>
7#include <asm/pgtable.h>
8
9#include <xen/features.h>
10
11/* Xen machine address */
12typedef struct xmaddr {
13 phys_addr_t maddr;
14} xmaddr_t;
15
16/* Xen pseudo-physical address */
17typedef struct xpaddr {
18 phys_addr_t paddr;
19} xpaddr_t;
20
21#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
22#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
23
24/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
25#define INVALID_P2M_ENTRY (~0UL)
26#define FOREIGN_FRAME_BIT (1UL<<31)
27#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
28
29extern unsigned long *phys_to_machine_mapping;
30
31static inline unsigned long pfn_to_mfn(unsigned long pfn)
32{
33 if (xen_feature(XENFEAT_auto_translated_physmap))
34 return pfn;
35
36 return phys_to_machine_mapping[(unsigned int)(pfn)] &
37 ~FOREIGN_FRAME_BIT;
38}
39
40static inline int phys_to_machine_mapping_valid(unsigned long pfn)
41{
42 if (xen_feature(XENFEAT_auto_translated_physmap))
43 return 1;
44
45 return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
46}
47
48static inline unsigned long mfn_to_pfn(unsigned long mfn)
49{
50 unsigned long pfn;
51
52 if (xen_feature(XENFEAT_auto_translated_physmap))
53 return mfn;
54
55#if 0
56 if (unlikely((mfn >> machine_to_phys_order) != 0))
57 return max_mapnr;
58#endif
59
60 pfn = 0;
61 /*
62 * The array access can fail (e.g., device space beyond end of RAM).
63 * In such cases it doesn't matter what we return (we return garbage),
64 * but we must handle the fault without crashing!
65 */
66 __get_user(pfn, &machine_to_phys_mapping[mfn]);
67
68 return pfn;
69}
70
71static inline xmaddr_t phys_to_machine(xpaddr_t phys)
72{
73 unsigned offset = phys.paddr & ~PAGE_MASK;
74 return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
75}
76
77static inline xpaddr_t machine_to_phys(xmaddr_t machine)
78{
79 unsigned offset = machine.maddr & ~PAGE_MASK;
80 return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
81}
82
83/*
84 * We detect special mappings in one of two ways:
85 * 1. If the MFN is an I/O page then Xen will set the m2p entry
86 * to be outside our maximum possible pseudophys range.
87 * 2. If the MFN belongs to a different domain then we will certainly
88 * not have MFN in our p2m table. Conversely, if the page is ours,
89 * then we'll have p2m(m2p(MFN))==MFN.
90 * If we detect a special mapping then it doesn't have a 'struct page'.
91 * We force !pfn_valid() by returning an out-of-range pointer.
92 *
93 * NB. These checks require that, for any MFN that is not in our reservation,
94 * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
 95 * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
96 * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
97 *
98 * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
99 * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
100 * require. In all the cases we care about, the FOREIGN_FRAME bit is
101 * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
102 */
103static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
104{
105 extern unsigned long max_mapnr;
106 unsigned long pfn = mfn_to_pfn(mfn);
107 if ((pfn < max_mapnr)
108 && !xen_feature(XENFEAT_auto_translated_physmap)
109 && (phys_to_machine_mapping[pfn] != mfn))
110 return max_mapnr; /* force !pfn_valid() */
111 return pfn;
112}
113
114static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
115{
116 if (xen_feature(XENFEAT_auto_translated_physmap)) {
117 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
118 return;
119 }
120 phys_to_machine_mapping[pfn] = mfn;
121}
122
123/* VIRT <-> MACHINE conversion */
124#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
125#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
126#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
127
128static inline unsigned long pte_mfn(pte_t pte)
129{
130 return (pte.pte & ~_PAGE_NX) >> PAGE_SHIFT;
131}
132
133static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
134{
135 pte_t pte;
136
137 pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) |
138 (pgprot_val(pgprot) & __supported_pte_mask);
139
140 return pte;
141}
142
143static inline pteval_t pte_val_ma(pte_t pte)
144{
145 return pte.pte;
146}
147
148static inline pte_t __pte_ma(pteval_t x)
149{
150 return (pte_t) { .pte = x };
151}
152
153#ifdef CONFIG_X86_PAE
154#define pmd_val_ma(v) ((v).pmd)
155#define pud_val_ma(v) ((v).pgd.pgd)
156#define __pmd_ma(x) ((pmd_t) { (x) } )
157#else /* !X86_PAE */
158#define pmd_val_ma(v) ((v).pud.pgd.pgd)
159#endif /* CONFIG_X86_PAE */
160
161#define pgd_val_ma(x) ((x).pgd)
162
163
164xmaddr_t arbitrary_virt_to_machine(unsigned long address);
165void make_lowmem_page_readonly(void *vaddr);
166void make_lowmem_page_readwrite(void *vaddr);
167
168#endif /* __XEN_PAGE_H */
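
The p2m/m2p helpers compose as shown below; note the round trip only holds for frames in the guest's own reservation (foreign frames are deliberately caught by mfn_to_local_pfn()). A small sketch:

#include <asm/page.h>
#include <asm/xen/page.h>

static void example_translations(void *vaddr)
{
	unsigned long pfn = PFN_DOWN(__pa(vaddr));
	unsigned long mfn = pfn_to_mfn(pfn);

	/* holds for our own RAM; a foreign mapping would break it */
	BUG_ON(mfn_to_pfn(mfn) != pfn);

	/* the convenience macro gives the same answer */
	BUG_ON(virt_to_mfn(vaddr) != mfn);
}
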
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
new file mode 100644
index 000000000000..fe43b0f3c86a
--- /dev/null
+++ b/include/xen/balloon.h
@@ -0,0 +1,61 @@
1/******************************************************************************
2 * balloon.h
3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 *
6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#ifndef __XEN_BALLOON_H__
35#define __XEN_BALLOON_H__
36
37#include <linux/spinlock.h>
38
39#if 0
40/*
41 * Inform the balloon driver that it should allow some slop for device-driver
42 * memory activities.
43 */
44void balloon_update_driver_allowance(long delta);
45
46/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
47struct page **alloc_empty_pages_and_pagevec(int nr_pages);
48void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
49
50void balloon_release_driver_page(struct page *page);
51
52/*
53 * Prevent the balloon driver from changing the memory reservation during
54 * a driver critical region.
55 */
56extern spinlock_t balloon_lock;
57#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
58#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
59#endif
60
61#endif /* __XEN_BALLOON_H__ */
diff --git a/include/xen/events.h b/include/xen/events.h
index 2bde54d29be5..acd8e062c85f 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -5,13 +5,7 @@
5 5
6#include <xen/interface/event_channel.h> 6#include <xen/interface/event_channel.h>
7#include <asm/xen/hypercall.h> 7#include <asm/xen/hypercall.h>
8 8#include <asm/xen/events.h>
9enum ipi_vector {
10 XEN_RESCHEDULE_VECTOR,
11 XEN_CALL_FUNCTION_VECTOR,
12
13 XEN_NR_IPIS,
14};
15 9
16int bind_evtchn_to_irq(unsigned int evtchn); 10int bind_evtchn_to_irq(unsigned int evtchn);
17int bind_evtchn_to_irqhandler(unsigned int evtchn, 11int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -37,6 +31,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
37void unbind_from_irqhandler(unsigned int irq, void *dev_id); 31void unbind_from_irqhandler(unsigned int irq, void *dev_id);
38 32
39void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); 33void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
34int resend_irq_on_evtchn(unsigned int irq);
40 35
41static inline void notify_remote_via_evtchn(int port) 36static inline void notify_remote_via_evtchn(int port)
42{ 37{
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 761c83498e03..466204846121 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -39,6 +39,7 @@
39 39
40#include <asm/xen/hypervisor.h> 40#include <asm/xen/hypervisor.h>
41#include <xen/interface/grant_table.h> 41#include <xen/interface/grant_table.h>
42#include <asm/xen/grant_table.h>
42 43
43/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ 44/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
44#define NR_GRANT_FRAMES 4 45#define NR_GRANT_FRAMES 4
@@ -102,6 +103,12 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
102void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, 103void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
103 unsigned long pfn); 104 unsigned long pfn);
104 105
106int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
107 unsigned long max_nr_gframes,
108 struct grant_entry **__shared);
109void arch_gnttab_unmap_shared(struct grant_entry *shared,
110 unsigned long nr_gframes);
111
105#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) 112#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
106 113
107#endif /* __ASM_GNTTAB_H__ */ 114#endif /* __ASM_GNTTAB_H__ */
diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h
new file mode 100644
index 000000000000..4aadcba31af9
--- /dev/null
+++ b/include/xen/interface/callback.h
@@ -0,0 +1,102 @@
1/******************************************************************************
2 * callback.h
3 *
4 * Register guest OS callbacks with Xen.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Copyright (c) 2006, Ian Campbell
25 */
26
27#ifndef __XEN_PUBLIC_CALLBACK_H__
28#define __XEN_PUBLIC_CALLBACK_H__
29
30#include "xen.h"
31
32/*
33 * Prototype for this hypercall is:
34 * long callback_op(int cmd, void *extra_args)
35 * @cmd == CALLBACKOP_??? (callback operation).
36 * @extra_args == Operation-specific extra arguments (NULL if none).
37 */
38
39/* ia64, x86: Callback for event delivery. */
40#define CALLBACKTYPE_event 0
41
42/* x86: Failsafe callback when guest state cannot be restored by Xen. */
43#define CALLBACKTYPE_failsafe 1
44
45/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
46#define CALLBACKTYPE_syscall 2
47
48/*
49 * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
50 * feature is enabled. Do not use this callback type in new code.
51 */
52#define CALLBACKTYPE_sysenter_deprecated 3
53
54/* x86: Callback for NMI delivery. */
55#define CALLBACKTYPE_nmi 4
56
57/*
58 * x86: sysenter is only available as follows:
59 * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
60 * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
61 * ('32-on-32-on-64', '32-on-64-on-64')
62 * [nb. also 64-bit guest applications on Intel CPUs
63 * ('64-on-64-on-64'), but syscall is preferred]
64 */
65#define CALLBACKTYPE_sysenter 5
66
67/*
68 * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
69 * ('32-on-32-on-64', '32-on-64-on-64')
70 */
71#define CALLBACKTYPE_syscall32 7
72
73/*
 74 * Disable event delivery during callback? This flag is ignored for event and
75 * NMI callbacks: event delivery is unconditionally disabled.
76 */
77#define _CALLBACKF_mask_events 0
78#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
79
80/*
81 * Register a callback.
82 */
83#define CALLBACKOP_register 0
84struct callback_register {
85 uint16_t type;
86 uint16_t flags;
87 struct xen_callback address;
88};
89
90/*
91 * Unregister a callback.
92 *
93 * Not all callbacks can be unregistered. -EINVAL will be returned if
94 * you attempt to unregister such a callback.
95 */
96#define CALLBACKOP_unregister 1
97struct callback_unregister {
98 uint16_t type;
99 uint16_t _unused;
100};
101
102#endif /* __XEN_PUBLIC_CALLBACK_H__ */
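
Registration goes through the HYPERVISOR_callback_op() hypercall added earlier in this series. A sketch of registering the event-delivery callback; example_hypervisor_callback stands in for the guest's assembly entry point and is an assumption, not patch content:

#include <asm/segment.h>
#include <xen/interface/callback.h>
#include <asm/xen/hypercall.h>

extern void example_hypervisor_callback(void);	/* hypothetical asm stub */

static int example_register_event_callback(void)
{
	struct callback_register event = {
		.type = CALLBACKTYPE_event,
		.address = { __KERNEL_CS,
			     (unsigned long)example_hypervisor_callback },
	};

	/* returns 0 on success, or a negative Xen error code */
	return HYPERVISOR_callback_op(CALLBACKOP_register, &event);
}
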
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index 219049802cf2..39da93c21de0 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -185,6 +185,7 @@ struct gnttab_map_grant_ref {
185 grant_handle_t handle; 185 grant_handle_t handle;
186 uint64_t dev_bus_addr; 186 uint64_t dev_bus_addr;
187}; 187};
188DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
188 189
189/* 190/*
190 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings 191 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
@@ -206,6 +207,7 @@ struct gnttab_unmap_grant_ref {
206 /* OUT parameters. */ 207 /* OUT parameters. */
207 int16_t status; /* GNTST_* */ 208 int16_t status; /* GNTST_* */
208}; 209};
210DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
209 211
210/* 212/*
211 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least 213 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
@@ -223,8 +225,9 @@ struct gnttab_setup_table {
223 uint32_t nr_frames; 225 uint32_t nr_frames;
224 /* OUT parameters. */ 226 /* OUT parameters. */
225 int16_t status; /* GNTST_* */ 227 int16_t status; /* GNTST_* */
226 ulong *frame_list; 228 GUEST_HANDLE(ulong) frame_list;
227}; 229};
230DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
228 231
229/* 232/*
230 * GNTTABOP_dump_table: Dump the contents of the grant table to the 233 * GNTTABOP_dump_table: Dump the contents of the grant table to the
@@ -237,6 +240,7 @@ struct gnttab_dump_table {
237 /* OUT parameters. */ 240 /* OUT parameters. */
238 int16_t status; /* GNTST_* */ 241 int16_t status; /* GNTST_* */
239}; 242};
243DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
240 244
241/* 245/*
242 * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The 246 * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
@@ -255,7 +259,7 @@ struct gnttab_transfer {
255 /* OUT parameters. */ 259 /* OUT parameters. */
256 int16_t status; 260 int16_t status;
257}; 261};
258 262DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
259 263
260/* 264/*
261 * GNTTABOP_copy: Hypervisor based copy 265 * GNTTABOP_copy: Hypervisor based copy
@@ -296,6 +300,7 @@ struct gnttab_copy {
296 /* OUT parameters. */ 300 /* OUT parameters. */
297 int16_t status; 301 int16_t status;
298}; 302};
303DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
299 304
300/* 305/*
301 * GNTTABOP_query_size: Query the current and maximum sizes of the shared 306 * GNTTABOP_query_size: Query the current and maximum sizes of the shared
@@ -313,7 +318,7 @@ struct gnttab_query_size {
313 uint32_t max_nr_frames; 318 uint32_t max_nr_frames;
314 int16_t status; /* GNTST_* */ 319 int16_t status; /* GNTST_* */
315}; 320};
316 321DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
317 322
318/* 323/*
319 * Bitfield values for update_pin_status.flags. 324 * Bitfield values for update_pin_status.flags.
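
The GUEST_HANDLE()/DEFINE_GUEST_HANDLE_STRUCT() annotations added above keep hypercall arguments ABI-portable: on x86 a handle wraps a raw pointer, on ia64 it carries a xencomm cookie. A sketch of a caller, modeled on the grant-table setup path in drivers/xen/grant-table.c:

static int example_setup_table(unsigned long *frames, unsigned int nr_gframes)
{
	struct gnttab_setup_table setup;
	int rc;

	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_gframes;
	/* set_xen_guest_handle() fills the handle without the caller
	   knowing whether it is a pointer or an opaque descriptor */
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc < 0)
		return rc;
	return setup.status == GNTST_okay ? 0 : -EINVAL;
}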
diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h
new file mode 100644
index 000000000000..5a934dd7796d
--- /dev/null
+++ b/include/xen/interface/io/fbif.h
@@ -0,0 +1,124 @@
1/*
2 * fbif.h -- Xen virtual frame buffer device
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to
6 * deal in the Software without restriction, including without limitation the
7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 * sell copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 *
22 * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
23 * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
24 */
25
26#ifndef __XEN_PUBLIC_IO_FBIF_H__
27#define __XEN_PUBLIC_IO_FBIF_H__
28
29/* Out events (frontend -> backend) */
30
31/*
32 * Out events may be sent only when requested by backend, and receipt
33 * of an unknown out event is an error.
34 */
35
36/* Event type 1 currently not used */
37/*
38 * Framebuffer update notification event
39 * Capable frontend sets feature-update in xenstore.
40 * Backend requests it by setting request-update in xenstore.
41 */
42#define XENFB_TYPE_UPDATE 2
43
44struct xenfb_update {
45 uint8_t type; /* XENFB_TYPE_UPDATE */
46 int32_t x; /* source x */
47 int32_t y; /* source y */
48 int32_t width; /* rect width */
49 int32_t height; /* rect height */
50};
51
52#define XENFB_OUT_EVENT_SIZE 40
53
54union xenfb_out_event {
55 uint8_t type;
56 struct xenfb_update update;
57 char pad[XENFB_OUT_EVENT_SIZE];
58};
59
60/* In events (backend -> frontend) */
61
62/*
63 * Frontends should ignore unknown in events.
64 * No in events currently defined.
65 */
66
67#define XENFB_IN_EVENT_SIZE 40
68
69union xenfb_in_event {
70 uint8_t type;
71 char pad[XENFB_IN_EVENT_SIZE];
72};
73
74/* shared page */
75
76#define XENFB_IN_RING_SIZE 1024
77#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
78#define XENFB_IN_RING_OFFS 1024
79#define XENFB_IN_RING(page) \
80 ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
81#define XENFB_IN_RING_REF(page, idx) \
82 (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
83
84#define XENFB_OUT_RING_SIZE 2048
85#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
86#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
87#define XENFB_OUT_RING(page) \
88 ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
89#define XENFB_OUT_RING_REF(page, idx) \
90 (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
91
92struct xenfb_page {
93 uint32_t in_cons, in_prod;
94 uint32_t out_cons, out_prod;
95
96 int32_t width; /* width of the framebuffer (in pixels) */
97 int32_t height; /* height of the framebuffer (in pixels) */
98 uint32_t line_length; /* length of a row of pixels (in bytes) */
99 uint32_t mem_length; /* length of the framebuffer (in bytes) */
100 uint8_t depth; /* depth of a pixel (in bits) */
101
102 /*
103 * Framebuffer page directory
104 *
105 * Each directory page holds PAGE_SIZE / sizeof(*pd)
106 * framebuffer pages, and can thus map up to PAGE_SIZE *
107 * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
108 * sizeof(unsigned long) == 4, that's 4 Megs. Two directory
109 * pages should be enough for a while.
110 */
111 unsigned long pd[2];
112};
113
114/*
115 * Wart: xenkbd needs to know resolution. Put it here until a better
116 * solution is found, but don't leak it to the backend.
117 */
118#ifdef __KERNEL__
119#define XENFB_WIDTH 800
120#define XENFB_HEIGHT 600
121#define XENFB_DEPTH 32
122#endif
123
124#endif
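
A sketch of the producer side of the out ring, modeled on the update path of the xen-fbfront driver this series adds; the caller is assumed to have checked that prod - page->out_cons is below XENFB_OUT_RING_LEN:

static void example_send_update(struct xenfb_page *page, int irq,
				int x, int y, int width, int height)
{
	union xenfb_out_event event;
	u32 prod = page->out_prod;

	memset(&event, 0, sizeof(event));
	event.update.type = XENFB_TYPE_UPDATE;
	event.update.x = x;
	event.update.y = y;
	event.update.width = width;
	event.update.height = height;

	XENFB_OUT_RING_REF(page, prod) = event;
	wmb();			/* event must be visible before the index moves */
	page->out_prod = prod + 1;

	notify_remote_via_irq(irq);	/* kick the backend's event channel */
}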
diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h
new file mode 100644
index 000000000000..fb97f4284ffd
--- /dev/null
+++ b/include/xen/interface/io/kbdif.h
@@ -0,0 +1,114 @@
1/*
2 * kbdif.h -- Xen virtual keyboard/mouse
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to
6 * deal in the Software without restriction, including without limitation the
7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 * sell copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 *
22 * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
23 * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
24 */
25
26#ifndef __XEN_PUBLIC_IO_KBDIF_H__
27#define __XEN_PUBLIC_IO_KBDIF_H__
28
29/* In events (backend -> frontend) */
30
31/*
32 * Frontends should ignore unknown in events.
33 */
34
35/* Pointer movement event */
36#define XENKBD_TYPE_MOTION 1
37/* Event type 2 currently not used */
38/* Key event (includes pointer buttons) */
39#define XENKBD_TYPE_KEY 3
40/*
41 * Pointer position event
42 * Capable backend sets feature-abs-pointer in xenstore.
43 * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
44 * request-abs-pointer in xenstore.
45 */
46#define XENKBD_TYPE_POS 4
47
48struct xenkbd_motion {
49 uint8_t type; /* XENKBD_TYPE_MOTION */
50 int32_t rel_x; /* relative X motion */
51 int32_t rel_y; /* relative Y motion */
52};
53
54struct xenkbd_key {
55 uint8_t type; /* XENKBD_TYPE_KEY */
56 uint8_t pressed; /* 1 if pressed; 0 otherwise */
57 uint32_t keycode; /* KEY_* from linux/input.h */
58};
59
60struct xenkbd_position {
61 uint8_t type; /* XENKBD_TYPE_POS */
62 int32_t abs_x; /* absolute X position (in FB pixels) */
63 int32_t abs_y; /* absolute Y position (in FB pixels) */
64};
65
66#define XENKBD_IN_EVENT_SIZE 40
67
68union xenkbd_in_event {
69 uint8_t type;
70 struct xenkbd_motion motion;
71 struct xenkbd_key key;
72 struct xenkbd_position pos;
73 char pad[XENKBD_IN_EVENT_SIZE];
74};
75
76/* Out events (frontend -> backend) */
77
78/*
79 * Out events may be sent only when requested by backend, and receipt
80 * of an unknown out event is an error.
81 * No out events currently defined.
82 */
83
84#define XENKBD_OUT_EVENT_SIZE 40
85
86union xenkbd_out_event {
87 uint8_t type;
88 char pad[XENKBD_OUT_EVENT_SIZE];
89};
90
91/* shared page */
92
93#define XENKBD_IN_RING_SIZE 2048
94#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
95#define XENKBD_IN_RING_OFFS 1024
96#define XENKBD_IN_RING(page) \
97 ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
98#define XENKBD_IN_RING_REF(page, idx) \
99 (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
100
101#define XENKBD_OUT_RING_SIZE 1024
102#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
103#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
104#define XENKBD_OUT_RING(page) \
105 ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
106#define XENKBD_OUT_RING_REF(page, idx) \
107 (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
108
109struct xenkbd_page {
110 uint32_t in_cons, in_prod;
111 uint32_t out_cons, out_prod;
112};
113
114#endif
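
The consumer side of the in ring, a sketch modeled on the interrupt handler of the xen-kbdfront driver this series adds:

static void example_drain_in_ring(struct xenkbd_page *page)
{
	u32 cons, prod = page->in_prod;

	rmb();			/* see ring contents up to prod */
	for (cons = page->in_cons; cons != prod; cons++) {
		union xenkbd_in_event *event = &XENKBD_IN_RING_REF(page, cons);

		switch (event->type) {
		case XENKBD_TYPE_MOTION:
			/* feed event->motion.rel_x/rel_y to the input layer */
			break;
		case XENKBD_TYPE_KEY:
			/* report event->key.keycode, event->key.pressed */
			break;
		case XENKBD_TYPE_POS:
			/* report event->pos.abs_x/abs_y */
			break;
		default:
			break;	/* frontends ignore unknown in events */
		}
	}
	mb();			/* finish reading before freeing the slots */
	page->in_cons = cons;
}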
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
new file mode 100644
index 000000000000..01fc8ae5f0b0
--- /dev/null
+++ b/include/xen/interface/io/protocols.h
@@ -0,0 +1,21 @@
1#ifndef __XEN_PROTOCOLS_H__
2#define __XEN_PROTOCOLS_H__
3
4#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
5#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
6#define XEN_IO_PROTO_ABI_IA64 "ia64-abi"
7#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi"
8
9#if defined(__i386__)
10# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
11#elif defined(__x86_64__)
12# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
13#elif defined(__ia64__)
14# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
15#elif defined(__powerpc64__)
16# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64
17#else
18# error arch fixup needed here
19#endif
20
21#endif
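
XEN_IO_PROTO_ABI_NATIVE is what a frontend writes to xenstore so that, for instance, a 64-bit backend can serve a 32-bit guest's ring layout correctly; xen-blkfront does exactly this elsewhere in this series. A sketch:

static int example_advertise_protocol(struct xenbus_transaction xbt,
				      struct xenbus_device *dev)
{
	return xenbus_printf(xbt, dev->nodename, "protocol", "%s",
			     XEN_IO_PROTO_ABI_NATIVE);
}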
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index af36ead16817..da768469aa92 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
29 * OUT: GMFN bases of extents that were allocated 29 * OUT: GMFN bases of extents that were allocated
30 * (NB. This command also updates the mach_to_phys translation table) 30 * (NB. This command also updates the mach_to_phys translation table)
31 */ 31 */
32 GUEST_HANDLE(ulong) extent_start; 32 ulong extent_start;
33 33
34 /* Number of extents, and size/alignment of each (2^extent_order pages). */ 34 /* Number of extents, and size/alignment of each (2^extent_order pages). */
35 unsigned long nr_extents; 35 unsigned long nr_extents;
@@ -50,7 +50,6 @@ struct xen_memory_reservation {
50 domid_t domid; 50 domid_t domid;
51 51
52}; 52};
53DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
54 53
55/* 54/*
56 * Returns the maximum machine frame number of mapped RAM in this system. 55 * Returns the maximum machine frame number of mapped RAM in this system.
@@ -86,7 +85,7 @@ struct xen_machphys_mfn_list {
86 * any large discontiguities in the machine address space, 2MB gaps in 85 * any large discontiguities in the machine address space, 2MB gaps in
87 * the machphys table will be represented by an MFN base of zero. 86 * the machphys table will be represented by an MFN base of zero.
88 */ 87 */
89 GUEST_HANDLE(ulong) extent_start; 88 ulong extent_start;
90 89
91 /* 90 /*
92 * Number of extents written to the above array. This will be smaller 91 * Number of extents written to the above array. This will be smaller
@@ -94,7 +93,6 @@ struct xen_machphys_mfn_list {
94 */ 93 */
95 unsigned int nr_extents; 94 unsigned int nr_extents;
96}; 95};
97DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
98 96
99/* 97/*
100 * Sets the GPFN at which a particular page appears in the specified guest's 98 * Sets the GPFN at which a particular page appears in the specified guest's
@@ -117,7 +115,6 @@ struct xen_add_to_physmap {
117 /* GPFN where the source mapping page should appear. */ 115 /* GPFN where the source mapping page should appear. */
118 unsigned long gpfn; 116 unsigned long gpfn;
119}; 117};
120DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
121 118
122/* 119/*
123 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error 120 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
@@ -132,14 +129,13 @@ struct xen_translate_gpfn_list {
132 unsigned long nr_gpfns; 129 unsigned long nr_gpfns;
133 130
134 /* List of GPFNs to translate. */ 131 /* List of GPFNs to translate. */
135 GUEST_HANDLE(ulong) gpfn_list; 132 ulong gpfn_list;
136 133
137 /* 134 /*
138 * Output list to contain MFN translations. May be the same as the input 135 * Output list to contain MFN translations. May be the same as the input
139 * list (in which case each input GPFN is overwritten with the output MFN). 136 * list (in which case each input GPFN is overwritten with the output MFN).
140 */ 137 */
141 GUEST_HANDLE(ulong) mfn_list; 138 ulong mfn_list;
142}; 139};
143DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
144 140
145#endif /* __XEN_PUBLIC_MEMORY_H__ */ 141#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index b05d8a6d9143..87e6f8a48661 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -85,6 +85,7 @@ struct vcpu_runstate_info {
85 */ 85 */
86 uint64_t time[4]; 86 uint64_t time[4];
87}; 87};
88DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
88 89
89/* VCPU is currently running on a physical CPU. */ 90/* VCPU is currently running on a physical CPU. */
90#define RUNSTATE_running 0 91#define RUNSTATE_running 0
@@ -119,6 +120,7 @@ struct vcpu_runstate_info {
119#define VCPUOP_register_runstate_memory_area 5 120#define VCPUOP_register_runstate_memory_area 5
120struct vcpu_register_runstate_memory_area { 121struct vcpu_register_runstate_memory_area {
121 union { 122 union {
123 GUEST_HANDLE(vcpu_runstate_info) h;
122 struct vcpu_runstate_info *v; 124 struct vcpu_runstate_info *v;
123 uint64_t p; 125 uint64_t p;
124 } addr; 126 } addr;
@@ -134,6 +136,7 @@ struct vcpu_register_runstate_memory_area {
134struct vcpu_set_periodic_timer { 136struct vcpu_set_periodic_timer {
135 uint64_t period_ns; 137 uint64_t period_ns;
136}; 138};
139DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer);
137 140
138/* 141/*
139 * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot 142 * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
@@ -145,6 +148,7 @@ struct vcpu_set_singleshot_timer {
145 uint64_t timeout_abs_ns; 148 uint64_t timeout_abs_ns;
146 uint32_t flags; /* VCPU_SSHOTTMR_??? */ 149 uint32_t flags; /* VCPU_SSHOTTMR_??? */
147}; 150};
151DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer);
148 152
149/* Flags to VCPUOP_set_singleshot_timer. */ 153/* Flags to VCPUOP_set_singleshot_timer. */
150 /* Require the timeout to be in the future (return -ETIME if it's passed). */ 154 /* Require the timeout to be in the future (return -ETIME if it's passed). */
@@ -164,5 +168,6 @@ struct vcpu_register_vcpu_info {
164 uint32_t offset; /* offset within page */ 168 uint32_t offset; /* offset within page */
165 uint32_t rsvd; /* unused */ 169 uint32_t rsvd; /* unused */
166}; 170};
171DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
167 172
168#endif /* __XEN_PUBLIC_VCPU_H__ */ 173#endif /* __XEN_PUBLIC_VCPU_H__ */
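
A sketch of registering a per-vcpu runstate area through the new guest-handle member; on x86 this is equivalent to assigning addr.v, while set_xen_guest_handle() keeps the argument portable to ia64:

static void example_register_runstate(int cpu,
				      struct vcpu_runstate_info *runstate)
{
	struct vcpu_register_runstate_memory_area area;

	set_xen_guest_handle(area.addr.h, runstate);

	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       cpu, &area))
		BUG();
}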
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 518a5bf79ed3..9b018da48cf3 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -58,6 +58,16 @@
58#define __HYPERVISOR_physdev_op 33 58#define __HYPERVISOR_physdev_op 33
59#define __HYPERVISOR_hvm_op 34 59#define __HYPERVISOR_hvm_op 34
60 60
61/* Architecture-specific hypercall definitions. */
62#define __HYPERVISOR_arch_0 48
63#define __HYPERVISOR_arch_1 49
64#define __HYPERVISOR_arch_2 50
65#define __HYPERVISOR_arch_3 51
66#define __HYPERVISOR_arch_4 52
67#define __HYPERVISOR_arch_5 53
68#define __HYPERVISOR_arch_6 54
69#define __HYPERVISOR_arch_7 55
70
61/* 71/*
62 * VIRTUAL INTERRUPTS 72 * VIRTUAL INTERRUPTS
63 * 73 *
@@ -68,8 +78,18 @@
68#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ 78#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
69#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ 79#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
70#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ 80#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
71#define NR_VIRQS 8
72 81
82/* Architecture-specific VIRQ definitions. */
83#define VIRQ_ARCH_0 16
84#define VIRQ_ARCH_1 17
85#define VIRQ_ARCH_2 18
86#define VIRQ_ARCH_3 19
87#define VIRQ_ARCH_4 20
88#define VIRQ_ARCH_5 21
89#define VIRQ_ARCH_6 22
90#define VIRQ_ARCH_7 23
91
92#define NR_VIRQS 24
73/* 93/*
74 * MMU-UPDATE REQUESTS 94 * MMU-UPDATE REQUESTS
75 * 95 *
diff --git a/include/xen/interface/xencomm.h b/include/xen/interface/xencomm.h
new file mode 100644
index 000000000000..ac45e0712afa
--- /dev/null
+++ b/include/xen/interface/xencomm.h
@@ -0,0 +1,41 @@
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy
3 * of this software and associated documentation files (the "Software"), to
4 * deal in the Software without restriction, including without limitation the
5 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
6 * sell copies of the Software, and to permit persons to whom the Software is
7 * furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in
10 * all copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
18 * DEALINGS IN THE SOFTWARE.
19 *
20 * Copyright (C) IBM Corp. 2006
21 */
22
23#ifndef _XEN_XENCOMM_H_
24#define _XEN_XENCOMM_H_
25
26/* A xencomm descriptor is a scatter/gather list containing physical
27 * addresses corresponding to a virtually contiguous memory area. The
28 * hypervisor translates these physical addresses to machine addresses to copy
29 * to and from the virtually contiguous area.
30 */
31
32#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */
33#define XENCOMM_INVALID (~0UL)
34
35struct xencomm_desc {
36 uint32_t magic;
37 uint32_t nr_addrs; /* the number of entries in address[] */
38 uint64_t address[0];
39};
40
41#endif /* _XEN_XENCOMM_H_ */
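
A sketch of how a descriptor is filled for a virtually contiguous buffer, one entry per backing page; the caller is assumed to have allocated desc with enough address[] slots, and xencomm_vtop() is the arch-provided translation declared in include/xen/xencomm.h below:

static void example_fill_desc(struct xencomm_desc *desc,
			      void *buffer, unsigned long bytes)
{
	unsigned long vaddr = (unsigned long)buffer;
	uint32_t i = 0;

	desc->magic = XENCOMM_MAGIC;
	while (bytes) {
		/* stop each entry at the next page boundary */
		unsigned long chunk = min_t(unsigned long, bytes,
					    PAGE_SIZE - (vaddr % PAGE_SIZE));

		desc->address[i++] = xencomm_vtop(vaddr);
		vaddr += chunk;
		bytes -= chunk;
	}
	desc->nr_addrs = i;
}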
diff --git a/include/xen/page.h b/include/xen/page.h
index 031ef22a971e..eaf85fab1263 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -1,180 +1 @@
1#ifndef __XEN_PAGE_H #include <asm/xen/page.h>
2#define __XEN_PAGE_H
3
4#include <linux/pfn.h>
5
6#include <asm/uaccess.h>
7#include <asm/pgtable.h>
8
9#include <xen/features.h>
10
11#ifdef CONFIG_X86_PAE
12/* Xen machine address */
13typedef struct xmaddr {
14 unsigned long long maddr;
15} xmaddr_t;
16
17/* Xen pseudo-physical address */
18typedef struct xpaddr {
19 unsigned long long paddr;
20} xpaddr_t;
21#else
22/* Xen machine address */
23typedef struct xmaddr {
24 unsigned long maddr;
25} xmaddr_t;
26
27/* Xen pseudo-physical address */
28typedef struct xpaddr {
29 unsigned long paddr;
30} xpaddr_t;
31#endif
32
33#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
34#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
35
36/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
37#define INVALID_P2M_ENTRY (~0UL)
38#define FOREIGN_FRAME_BIT (1UL<<31)
39#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
40
41extern unsigned long *phys_to_machine_mapping;
42
43static inline unsigned long pfn_to_mfn(unsigned long pfn)
44{
45 if (xen_feature(XENFEAT_auto_translated_physmap))
46 return pfn;
47
48 return phys_to_machine_mapping[(unsigned int)(pfn)] &
49 ~FOREIGN_FRAME_BIT;
50}
51
52static inline int phys_to_machine_mapping_valid(unsigned long pfn)
53{
54 if (xen_feature(XENFEAT_auto_translated_physmap))
55 return 1;
56
57 return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
58}
59
60static inline unsigned long mfn_to_pfn(unsigned long mfn)
61{
62 unsigned long pfn;
63
64 if (xen_feature(XENFEAT_auto_translated_physmap))
65 return mfn;
66
67#if 0
68 if (unlikely((mfn >> machine_to_phys_order) != 0))
69 return max_mapnr;
70#endif
71
72 pfn = 0;
73 /*
74 * The array access can fail (e.g., device space beyond end of RAM).
75 * In such cases it doesn't matter what we return (we return garbage),
76 * but we must handle the fault without crashing!
77 */
78 __get_user(pfn, &machine_to_phys_mapping[mfn]);
79
80 return pfn;
81}
82
83static inline xmaddr_t phys_to_machine(xpaddr_t phys)
84{
85 unsigned offset = phys.paddr & ~PAGE_MASK;
86 return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
87}
88
89static inline xpaddr_t machine_to_phys(xmaddr_t machine)
90{
91 unsigned offset = machine.maddr & ~PAGE_MASK;
92 return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
93}
94
95/*
96 * We detect special mappings in one of two ways:
97 * 1. If the MFN is an I/O page then Xen will set the m2p entry
98 * to be outside our maximum possible pseudophys range.
99 * 2. If the MFN belongs to a different domain then we will certainly
100 * not have MFN in our p2m table. Conversely, if the page is ours,
101 * then we'll have p2m(m2p(MFN))==MFN.
102 * If we detect a special mapping then it doesn't have a 'struct page'.
103 * We force !pfn_valid() by returning an out-of-range pointer.
104 *
105 * NB. These checks require that, for any MFN that is not in our reservation,
106 * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
107 * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
108 * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
109 *
110 * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
111 * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
112 * require. In all the cases we care about, the FOREIGN_FRAME bit is
113 * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
114 */
115static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
116{
117 extern unsigned long max_mapnr;
118 unsigned long pfn = mfn_to_pfn(mfn);
119 if ((pfn < max_mapnr)
120 && !xen_feature(XENFEAT_auto_translated_physmap)
121 && (phys_to_machine_mapping[pfn] != mfn))
122 return max_mapnr; /* force !pfn_valid() */
123 return pfn;
124}
125
126static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
127{
128 if (xen_feature(XENFEAT_auto_translated_physmap)) {
129 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
130 return;
131 }
132 phys_to_machine_mapping[pfn] = mfn;
133}
134
135/* VIRT <-> MACHINE conversion */
136#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
137#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
138#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
139
140#ifdef CONFIG_X86_PAE
141#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
142 (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)))
143
144static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
145{
146 pte_t pte;
147
148 pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) |
149 (pgprot_val(pgprot) >> 32);
150 pte.pte_high &= (__supported_pte_mask >> 32);
151 pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
152 pte.pte_low &= __supported_pte_mask;
153
154 return pte;
155}
156
157static inline unsigned long long pte_val_ma(pte_t x)
158{
159 return x.pte;
160}
161#define pmd_val_ma(v) ((v).pmd)
162#define pud_val_ma(v) ((v).pgd.pgd)
163#define __pte_ma(x) ((pte_t) { .pte = (x) })
164#define __pmd_ma(x) ((pmd_t) { (x) } )
165#else /* !X86_PAE */
166#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
167#define mfn_pte(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
168#define pte_val_ma(x) ((x).pte)
169#define pmd_val_ma(v) ((v).pud.pgd.pgd)
170#define __pte_ma(x) ((pte_t) { (x) } )
171#endif /* CONFIG_X86_PAE */
172
173#define pgd_val_ma(x) ((x).pgd)
174
175
176xmaddr_t arbitrary_virt_to_machine(unsigned long address);
177void make_lowmem_page_readonly(void *vaddr);
178void make_lowmem_page_readwrite(void *vaddr);
179
180#endif /* __XEN_PAGE_H */
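
The NB2 rule above in concrete form, a minimal sketch using the helpers this patch relocates to asm/xen/page.h:

static void example_map_foreign(unsigned long pfn, unsigned long foreign_mfn)
{
	/* FOREIGN_FRAME() sets FOREIGN_FRAME_BIT so pte_pfn() and
	   mfn_to_local_pfn() will not mistake the frame for a local one */
	set_phys_to_machine(pfn, FOREIGN_FRAME(foreign_mfn));
}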
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
new file mode 100644
index 000000000000..10ddfe0142d0
--- /dev/null
+++ b/include/xen/xen-ops.h
@@ -0,0 +1,8 @@
1#ifndef INCLUDE_XEN_OPS_H
2#define INCLUDE_XEN_OPS_H
3
4#include <linux/percpu.h>
5
6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
7
8#endif /* INCLUDE_XEN_OPS_H */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 6f7c290651ae..6369d89c25d5 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -97,6 +97,7 @@ struct xenbus_driver {
97 int (*uevent)(struct xenbus_device *, char **, int, char *, int); 97 int (*uevent)(struct xenbus_device *, char **, int, char *, int);
98 struct device_driver driver; 98 struct device_driver driver;
99 int (*read_otherend_details)(struct xenbus_device *dev); 99 int (*read_otherend_details)(struct xenbus_device *dev);
100 int (*is_ready)(struct xenbus_device *dev);
100}; 101};
101 102
102static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) 103static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
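
A sketch of a frontend supplying the new is_ready hook, modeled on the blkfront change in this series; struct example_info is illustrative per-device state:

struct example_info {
	int connected;
};

static int example_is_ready(struct xenbus_device *dev)
{
	struct example_info *info = dev->dev.driver_data;

	return info != NULL && info->connected;
}

/* wired up via .is_ready = example_is_ready in the xenbus_driver */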
diff --git a/include/xen/xencomm.h b/include/xen/xencomm.h
new file mode 100644
index 000000000000..e43b039be112
--- /dev/null
+++ b/include/xen/xencomm.h
@@ -0,0 +1,77 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 *
16 * Copyright (C) IBM Corp. 2006
17 *
18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
19 * Jerone Young <jyoung5@us.ibm.com>
20 */
21
22#ifndef _LINUX_XENCOMM_H_
23#define _LINUX_XENCOMM_H_
24
25#include <xen/interface/xencomm.h>
26
27#define XENCOMM_MINI_ADDRS 3
28struct xencomm_mini {
29 struct xencomm_desc _desc;
30 uint64_t address[XENCOMM_MINI_ADDRS];
31};
32
33/* To avoid additional virt to phys conversion, an opaque structure is
34 presented. */
35struct xencomm_handle;
36
37extern void xencomm_free(struct xencomm_handle *desc);
38extern struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes);
39extern struct xencomm_handle *__xencomm_map_no_alloc(void *ptr,
40 unsigned long bytes, struct xencomm_mini *xc_area);
41
42#if 0
43#define XENCOMM_MINI_ALIGNED(xc_desc, n) \
44 struct xencomm_mini xc_desc ## _base[(n)] \
45 __attribute__((__aligned__(sizeof(struct xencomm_mini)))); \
46 struct xencomm_mini *xc_desc = &xc_desc ## _base[0];
47#else
48/*
49 * gcc bug workaround:
50 * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16660
51 * gcc doesn't properly handle stack variables with
52 * __attribute__((__aligned__(sizeof(struct xencomm_mini))))
53 */
54#define XENCOMM_MINI_ALIGNED(xc_desc, n) \
55 unsigned char xc_desc ## _base[((n) + 1 ) * \
56 sizeof(struct xencomm_mini)]; \
57 struct xencomm_mini *xc_desc = (struct xencomm_mini *) \
58 ((unsigned long)xc_desc ## _base + \
59 (sizeof(struct xencomm_mini) - \
60 ((unsigned long)xc_desc ## _base) % \
61 sizeof(struct xencomm_mini)));
62#endif
63#define xencomm_map_no_alloc(ptr, bytes) \
64 ({ XENCOMM_MINI_ALIGNED(xc_desc, 1); \
65 __xencomm_map_no_alloc(ptr, bytes, xc_desc); })
66
67/* provided by architecture code: */
68extern unsigned long xencomm_vtop(unsigned long vaddr);
69
70static inline void *xencomm_pa(void *ptr)
71{
72 return (void *)xencomm_vtop((unsigned long)ptr);
73}
74
75#define xen_guest_handle(hnd) ((hnd).p)
76
77#endif /* _LINUX_XENCOMM_H_ */
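
A sketch of the intended use of xencomm_map_no_alloc(): wrap an on-stack hypercall argument without allocating, then hand the opaque descriptor to an arch hypercall wrapper; example_arch_hypercall() is hypothetical:

extern int example_arch_hypercall(struct xencomm_handle *desc); /* hypothetical */

static int example_wrapped_hypercall(struct vcpu_set_periodic_timer *arg)
{
	/* xencomm_map_no_alloc() expands XENCOMM_MINI_ALIGNED(), which
	   over-allocates one struct xencomm_mini on the stack and rounds
	   the pointer up, sidestepping the gcc bug noted above */
	struct xencomm_handle *desc = xencomm_map_no_alloc(arg, sizeof(*arg));

	if (desc == NULL)
		return -EINVAL;

	return example_arch_hypercall(desc);
}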