diff options
67 files changed, 3607 insertions, 921 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0f8934fc303..2a609dc3271c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -409,7 +409,7 @@ restore_nocheck_notrace: | |||
409 | irq_return: | 409 | irq_return: |
410 | INTERRUPT_RETURN | 410 | INTERRUPT_RETURN |
411 | .section .fixup,"ax" | 411 | .section .fixup,"ax" |
412 | iret_exc: | 412 | ENTRY(iret_exc) |
413 | pushl $0 # no error code | 413 | pushl $0 # no error code |
414 | pushl $do_iret_error | 414 | pushl $do_iret_error |
415 | jmp error_code | 415 | jmp error_code |
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper) | |||
1017 | ENDPROC(kernel_thread_helper) | 1017 | ENDPROC(kernel_thread_helper) |
1018 | 1018 | ||
1019 | #ifdef CONFIG_XEN | 1019 | #ifdef CONFIG_XEN |
1020 | /* Xen doesn't set %esp to be precisely what the normal sysenter | ||
1021 | entrypoint expects, so fix it up before using the normal path. */ | ||
1022 | ENTRY(xen_sysenter_target) | ||
1023 | RING0_INT_FRAME | ||
1024 | addl $5*4, %esp /* remove xen-provided frame */ | ||
1025 | jmp sysenter_past_esp | ||
1026 | |||
1020 | ENTRY(xen_hypervisor_callback) | 1027 | ENTRY(xen_hypervisor_callback) |
1021 | CFI_STARTPROC | 1028 | CFI_STARTPROC |
1022 | pushl $0 | 1029 | pushl $0 |
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback) | |||
1035 | cmpl $xen_iret_end_crit,%eax | 1042 | cmpl $xen_iret_end_crit,%eax |
1036 | jae 1f | 1043 | jae 1f |
1037 | 1044 | ||
1038 | call xen_iret_crit_fixup | 1045 | jmp xen_iret_crit_fixup |
1039 | 1046 | ||
1047 | ENTRY(xen_do_upcall) | ||
1040 | 1: mov %esp, %eax | 1048 | 1: mov %esp, %eax |
1041 | call xen_evtchn_do_upcall | 1049 | call xen_evtchn_do_upcall |
1042 | jmp ret_from_intr | 1050 | jmp ret_from_intr |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 3733412d1357..74f0c5ea2a03 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
366 | .flush_tlb_single = native_flush_tlb_single, | 366 | .flush_tlb_single = native_flush_tlb_single, |
367 | .flush_tlb_others = native_flush_tlb_others, | 367 | .flush_tlb_others = native_flush_tlb_others, |
368 | 368 | ||
369 | .alloc_pt = paravirt_nop, | 369 | .alloc_pte = paravirt_nop, |
370 | .alloc_pd = paravirt_nop, | 370 | .alloc_pmd = paravirt_nop, |
371 | .alloc_pd_clone = paravirt_nop, | 371 | .alloc_pmd_clone = paravirt_nop, |
372 | .release_pt = paravirt_nop, | 372 | .alloc_pud = paravirt_nop, |
373 | .release_pd = paravirt_nop, | 373 | .release_pte = paravirt_nop, |
374 | .release_pmd = paravirt_nop, | ||
375 | .release_pud = paravirt_nop, | ||
374 | 376 | ||
375 | .set_pte = native_set_pte, | 377 | .set_pte = native_set_pte, |
376 | .set_pte_at = native_set_pte_at, | 378 | .set_pte_at = native_set_pte_at, |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 19c9386ac118..1791a751a772 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <asm/apic.h> | 8 | #include <asm/apic.h> |
9 | #include <asm/desc.h> | 9 | #include <asm/desc.h> |
10 | #include <asm/hpet.h> | 10 | #include <asm/hpet.h> |
11 | #include <asm/pgtable.h> | ||
11 | #include <asm/reboot_fixups.h> | 12 | #include <asm/reboot_fixups.h> |
12 | #include <asm/reboot.h> | 13 | #include <asm/reboot.h> |
13 | 14 | ||
@@ -15,7 +16,6 @@ | |||
15 | # include <linux/dmi.h> | 16 | # include <linux/dmi.h> |
16 | # include <linux/ctype.h> | 17 | # include <linux/ctype.h> |
17 | # include <linux/mc146818rtc.h> | 18 | # include <linux/mc146818rtc.h> |
18 | # include <asm/pgtable.h> | ||
19 | #else | 19 | #else |
20 | # include <asm/iommu.h> | 20 | # include <asm/iommu.h> |
21 | #endif | 21 | #endif |
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length) | |||
275 | /* Remap the kernel at virtual address zero, as well as offset zero | 275 | /* Remap the kernel at virtual address zero, as well as offset zero |
276 | from the kernel segment. This assumes the kernel segment starts at | 276 | from the kernel segment. This assumes the kernel segment starts at |
277 | virtual address PAGE_OFFSET. */ | 277 | virtual address PAGE_OFFSET. */ |
278 | memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 278 | memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
279 | sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); | 279 | sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); |
280 | 280 | ||
281 | /* | 281 | /* |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ade371f9663a..eef79e84145f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
1039 | 1039 | ||
1040 | #ifdef CONFIG_X86_32 | 1040 | #ifdef CONFIG_X86_32 |
1041 | /* init low mem mapping */ | 1041 | /* init low mem mapping */ |
1042 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 1042 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
1043 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); | 1043 | min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); |
1044 | flush_tlb_all(); | 1044 | flush_tlb_all(); |
1045 | #endif | 1045 | #endif |
1046 | 1046 | ||
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 12affe1f9bce..956f38927aa7 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page) | |||
320 | * pdes need to be zeroed. | 320 | * pdes need to be zeroed. |
321 | */ | 321 | */ |
322 | if (type & VMI_PAGE_CLONE) | 322 | if (type & VMI_PAGE_CLONE) |
323 | limit = USER_PTRS_PER_PGD; | 323 | limit = KERNEL_PGD_BOUNDARY; |
324 | for (i = 0; i < limit; i++) | 324 | for (i = 0; i < limit; i++) |
325 | BUG_ON(ptr[i]); | 325 | BUG_ON(ptr[i]); |
326 | } | 326 | } |
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
392 | } | 392 | } |
393 | #endif | 393 | #endif |
394 | 394 | ||
395 | static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) | 395 | static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) |
396 | { | 396 | { |
397 | vmi_set_page_type(pfn, VMI_PAGE_L1); | 397 | vmi_set_page_type(pfn, VMI_PAGE_L1); |
398 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 398 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
399 | } | 399 | } |
400 | 400 | ||
401 | static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) | 401 | static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) |
402 | { | 402 | { |
403 | /* | 403 | /* |
404 | * This call comes in very early, before mem_map is setup. | 404 | * This call comes in very early, before mem_map is setup. |
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) | |||
409 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 409 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
410 | } | 410 | } |
411 | 411 | ||
412 | static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | 412 | static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) |
413 | { | 413 | { |
414 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | 414 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); |
415 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | 415 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); |
416 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 416 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
417 | } | 417 | } |
418 | 418 | ||
419 | static void vmi_release_pt(u32 pfn) | 419 | static void vmi_release_pte(u32 pfn) |
420 | { | 420 | { |
421 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 421 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
422 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 422 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
423 | } | 423 | } |
424 | 424 | ||
425 | static void vmi_release_pd(u32 pfn) | 425 | static void vmi_release_pmd(u32 pfn) |
426 | { | 426 | { |
427 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 427 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
428 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 428 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void) | |||
871 | 871 | ||
872 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | 872 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); |
873 | if (vmi_ops.allocate_page) { | 873 | if (vmi_ops.allocate_page) { |
874 | pv_mmu_ops.alloc_pt = vmi_allocate_pt; | 874 | pv_mmu_ops.alloc_pte = vmi_allocate_pte; |
875 | pv_mmu_ops.alloc_pd = vmi_allocate_pd; | 875 | pv_mmu_ops.alloc_pmd = vmi_allocate_pmd; |
876 | pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone; | 876 | pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone; |
877 | } | 877 | } |
878 | 878 | ||
879 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | 879 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); |
880 | if (vmi_ops.release_page) { | 880 | if (vmi_ops.release_page) { |
881 | pv_mmu_ops.release_pt = vmi_release_pt; | 881 | pv_mmu_ops.release_pte = vmi_release_pte; |
882 | pv_mmu_ops.release_pd = vmi_release_pd; | 882 | pv_mmu_ops.release_pmd = vmi_release_pmd; |
883 | } | 883 | } |
884 | 884 | ||
885 | /* Set linear is needed in all cases */ | 885 | /* Set linear is needed in all cases */ |
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index d05722121d24..6e2c4efce0ef 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c | |||
@@ -543,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu) | |||
543 | hijack_source.idt.Offset, stack_start.sp)); | 543 | hijack_source.idt.Offset, stack_start.sp)); |
544 | 544 | ||
545 | /* init lowmem identity mapping */ | 545 | /* init lowmem identity mapping */ |
546 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 546 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
547 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); | 547 | min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); |
548 | flush_tlb_all(); | 548 | flush_tlb_all(); |
549 | 549 | ||
550 | if (quad_boot) { | 550 | if (quad_boot) { |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 20941d2954e2..b7b3e4c7cfc9 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
2 | pat.o | 2 | pat.o pgtable.o |
3 | 3 | ||
4 | obj-$(CONFIG_X86_32) += pgtable_32.o | 4 | obj-$(CONFIG_X86_32) += pgtable_32.o |
5 | 5 | ||
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9ec62da85fd7..08aa1878fad4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
71 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 71 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
72 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 72 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
73 | 73 | ||
74 | paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 74 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
75 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 75 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
76 | pud = pud_offset(pgd, 0); | 76 | pud = pud_offset(pgd, 0); |
77 | BUG_ON(pmd_table != pmd_offset(pud, 0)); | 77 | BUG_ON(pmd_table != pmd_offset(pud, 0)); |
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
100 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 100 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
101 | } | 101 | } |
102 | 102 | ||
103 | paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 103 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
104 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 104 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
105 | BUG_ON(page_table != pte_offset_kernel(pmd, 0)); | 105 | BUG_ON(page_table != pte_offset_kernel(pmd, 0)); |
106 | } | 106 | } |
@@ -365,7 +365,7 @@ void __init native_pagetable_setup_start(pgd_t *base) | |||
365 | 365 | ||
366 | pte_clear(NULL, va, pte); | 366 | pte_clear(NULL, va, pte); |
367 | } | 367 | } |
368 | paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); | 368 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); |
369 | } | 369 | } |
370 | 370 | ||
371 | void __init native_pagetable_setup_done(pgd_t *base) | 371 | void __init native_pagetable_setup_done(pgd_t *base) |
@@ -457,7 +457,7 @@ void zap_low_mappings(void) | |||
457 | * Note that "pgd_clear()" doesn't do it for | 457 | * Note that "pgd_clear()" doesn't do it for |
458 | * us, because pgd_clear() is a no-op on i386. | 458 | * us, because pgd_clear() is a no-op on i386. |
459 | */ | 459 | */ |
460 | for (i = 0; i < USER_PTRS_PER_PGD; i++) { | 460 | for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { |
461 | #ifdef CONFIG_X86_PAE | 461 | #ifdef CONFIG_X86_PAE |
462 | set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); | 462 | set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); |
463 | #else | 463 | #else |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 3a4baf95e24d..36a3f7ded626 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -407,7 +407,7 @@ void __init early_ioremap_clear(void) | |||
407 | 407 | ||
408 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); | 408 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
409 | pmd_clear(pmd); | 409 | pmd_clear(pmd); |
410 | paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); | 410 | paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); |
411 | __flush_tlb_all(); | 411 | __flush_tlb_all(); |
412 | } | 412 | } |
413 | 413 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c29ebd037254..bd5e05c654dc 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
483 | goto out_unlock; | 483 | goto out_unlock; |
484 | 484 | ||
485 | pbase = (pte_t *)page_address(base); | 485 | pbase = (pte_t *)page_address(base); |
486 | #ifdef CONFIG_X86_32 | 486 | paravirt_alloc_pte(&init_mm, page_to_pfn(base)); |
487 | paravirt_alloc_pt(&init_mm, page_to_pfn(base)); | ||
488 | #endif | ||
489 | ref_prot = pte_pgprot(pte_clrhuge(*kpte)); | 487 | ref_prot = pte_pgprot(pte_clrhuge(*kpte)); |
490 | 488 | ||
491 | #ifdef CONFIG_X86_64 | 489 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c new file mode 100644 index 000000000000..50159764f694 --- /dev/null +++ b/arch/x86/mm/pgtable.c | |||
@@ -0,0 +1,276 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <asm/pgalloc.h> | ||
3 | #include <asm/pgtable.h> | ||
4 | #include <asm/tlb.h> | ||
5 | |||
6 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | ||
7 | { | ||
8 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | ||
9 | } | ||
10 | |||
11 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
12 | { | ||
13 | struct page *pte; | ||
14 | |||
15 | #ifdef CONFIG_HIGHPTE | ||
16 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | ||
17 | #else | ||
18 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | ||
19 | #endif | ||
20 | if (pte) | ||
21 | pgtable_page_ctor(pte); | ||
22 | return pte; | ||
23 | } | ||
24 | |||
25 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | ||
26 | { | ||
27 | pgtable_page_dtor(pte); | ||
28 | paravirt_release_pte(page_to_pfn(pte)); | ||
29 | tlb_remove_page(tlb, pte); | ||
30 | } | ||
31 | |||
32 | #if PAGETABLE_LEVELS > 2 | ||
33 | void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | ||
34 | { | ||
35 | paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); | ||
36 | tlb_remove_page(tlb, virt_to_page(pmd)); | ||
37 | } | ||
38 | |||
39 | #if PAGETABLE_LEVELS > 3 | ||
40 | void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | ||
41 | { | ||
42 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | ||
43 | tlb_remove_page(tlb, virt_to_page(pud)); | ||
44 | } | ||
45 | #endif /* PAGETABLE_LEVELS > 3 */ | ||
46 | #endif /* PAGETABLE_LEVELS > 2 */ | ||
47 | |||
48 | static inline void pgd_list_add(pgd_t *pgd) | ||
49 | { | ||
50 | struct page *page = virt_to_page(pgd); | ||
51 | |||
52 | list_add(&page->lru, &pgd_list); | ||
53 | } | ||
54 | |||
55 | static inline void pgd_list_del(pgd_t *pgd) | ||
56 | { | ||
57 | struct page *page = virt_to_page(pgd); | ||
58 | |||
59 | list_del(&page->lru); | ||
60 | } | ||
61 | |||
62 | #define UNSHARED_PTRS_PER_PGD \ | ||
63 | (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) | ||
64 | |||
65 | static void pgd_ctor(void *p) | ||
66 | { | ||
67 | pgd_t *pgd = p; | ||
68 | unsigned long flags; | ||
69 | |||
70 | /* Clear usermode parts of PGD */ | ||
71 | memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t)); | ||
72 | |||
73 | spin_lock_irqsave(&pgd_lock, flags); | ||
74 | |||
75 | /* If the pgd points to a shared pagetable level (either the | ||
76 | ptes in non-PAE, or shared PMD in PAE), then just copy the | ||
77 | references from swapper_pg_dir. */ | ||
78 | if (PAGETABLE_LEVELS == 2 || | ||
79 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || | ||
80 | PAGETABLE_LEVELS == 4) { | ||
81 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | ||
82 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
83 | KERNEL_PGD_PTRS); | ||
84 | paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
85 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
86 | KERNEL_PGD_BOUNDARY, | ||
87 | KERNEL_PGD_PTRS); | ||
88 | } | ||
89 | |||
90 | /* list required to sync kernel mapping updates */ | ||
91 | if (!SHARED_KERNEL_PMD) | ||
92 | pgd_list_add(pgd); | ||
93 | |||
94 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
95 | } | ||
96 | |||
97 | static void pgd_dtor(void *pgd) | ||
98 | { | ||
99 | unsigned long flags; /* can be called from interrupt context */ | ||
100 | |||
101 | if (SHARED_KERNEL_PMD) | ||
102 | return; | ||
103 | |||
104 | spin_lock_irqsave(&pgd_lock, flags); | ||
105 | pgd_list_del(pgd); | ||
106 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * List of all pgd's needed for non-PAE so it can invalidate entries | ||
111 | * in both cached and uncached pgd's; not needed for PAE since the | ||
112 | * kernel pmd is shared. If PAE were not to share the pmd a similar | ||
113 | * tactic would be needed. This is essentially codepath-based locking | ||
114 | * against pageattr.c; it is the unique case in which a valid change | ||
115 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. | ||
116 | * vmalloc faults work because attached pagetables are never freed. | ||
117 | * -- wli | ||
118 | */ | ||
119 | |||
120 | #ifdef CONFIG_X86_PAE | ||
121 | /* | ||
122 | * Mop up any pmd pages which may still be attached to the pgd. | ||
123 | * Normally they will be freed by munmap/exit_mmap, but any pmd we | ||
124 | * preallocate which never got a corresponding vma will need to be | ||
125 | * freed manually. | ||
126 | */ | ||
127 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | ||
128 | { | ||
129 | int i; | ||
130 | |||
131 | for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { | ||
132 | pgd_t pgd = pgdp[i]; | ||
133 | |||
134 | if (pgd_val(pgd) != 0) { | ||
135 | pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | ||
136 | |||
137 | pgdp[i] = native_make_pgd(0); | ||
138 | |||
139 | paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); | ||
140 | pmd_free(mm, pmd); | ||
141 | } | ||
142 | } | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * In PAE mode, we need to do a cr3 reload (=tlb flush) when | ||
147 | * updating the top-level pagetable entries to guarantee the | ||
148 | * processor notices the update. Since this is expensive, and | ||
149 | * all 4 top-level entries are used almost immediately in a | ||
150 | * new process's life, we just pre-populate them here. | ||
151 | * | ||
152 | * Also, if we're in a paravirt environment where the kernel pmd is | ||
153 | * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate | ||
154 | * and initialize the kernel pmds here. | ||
155 | */ | ||
156 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
157 | { | ||
158 | pud_t *pud; | ||
159 | unsigned long addr; | ||
160 | int i; | ||
161 | |||
162 | pud = pud_offset(pgd, 0); | ||
163 | for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; | ||
164 | i++, pud++, addr += PUD_SIZE) { | ||
165 | pmd_t *pmd = pmd_alloc_one(mm, addr); | ||
166 | |||
167 | if (!pmd) { | ||
168 | pgd_mop_up_pmds(mm, pgd); | ||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | if (i >= KERNEL_PGD_BOUNDARY) | ||
173 | memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | ||
174 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
175 | |||
176 | pud_populate(mm, pud, pmd); | ||
177 | } | ||
178 | |||
179 | return 1; | ||
180 | } | ||
181 | |||
182 | void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | ||
183 | { | ||
184 | paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); | ||
185 | |||
186 | /* Note: almost everything apart from _PAGE_PRESENT is | ||
187 | reserved at the pmd (PDPT) level. */ | ||
188 | set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); | ||
189 | |||
190 | /* | ||
191 | * According to Intel App note "TLBs, Paging-Structure Caches, | ||
192 | * and Their Invalidation", April 2007, document 317080-001, | ||
193 | * section 8.1: in PAE mode we explicitly have to flush the | ||
194 | * TLB via cr3 if the top-level pgd is changed... | ||
195 | */ | ||
196 | if (mm == current->active_mm) | ||
197 | write_cr3(read_cr3()); | ||
198 | } | ||
199 | #else /* !CONFIG_X86_PAE */ | ||
200 | /* No need to prepopulate any pagetable entries in non-PAE modes. */ | ||
201 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
202 | { | ||
203 | return 1; | ||
204 | } | ||
205 | |||
206 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd) | ||
207 | { | ||
208 | } | ||
209 | #endif /* CONFIG_X86_PAE */ | ||
210 | |||
211 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
212 | { | ||
213 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
214 | |||
215 | /* so that alloc_pmd can use it */ | ||
216 | mm->pgd = pgd; | ||
217 | if (pgd) | ||
218 | pgd_ctor(pgd); | ||
219 | |||
220 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | ||
221 | pgd_dtor(pgd); | ||
222 | free_page((unsigned long)pgd); | ||
223 | pgd = NULL; | ||
224 | } | ||
225 | |||
226 | return pgd; | ||
227 | } | ||
228 | |||
229 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
230 | { | ||
231 | pgd_mop_up_pmds(mm, pgd); | ||
232 | pgd_dtor(pgd); | ||
233 | free_page((unsigned long)pgd); | ||
234 | } | ||
235 | |||
236 | int ptep_set_access_flags(struct vm_area_struct *vma, | ||
237 | unsigned long address, pte_t *ptep, | ||
238 | pte_t entry, int dirty) | ||
239 | { | ||
240 | int changed = !pte_same(*ptep, entry); | ||
241 | |||
242 | if (changed && dirty) { | ||
243 | *ptep = entry; | ||
244 | pte_update_defer(vma->vm_mm, address, ptep); | ||
245 | flush_tlb_page(vma, address); | ||
246 | } | ||
247 | |||
248 | return changed; | ||
249 | } | ||
250 | |||
251 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | ||
252 | unsigned long addr, pte_t *ptep) | ||
253 | { | ||
254 | int ret = 0; | ||
255 | |||
256 | if (pte_young(*ptep)) | ||
257 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | ||
258 | &ptep->pte); | ||
259 | |||
260 | if (ret) | ||
261 | pte_update(vma->vm_mm, addr, ptep); | ||
262 | |||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | int ptep_clear_flush_young(struct vm_area_struct *vma, | ||
267 | unsigned long address, pte_t *ptep) | ||
268 | { | ||
269 | int young; | ||
270 | |||
271 | young = ptep_test_and_clear_young(vma, address, ptep); | ||
272 | if (young) | ||
273 | flush_tlb_page(vma, address); | ||
274 | |||
275 | return young; | ||
276 | } | ||
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6fb9e7c6893f..9ee007be9142 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve) | |||
173 | __VMALLOC_RESERVE += reserve; | 173 | __VMALLOC_RESERVE += reserve; |
174 | } | 174 | } |
175 | 175 | ||
176 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | ||
177 | { | ||
178 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | ||
179 | } | ||
180 | |||
181 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
182 | { | ||
183 | struct page *pte; | ||
184 | |||
185 | #ifdef CONFIG_HIGHPTE | ||
186 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | ||
187 | #else | ||
188 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | ||
189 | #endif | ||
190 | if (pte) | ||
191 | pgtable_page_ctor(pte); | ||
192 | return pte; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * List of all pgd's needed for non-PAE so it can invalidate entries | ||
197 | * in both cached and uncached pgd's; not needed for PAE since the | ||
198 | * kernel pmd is shared. If PAE were not to share the pmd a similar | ||
199 | * tactic would be needed. This is essentially codepath-based locking | ||
200 | * against pageattr.c; it is the unique case in which a valid change | ||
201 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. | ||
202 | * vmalloc faults work because attached pagetables are never freed. | ||
203 | * -- wli | ||
204 | */ | ||
205 | static inline void pgd_list_add(pgd_t *pgd) | ||
206 | { | ||
207 | struct page *page = virt_to_page(pgd); | ||
208 | |||
209 | list_add(&page->lru, &pgd_list); | ||
210 | } | ||
211 | |||
212 | static inline void pgd_list_del(pgd_t *pgd) | ||
213 | { | ||
214 | struct page *page = virt_to_page(pgd); | ||
215 | |||
216 | list_del(&page->lru); | ||
217 | } | ||
218 | |||
219 | #define UNSHARED_PTRS_PER_PGD \ | ||
220 | (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | ||
221 | |||
222 | static void pgd_ctor(void *p) | ||
223 | { | ||
224 | pgd_t *pgd = p; | ||
225 | unsigned long flags; | ||
226 | |||
227 | /* Clear usermode parts of PGD */ | ||
228 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
229 | |||
230 | spin_lock_irqsave(&pgd_lock, flags); | ||
231 | |||
232 | /* If the pgd points to a shared pagetable level (either the | ||
233 | ptes in non-PAE, or shared PMD in PAE), then just copy the | ||
234 | references from swapper_pg_dir. */ | ||
235 | if (PAGETABLE_LEVELS == 2 || | ||
236 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { | ||
237 | clone_pgd_range(pgd + USER_PTRS_PER_PGD, | ||
238 | swapper_pg_dir + USER_PTRS_PER_PGD, | ||
239 | KERNEL_PGD_PTRS); | ||
240 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
241 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
242 | USER_PTRS_PER_PGD, | ||
243 | KERNEL_PGD_PTRS); | ||
244 | } | ||
245 | |||
246 | /* list required to sync kernel mapping updates */ | ||
247 | if (!SHARED_KERNEL_PMD) | ||
248 | pgd_list_add(pgd); | ||
249 | |||
250 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
251 | } | ||
252 | |||
253 | static void pgd_dtor(void *pgd) | ||
254 | { | ||
255 | unsigned long flags; /* can be called from interrupt context */ | ||
256 | |||
257 | if (SHARED_KERNEL_PMD) | ||
258 | return; | ||
259 | |||
260 | spin_lock_irqsave(&pgd_lock, flags); | ||
261 | pgd_list_del(pgd); | ||
262 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
263 | } | ||
264 | |||
265 | #ifdef CONFIG_X86_PAE | ||
266 | /* | ||
267 | * Mop up any pmd pages which may still be attached to the pgd. | ||
268 | * Normally they will be freed by munmap/exit_mmap, but any pmd we | ||
269 | * preallocate which never got a corresponding vma will need to be | ||
270 | * freed manually. | ||
271 | */ | ||
272 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | ||
273 | { | ||
274 | int i; | ||
275 | |||
276 | for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { | ||
277 | pgd_t pgd = pgdp[i]; | ||
278 | |||
279 | if (pgd_val(pgd) != 0) { | ||
280 | pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | ||
281 | |||
282 | pgdp[i] = native_make_pgd(0); | ||
283 | |||
284 | paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT); | ||
285 | pmd_free(mm, pmd); | ||
286 | } | ||
287 | } | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * In PAE mode, we need to do a cr3 reload (=tlb flush) when | ||
292 | * updating the top-level pagetable entries to guarantee the | ||
293 | * processor notices the update. Since this is expensive, and | ||
294 | * all 4 top-level entries are used almost immediately in a | ||
295 | * new process's life, we just pre-populate them here. | ||
296 | * | ||
297 | * Also, if we're in a paravirt environment where the kernel pmd is | ||
298 | * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate | ||
299 | * and initialize the kernel pmds here. | ||
300 | */ | ||
301 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
302 | { | ||
303 | pud_t *pud; | ||
304 | unsigned long addr; | ||
305 | int i; | ||
306 | |||
307 | pud = pud_offset(pgd, 0); | ||
308 | for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; | ||
309 | i++, pud++, addr += PUD_SIZE) { | ||
310 | pmd_t *pmd = pmd_alloc_one(mm, addr); | ||
311 | |||
312 | if (!pmd) { | ||
313 | pgd_mop_up_pmds(mm, pgd); | ||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | if (i >= USER_PTRS_PER_PGD) | ||
318 | memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | ||
319 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
320 | |||
321 | pud_populate(mm, pud, pmd); | ||
322 | } | ||
323 | |||
324 | return 1; | ||
325 | } | ||
326 | #else /* !CONFIG_X86_PAE */ | ||
327 | /* No need to prepopulate any pagetable entries in non-PAE modes. */ | ||
328 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
329 | { | ||
330 | return 1; | ||
331 | } | ||
332 | |||
333 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | ||
334 | { | ||
335 | } | ||
336 | #endif /* CONFIG_X86_PAE */ | ||
337 | |||
338 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
339 | { | ||
340 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
341 | |||
342 | /* so that alloc_pd can use it */ | ||
343 | mm->pgd = pgd; | ||
344 | if (pgd) | ||
345 | pgd_ctor(pgd); | ||
346 | |||
347 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | ||
348 | pgd_dtor(pgd); | ||
349 | free_page((unsigned long)pgd); | ||
350 | pgd = NULL; | ||
351 | } | ||
352 | |||
353 | return pgd; | ||
354 | } | ||
355 | |||
356 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
357 | { | ||
358 | pgd_mop_up_pmds(mm, pgd); | ||
359 | pgd_dtor(pgd); | ||
360 | free_page((unsigned long)pgd); | ||
361 | } | ||
362 | |||
363 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | ||
364 | { | ||
365 | pgtable_page_dtor(pte); | ||
366 | paravirt_release_pt(page_to_pfn(pte)); | ||
367 | tlb_remove_page(tlb, pte); | ||
368 | } | ||
369 | |||
370 | #ifdef CONFIG_X86_PAE | ||
371 | |||
372 | void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | ||
373 | { | ||
374 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
375 | tlb_remove_page(tlb, virt_to_page(pmd)); | ||
376 | } | ||
377 | |||
378 | #endif | ||
379 | |||
380 | int pmd_bad(pmd_t pmd) | 176 | int pmd_bad(pmd_t pmd) |
381 | { | 177 | { |
382 | WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); | 178 | WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 4d5f2649bee4..2e641be2737e 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -6,7 +6,7 @@ config XEN | |||
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | depends on X86_32 | 8 | depends on X86_32 |
9 | depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER) | 9 | depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) |
10 | help | 10 | help |
11 | This is the Linux Xen port. Enabling this will allow the | 11 | This is the Linux Xen port. Enabling this will allow the |
12 | kernel to boot in a paravirtualized environment under the | 12 | kernel to boot in a paravirtualized environment under the |
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 343df246bd3e..3d8df981d5fd 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ | 1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ |
2 | events.o time.o manage.o xen-asm.o | 2 | time.o manage.o xen-asm.o grant-table.o |
3 | 3 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 4 | obj-$(CONFIG_SMP) += smp.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0388220cf97..c8a56e457d61 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
155 | if (*ax == 1) | 155 | if (*ax == 1) |
156 | maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ | 156 | maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ |
157 | (1 << X86_FEATURE_ACPI) | /* disable ACPI */ | 157 | (1 << X86_FEATURE_ACPI) | /* disable ACPI */ |
158 | (1 << X86_FEATURE_SEP) | /* disable SEP */ | 158 | (1 << X86_FEATURE_MCE) | /* disable MCE */ |
159 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
159 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 160 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
160 | 161 | ||
161 | asm(XEN_EMULATE_PREFIX "cpuid" | 162 | asm(XEN_EMULATE_PREFIX "cpuid" |
@@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val) | |||
531 | static void xen_flush_tlb(void) | 532 | static void xen_flush_tlb(void) |
532 | { | 533 | { |
533 | struct mmuext_op *op; | 534 | struct mmuext_op *op; |
534 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | 535 | struct multicall_space mcs; |
536 | |||
537 | preempt_disable(); | ||
538 | |||
539 | mcs = xen_mc_entry(sizeof(*op)); | ||
535 | 540 | ||
536 | op = mcs.args; | 541 | op = mcs.args; |
537 | op->cmd = MMUEXT_TLB_FLUSH_LOCAL; | 542 | op->cmd = MMUEXT_TLB_FLUSH_LOCAL; |
538 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 543 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
539 | 544 | ||
540 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 545 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
546 | |||
547 | preempt_enable(); | ||
541 | } | 548 | } |
542 | 549 | ||
543 | static void xen_flush_tlb_single(unsigned long addr) | 550 | static void xen_flush_tlb_single(unsigned long addr) |
544 | { | 551 | { |
545 | struct mmuext_op *op; | 552 | struct mmuext_op *op; |
546 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | 553 | struct multicall_space mcs; |
554 | |||
555 | preempt_disable(); | ||
547 | 556 | ||
557 | mcs = xen_mc_entry(sizeof(*op)); | ||
548 | op = mcs.args; | 558 | op = mcs.args; |
549 | op->cmd = MMUEXT_INVLPG_LOCAL; | 559 | op->cmd = MMUEXT_INVLPG_LOCAL; |
550 | op->arg1.linear_addr = addr & PAGE_MASK; | 560 | op->arg1.linear_addr = addr & PAGE_MASK; |
551 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 561 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
552 | 562 | ||
553 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 563 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
564 | |||
565 | preempt_enable(); | ||
554 | } | 566 | } |
555 | 567 | ||
556 | static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, | 568 | static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, |
@@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3) | |||
655 | 667 | ||
656 | /* Early in boot, while setting up the initial pagetable, assume | 668 | /* Early in boot, while setting up the initial pagetable, assume |
657 | everything is pinned. */ | 669 | everything is pinned. */ |
658 | static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) | 670 | static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) |
659 | { | 671 | { |
672 | #ifdef CONFIG_FLATMEM | ||
660 | BUG_ON(mem_map); /* should only be used early */ | 673 | BUG_ON(mem_map); /* should only be used early */ |
674 | #endif | ||
661 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 675 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); |
662 | } | 676 | } |
663 | 677 | ||
664 | /* Early release_pt assumes that all pts are pinned, since there's | 678 | /* Early release_pte assumes that all pts are pinned, since there's |
665 | only init_mm and anything attached to that is pinned. */ | 679 | only init_mm and anything attached to that is pinned. */ |
666 | static void xen_release_pt_init(u32 pfn) | 680 | static void xen_release_pte_init(u32 pfn) |
667 | { | 681 | { |
668 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 682 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
669 | } | 683 | } |
@@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) | |||
697 | } | 711 | } |
698 | } | 712 | } |
699 | 713 | ||
700 | static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) | 714 | static void xen_alloc_pte(struct mm_struct *mm, u32 pfn) |
701 | { | 715 | { |
702 | xen_alloc_ptpage(mm, pfn, PT_PTE); | 716 | xen_alloc_ptpage(mm, pfn, PT_PTE); |
703 | } | 717 | } |
704 | 718 | ||
705 | static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) | 719 | static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) |
706 | { | 720 | { |
707 | xen_alloc_ptpage(mm, pfn, PT_PMD); | 721 | xen_alloc_ptpage(mm, pfn, PT_PMD); |
708 | } | 722 | } |
@@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level) | |||
722 | } | 736 | } |
723 | } | 737 | } |
724 | 738 | ||
725 | static void xen_release_pt(u32 pfn) | 739 | static void xen_release_pte(u32 pfn) |
726 | { | 740 | { |
727 | xen_release_ptpage(pfn, PT_PTE); | 741 | xen_release_ptpage(pfn, PT_PTE); |
728 | } | 742 | } |
729 | 743 | ||
730 | static void xen_release_pd(u32 pfn) | 744 | static void xen_release_pmd(u32 pfn) |
731 | { | 745 | { |
732 | xen_release_ptpage(pfn, PT_PMD); | 746 | xen_release_ptpage(pfn, PT_PMD); |
733 | } | 747 | } |
@@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
849 | { | 863 | { |
850 | /* This will work as long as patching hasn't happened yet | 864 | /* This will work as long as patching hasn't happened yet |
851 | (which it hasn't) */ | 865 | (which it hasn't) */ |
852 | pv_mmu_ops.alloc_pt = xen_alloc_pt; | 866 | pv_mmu_ops.alloc_pte = xen_alloc_pte; |
853 | pv_mmu_ops.alloc_pd = xen_alloc_pd; | 867 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
854 | pv_mmu_ops.release_pt = xen_release_pt; | 868 | pv_mmu_ops.release_pte = xen_release_pte; |
855 | pv_mmu_ops.release_pd = xen_release_pd; | 869 | pv_mmu_ops.release_pmd = xen_release_pmd; |
856 | pv_mmu_ops.set_pte = xen_set_pte; | 870 | pv_mmu_ops.set_pte = xen_set_pte; |
857 | 871 | ||
858 | setup_shared_info(); | 872 | setup_shared_info(); |
@@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
994 | .read_pmc = native_read_pmc, | 1008 | .read_pmc = native_read_pmc, |
995 | 1009 | ||
996 | .iret = xen_iret, | 1010 | .iret = xen_iret, |
997 | .irq_enable_syscall_ret = NULL, /* never called */ | 1011 | .irq_enable_syscall_ret = xen_sysexit, |
998 | 1012 | ||
999 | .load_tr_desc = paravirt_nop, | 1013 | .load_tr_desc = paravirt_nop, |
1000 | .set_ldt = xen_set_ldt, | 1014 | .set_ldt = xen_set_ldt, |
@@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1059 | .pte_update = paravirt_nop, | 1073 | .pte_update = paravirt_nop, |
1060 | .pte_update_defer = paravirt_nop, | 1074 | .pte_update_defer = paravirt_nop, |
1061 | 1075 | ||
1062 | .alloc_pt = xen_alloc_pt_init, | 1076 | .alloc_pte = xen_alloc_pte_init, |
1063 | .release_pt = xen_release_pt_init, | 1077 | .release_pte = xen_release_pte_init, |
1064 | .alloc_pd = xen_alloc_pt_init, | 1078 | .alloc_pmd = xen_alloc_pte_init, |
1065 | .alloc_pd_clone = paravirt_nop, | 1079 | .alloc_pmd_clone = paravirt_nop, |
1066 | .release_pd = xen_release_pt_init, | 1080 | .release_pmd = xen_release_pte_init, |
1067 | 1081 | ||
1068 | #ifdef CONFIG_HIGHPTE | 1082 | #ifdef CONFIG_HIGHPTE |
1069 | .kmap_atomic_pte = xen_kmap_atomic_pte, | 1083 | .kmap_atomic_pte = xen_kmap_atomic_pte, |
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c new file mode 100644 index 000000000000..49ba9b5224d1 --- /dev/null +++ b/arch/x86/xen/grant-table.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /****************************************************************************** | ||
2 | * grant-table.c | ||
3 | * x86 specific part | ||
4 | * | ||
5 | * Granting foreign access to our memory reservation. | ||
6 | * | ||
7 | * Copyright (c) 2005-2006, Christopher Clark | ||
8 | * Copyright (c) 2004-2005, K A Fraser | ||
9 | * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> | ||
10 | * VA Linux Systems Japan. Split out x86 specific part. | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License version 2 | ||
14 | * as published by the Free Software Foundation; or, when distributed | ||
15 | * separately from the Linux kernel or incorporated into other | ||
16 | * software packages, subject to the following license: | ||
17 | * | ||
18 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
19 | * of this source file (the "Software"), to deal in the Software without | ||
20 | * restriction, including without limitation the rights to use, copy, modify, | ||
21 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
22 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
23 | * the following conditions: | ||
24 | * | ||
25 | * The above copyright notice and this permission notice shall be included in | ||
26 | * all copies or substantial portions of the Software. | ||
27 | * | ||
28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
30 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
31 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
32 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
33 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
34 | * IN THE SOFTWARE. | ||
35 | */ | ||
36 | |||
37 | #include <linux/sched.h> | ||
38 | #include <linux/mm.h> | ||
39 | #include <linux/vmalloc.h> | ||
40 | |||
41 | #include <xen/interface/xen.h> | ||
42 | #include <xen/page.h> | ||
43 | #include <xen/grant_table.h> | ||
44 | |||
45 | #include <asm/pgtable.h> | ||
46 | |||
47 | static int map_pte_fn(pte_t *pte, struct page *pmd_page, | ||
48 | unsigned long addr, void *data) | ||
49 | { | ||
50 | unsigned long **frames = (unsigned long **)data; | ||
51 | |||
52 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
53 | (*frames)++; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | ||
58 | unsigned long addr, void *data) | ||
59 | { | ||
60 | |||
61 | set_pte_at(&init_mm, addr, pte, __pte(0)); | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | ||
66 | unsigned long max_nr_gframes, | ||
67 | struct grant_entry **__shared) | ||
68 | { | ||
69 | int rc; | ||
70 | struct grant_entry *shared = *__shared; | ||
71 | |||
72 | if (shared == NULL) { | ||
73 | struct vm_struct *area = | ||
74 | xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes); | ||
75 | BUG_ON(area == NULL); | ||
76 | shared = area->addr; | ||
77 | *__shared = shared; | ||
78 | } | ||
79 | |||
80 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
81 | PAGE_SIZE * nr_gframes, | ||
82 | map_pte_fn, &frames); | ||
83 | return rc; | ||
84 | } | ||
85 | |||
86 | void arch_gnttab_unmap_shared(struct grant_entry *shared, | ||
87 | unsigned long nr_gframes) | ||
88 | { | ||
89 | apply_to_page_range(&init_mm, (unsigned long)shared, | ||
90 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | ||
91 | } | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2a054ef2a3da..6cbcf65609ad 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | |||
156 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 156 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
157 | pte_t *ptep, pte_t pteval) | 157 | pte_t *ptep, pte_t pteval) |
158 | { | 158 | { |
159 | /* updates to init_mm may be done without lock */ | ||
160 | if (mm == &init_mm) | ||
161 | preempt_disable(); | ||
162 | |||
159 | if (mm == current->mm || mm == &init_mm) { | 163 | if (mm == current->mm || mm == &init_mm) { |
160 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 164 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
161 | struct multicall_space mcs; | 165 | struct multicall_space mcs; |
@@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
163 | 167 | ||
164 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | 168 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); |
165 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 169 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
166 | return; | 170 | goto out; |
167 | } else | 171 | } else |
168 | if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) | 172 | if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) |
169 | return; | 173 | goto out; |
170 | } | 174 | } |
171 | xen_set_pte(ptep, pteval); | 175 | xen_set_pte(ptep, pteval); |
176 | |||
177 | out: | ||
178 | if (mm == &init_mm) | ||
179 | preempt_enable(); | ||
180 | } | ||
181 | |||
182 | pteval_t xen_pte_val(pte_t pte) | ||
183 | { | ||
184 | pteval_t ret = pte.pte; | ||
185 | |||
186 | if (ret & _PAGE_PRESENT) | ||
187 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
188 | |||
189 | return ret; | ||
190 | } | ||
191 | |||
192 | pgdval_t xen_pgd_val(pgd_t pgd) | ||
193 | { | ||
194 | pgdval_t ret = pgd.pgd; | ||
195 | if (ret & _PAGE_PRESENT) | ||
196 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
197 | return ret; | ||
198 | } | ||
199 | |||
200 | pte_t xen_make_pte(pteval_t pte) | ||
201 | { | ||
202 | if (pte & _PAGE_PRESENT) { | ||
203 | pte = phys_to_machine(XPADDR(pte)).maddr; | ||
204 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | ||
205 | } | ||
206 | |||
207 | return (pte_t){ .pte = pte }; | ||
172 | } | 208 | } |
173 | 209 | ||
210 | pgd_t xen_make_pgd(pgdval_t pgd) | ||
211 | { | ||
212 | if (pgd & _PAGE_PRESENT) | ||
213 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | ||
214 | |||
215 | return (pgd_t){ pgd }; | ||
216 | } | ||
217 | |||
218 | pmdval_t xen_pmd_val(pmd_t pmd) | ||
219 | { | ||
220 | pmdval_t ret = native_pmd_val(pmd); | ||
221 | if (ret & _PAGE_PRESENT) | ||
222 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
223 | return ret; | ||
224 | } | ||
174 | #ifdef CONFIG_X86_PAE | 225 | #ifdef CONFIG_X86_PAE |
175 | void xen_set_pud(pud_t *ptr, pud_t val) | 226 | void xen_set_pud(pud_t *ptr, pud_t val) |
176 | { | 227 | { |
@@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp) | |||
214 | xen_set_pmd(pmdp, __pmd(0)); | 265 | xen_set_pmd(pmdp, __pmd(0)); |
215 | } | 266 | } |
216 | 267 | ||
217 | unsigned long long xen_pte_val(pte_t pte) | 268 | pmd_t xen_make_pmd(pmdval_t pmd) |
218 | { | 269 | { |
219 | unsigned long long ret = 0; | 270 | if (pmd & _PAGE_PRESENT) |
220 | |||
221 | if (pte.pte_low) { | ||
222 | ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low; | ||
223 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
224 | } | ||
225 | |||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | unsigned long long xen_pmd_val(pmd_t pmd) | ||
230 | { | ||
231 | unsigned long long ret = pmd.pmd; | ||
232 | if (ret) | ||
233 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
234 | return ret; | ||
235 | } | ||
236 | |||
237 | unsigned long long xen_pgd_val(pgd_t pgd) | ||
238 | { | ||
239 | unsigned long long ret = pgd.pgd; | ||
240 | if (ret) | ||
241 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
242 | return ret; | ||
243 | } | ||
244 | |||
245 | pte_t xen_make_pte(unsigned long long pte) | ||
246 | { | ||
247 | if (pte & _PAGE_PRESENT) { | ||
248 | pte = phys_to_machine(XPADDR(pte)).maddr; | ||
249 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | ||
250 | } | ||
251 | |||
252 | return (pte_t){ .pte = pte }; | ||
253 | } | ||
254 | |||
255 | pmd_t xen_make_pmd(unsigned long long pmd) | ||
256 | { | ||
257 | if (pmd & 1) | ||
258 | pmd = phys_to_machine(XPADDR(pmd)).maddr; | 271 | pmd = phys_to_machine(XPADDR(pmd)).maddr; |
259 | 272 | ||
260 | return (pmd_t){ pmd }; | 273 | return native_make_pmd(pmd); |
261 | } | ||
262 | |||
263 | pgd_t xen_make_pgd(unsigned long long pgd) | ||
264 | { | ||
265 | if (pgd & _PAGE_PRESENT) | ||
266 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | ||
267 | |||
268 | return (pgd_t){ pgd }; | ||
269 | } | 274 | } |
270 | #else /* !PAE */ | 275 | #else /* !PAE */ |
271 | void xen_set_pte(pte_t *ptep, pte_t pte) | 276 | void xen_set_pte(pte_t *ptep, pte_t pte) |
272 | { | 277 | { |
273 | *ptep = pte; | 278 | *ptep = pte; |
274 | } | 279 | } |
275 | |||
276 | unsigned long xen_pte_val(pte_t pte) | ||
277 | { | ||
278 | unsigned long ret = pte.pte_low; | ||
279 | |||
280 | if (ret & _PAGE_PRESENT) | ||
281 | ret = machine_to_phys(XMADDR(ret)).paddr; | ||
282 | |||
283 | return ret; | ||
284 | } | ||
285 | |||
286 | unsigned long xen_pgd_val(pgd_t pgd) | ||
287 | { | ||
288 | unsigned long ret = pgd.pgd; | ||
289 | if (ret) | ||
290 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
291 | return ret; | ||
292 | } | ||
293 | |||
294 | pte_t xen_make_pte(unsigned long pte) | ||
295 | { | ||
296 | if (pte & _PAGE_PRESENT) { | ||
297 | pte = phys_to_machine(XPADDR(pte)).maddr; | ||
298 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | ||
299 | } | ||
300 | |||
301 | return (pte_t){ pte }; | ||
302 | } | ||
303 | |||
304 | pgd_t xen_make_pgd(unsigned long pgd) | ||
305 | { | ||
306 | if (pgd & _PAGE_PRESENT) | ||
307 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | ||
308 | |||
309 | return (pgd_t){ pgd }; | ||
310 | } | ||
311 | #endif /* CONFIG_X86_PAE */ | 280 | #endif /* CONFIG_X86_PAE */ |
312 | 281 | ||
313 | /* | 282 | /* |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 2341492bf7a0..82517e4a752a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/xen/hypervisor.h> | 16 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 17 | #include <asm/xen/hypercall.h> |
18 | 18 | ||
19 | #include <xen/interface/callback.h> | ||
19 | #include <xen/interface/physdev.h> | 20 | #include <xen/interface/physdev.h> |
20 | #include <xen/features.h> | 21 | #include <xen/features.h> |
21 | 22 | ||
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void) | |||
68 | *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; | 69 | *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; |
69 | } | 70 | } |
70 | 71 | ||
72 | void xen_enable_sysenter(void) | ||
73 | { | ||
74 | int cpu = smp_processor_id(); | ||
75 | extern void xen_sysenter_target(void); | ||
76 | /* Mask events on entry, even though they get enabled immediately */ | ||
77 | static struct callback_register sysenter = { | ||
78 | .type = CALLBACKTYPE_sysenter, | ||
79 | .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target }, | ||
80 | .flags = CALLBACKF_mask_events, | ||
81 | }; | ||
82 | |||
83 | if (!boot_cpu_has(X86_FEATURE_SEP) || | ||
84 | HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) { | ||
85 | clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); | ||
86 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); | ||
87 | } | ||
88 | } | ||
89 | |||
71 | void __init xen_arch_setup(void) | 90 | void __init xen_arch_setup(void) |
72 | { | 91 | { |
73 | struct physdev_set_iopl set_iopl; | 92 | struct physdev_set_iopl set_iopl; |
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void) | |||
82 | HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, | 101 | HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, |
83 | __KERNEL_CS, (unsigned long)xen_failsafe_callback); | 102 | __KERNEL_CS, (unsigned long)xen_failsafe_callback); |
84 | 103 | ||
104 | xen_enable_sysenter(); | ||
105 | |||
85 | set_iopl.iopl = 1; | 106 | set_iopl.iopl = 1; |
86 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | 107 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); |
87 | if (rc != 0) | 108 | if (rc != 0) |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index e340ff92f6b6..92dd3dbf3ffb 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -36,8 +36,9 @@ | |||
36 | #include "mmu.h" | 36 | #include "mmu.h" |
37 | 37 | ||
38 | static cpumask_t xen_cpu_initialized_map; | 38 | static cpumask_t xen_cpu_initialized_map; |
39 | static DEFINE_PER_CPU(int, resched_irq); | 39 | static DEFINE_PER_CPU(int, resched_irq) = -1; |
40 | static DEFINE_PER_CPU(int, callfunc_irq); | 40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; |
41 | static DEFINE_PER_CPU(int, debug_irq) = -1; | ||
41 | 42 | ||
42 | /* | 43 | /* |
43 | * Structure and data for smp_call_function(). This is designed to minimise | 44 | * Structure and data for smp_call_function(). This is designed to minimise |
@@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void) | |||
72 | int cpu = smp_processor_id(); | 73 | int cpu = smp_processor_id(); |
73 | 74 | ||
74 | cpu_init(); | 75 | cpu_init(); |
76 | xen_enable_sysenter(); | ||
75 | 77 | ||
76 | preempt_disable(); | 78 | preempt_disable(); |
77 | per_cpu(cpu_state, cpu) = CPU_ONLINE; | 79 | per_cpu(cpu_state, cpu) = CPU_ONLINE; |
@@ -88,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void) | |||
88 | static int xen_smp_intr_init(unsigned int cpu) | 90 | static int xen_smp_intr_init(unsigned int cpu) |
89 | { | 91 | { |
90 | int rc; | 92 | int rc; |
91 | const char *resched_name, *callfunc_name; | 93 | const char *resched_name, *callfunc_name, *debug_name; |
92 | |||
93 | per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; | ||
94 | 94 | ||
95 | resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); | 95 | resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); |
96 | rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, | 96 | rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, |
@@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
114 | goto fail; | 114 | goto fail; |
115 | per_cpu(callfunc_irq, cpu) = rc; | 115 | per_cpu(callfunc_irq, cpu) = rc; |
116 | 116 | ||
117 | debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); | ||
118 | rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, | ||
119 | IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING, | ||
120 | debug_name, NULL); | ||
121 | if (rc < 0) | ||
122 | goto fail; | ||
123 | per_cpu(debug_irq, cpu) = rc; | ||
124 | |||
117 | return 0; | 125 | return 0; |
118 | 126 | ||
119 | fail: | 127 | fail: |
@@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
121 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | 129 | unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); |
122 | if (per_cpu(callfunc_irq, cpu) >= 0) | 130 | if (per_cpu(callfunc_irq, cpu) >= 0) |
123 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | 131 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); |
132 | if (per_cpu(debug_irq, cpu) >= 0) | ||
133 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | ||
124 | return rc; | 134 | return rc; |
125 | } | 135 | } |
126 | 136 | ||
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index fe161ed4b01e..2497a30f41de 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S | |||
@@ -108,6 +108,20 @@ ENDPATCH(xen_restore_fl_direct) | |||
108 | RELOC(xen_restore_fl_direct, 2b+1) | 108 | RELOC(xen_restore_fl_direct, 2b+1) |
109 | 109 | ||
110 | /* | 110 | /* |
111 | We can't use sysexit directly, because we're not running in ring0. | ||
112 | But we can easily fake it up using iret. Assuming xen_sysexit | ||
113 | is jumped to with a standard stack frame, we can just strip it | ||
114 | back to a standard iret frame and use iret. | ||
115 | */ | ||
116 | ENTRY(xen_sysexit) | ||
117 | movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ | ||
118 | orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) | ||
119 | lea PT_EIP(%esp), %esp | ||
120 | |||
121 | jmp xen_iret | ||
122 | ENDPROC(xen_sysexit) | ||
123 | |||
124 | /* | ||
111 | This is run where a normal iret would be run, with the same stack setup: | 125 | This is run where a normal iret would be run, with the same stack setup: |
112 | 8: eflags | 126 | 8: eflags |
113 | 4: cs | 127 | 4: cs |
@@ -184,8 +198,12 @@ iret_restore_end: | |||
184 | region is OK. */ | 198 | region is OK. */ |
185 | je xen_hypervisor_callback | 199 | je xen_hypervisor_callback |
186 | 200 | ||
187 | iret | 201 | 1: iret |
188 | xen_iret_end_crit: | 202 | xen_iret_end_crit: |
203 | .section __ex_table,"a" | ||
204 | .align 4 | ||
205 | .long 1b,iret_exc | ||
206 | .previous | ||
189 | 207 | ||
190 | hyper_iret: | 208 | hyper_iret: |
191 | /* put this out of line since it's very rarely used */ | 209 | /* put this out of line since it's very rarely used */ |
@@ -219,9 +237,7 @@ hyper_iret: | |||
219 | ds } SAVE_ALL state | 237 | ds } SAVE_ALL state |
220 | eax } | 238 | eax } |
221 | : : | 239 | : : |
222 | ebx } | 240 | ebx }<- esp |
223 | ---------------- | ||
224 | return addr <- esp | ||
225 | ---------------- | 241 | ---------------- |
226 | 242 | ||
227 | In order to deliver the nested exception properly, we need to shift | 243 | In order to deliver the nested exception properly, we need to shift |
@@ -236,10 +252,8 @@ hyper_iret: | |||
236 | it's usermode state which we eventually need to restore. | 252 | it's usermode state which we eventually need to restore. |
237 | */ | 253 | */ |
238 | ENTRY(xen_iret_crit_fixup) | 254 | ENTRY(xen_iret_crit_fixup) |
239 | /* offsets +4 for return address */ | ||
240 | |||
241 | /* | 255 | /* |
242 | Paranoia: Make sure we're really coming from userspace. | 256 | Paranoia: Make sure we're really coming from kernel space. |
243 | One could imagine a case where userspace jumps into the | 257 | One could imagine a case where userspace jumps into the |
244 | critical range address, but just before the CPU delivers a GP, | 258 | critical range address, but just before the CPU delivers a GP, |
245 | it decides to deliver an interrupt instead. Unlikely? | 259 | it decides to deliver an interrupt instead. Unlikely? |
@@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup) | |||
248 | jump instruction itself, not the destination, but some virtual | 262 | jump instruction itself, not the destination, but some virtual |
249 | environments get this wrong. | 263 | environments get this wrong. |
250 | */ | 264 | */ |
251 | movl PT_CS+4(%esp), %ecx | 265 | movl PT_CS(%esp), %ecx |
252 | andl $SEGMENT_RPL_MASK, %ecx | 266 | andl $SEGMENT_RPL_MASK, %ecx |
253 | cmpl $USER_RPL, %ecx | 267 | cmpl $USER_RPL, %ecx |
254 | je 2f | 268 | je 2f |
255 | 269 | ||
256 | lea PT_ORIG_EAX+4(%esp), %esi | 270 | lea PT_ORIG_EAX(%esp), %esi |
257 | lea PT_EFLAGS+4(%esp), %edi | 271 | lea PT_EFLAGS(%esp), %edi |
258 | 272 | ||
259 | /* If eip is before iret_restore_end then stack | 273 | /* If eip is before iret_restore_end then stack |
260 | hasn't been restored yet. */ | 274 | hasn't been restored yet. */ |
261 | cmp $iret_restore_end, %eax | 275 | cmp $iret_restore_end, %eax |
262 | jae 1f | 276 | jae 1f |
263 | 277 | ||
264 | movl 0+4(%edi),%eax /* copy EAX */ | 278 | movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ |
265 | movl %eax, PT_EAX+4(%esp) | 279 | movl %eax, PT_EAX(%esp) |
266 | 280 | ||
267 | lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ | 281 | lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ |
268 | 282 | ||
269 | /* set up the copy */ | 283 | /* set up the copy */ |
270 | 1: std | 284 | 1: std |
271 | mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */ | 285 | mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ |
272 | rep movsl | 286 | rep movsl |
273 | cld | 287 | cld |
274 | 288 | ||
275 | lea 4(%edi),%esp /* point esp to new frame */ | 289 | lea 4(%edi),%esp /* point esp to new frame */ |
276 | 2: ret | 290 | 2: jmp xen_do_upcall |
277 | 291 | ||
278 | 292 | ||
279 | /* | 293 | /* |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 956a491ea998..f1063ae08037 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #define XEN_OPS_H | 2 | #define XEN_OPS_H |
3 | 3 | ||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/irqreturn.h> | ||
6 | #include <xen/xen-ops.h> | ||
5 | 7 | ||
6 | /* These are code, but not functions. Defined in entry.S */ | 8 | /* These are code, but not functions. Defined in entry.S */ |
7 | extern const char xen_hypervisor_callback[]; | 9 | extern const char xen_hypervisor_callback[]; |
@@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[]; | |||
9 | 11 | ||
10 | void xen_copy_trap_info(struct trap_info *traps); | 12 | void xen_copy_trap_info(struct trap_info *traps); |
11 | 13 | ||
12 | DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); | ||
13 | DECLARE_PER_CPU(unsigned long, xen_cr3); | 14 | DECLARE_PER_CPU(unsigned long, xen_cr3); |
14 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); | 15 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); |
15 | 16 | ||
@@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info; | |||
19 | char * __init xen_memory_setup(void); | 20 | char * __init xen_memory_setup(void); |
20 | void __init xen_arch_setup(void); | 21 | void __init xen_arch_setup(void); |
21 | void __init xen_init_IRQ(void); | 22 | void __init xen_init_IRQ(void); |
23 | void xen_enable_sysenter(void); | ||
22 | 24 | ||
23 | void xen_setup_timer(int cpu); | 25 | void xen_setup_timer(int cpu); |
24 | void xen_setup_cpu_clockevents(void); | 26 | void xen_setup_cpu_clockevents(void); |
@@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void); | |||
28 | int xen_set_wallclock(unsigned long time); | 30 | int xen_set_wallclock(unsigned long time); |
29 | unsigned long long xen_sched_clock(void); | 31 | unsigned long long xen_sched_clock(void); |
30 | 32 | ||
33 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | ||
34 | |||
31 | bool xen_vcpu_stolen(int vcpu); | 35 | bool xen_vcpu_stolen(int vcpu); |
32 | 36 | ||
33 | void xen_mark_init_mm_pinned(void); | 37 | void xen_mark_init_mm_pinned(void); |
@@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void); | |||
64 | DECL_ASM(void, xen_restore_fl_direct, unsigned long); | 68 | DECL_ASM(void, xen_restore_fl_direct, unsigned long); |
65 | 69 | ||
66 | void xen_iret(void); | 70 | void xen_iret(void); |
71 | void xen_sysexit(void); | ||
72 | |||
67 | #endif /* XEN_OPS_H */ | 73 | #endif /* XEN_OPS_H */ |
diff --git a/drivers/Kconfig b/drivers/Kconfig index 3a0e3549739f..80f0ec91e2cf 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig | |||
@@ -97,4 +97,6 @@ source "drivers/dca/Kconfig" | |||
97 | source "drivers/auxdisplay/Kconfig" | 97 | source "drivers/auxdisplay/Kconfig" |
98 | 98 | ||
99 | source "drivers/uio/Kconfig" | 99 | source "drivers/uio/Kconfig" |
100 | |||
101 | source "drivers/xen/Kconfig" | ||
100 | endmenu | 102 | endmenu |
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9c6f3f99208d..d771da816d95 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -47,6 +47,7 @@ | |||
47 | 47 | ||
48 | #include <xen/interface/grant_table.h> | 48 | #include <xen/interface/grant_table.h> |
49 | #include <xen/interface/io/blkif.h> | 49 | #include <xen/interface/io/blkif.h> |
50 | #include <xen/interface/io/protocols.h> | ||
50 | 51 | ||
51 | #include <asm/xen/hypervisor.h> | 52 | #include <asm/xen/hypervisor.h> |
52 | 53 | ||
@@ -74,7 +75,6 @@ static struct block_device_operations xlvbd_block_fops; | |||
74 | struct blkfront_info | 75 | struct blkfront_info |
75 | { | 76 | { |
76 | struct xenbus_device *xbdev; | 77 | struct xenbus_device *xbdev; |
77 | dev_t dev; | ||
78 | struct gendisk *gd; | 78 | struct gendisk *gd; |
79 | int vdevice; | 79 | int vdevice; |
80 | blkif_vdev_t handle; | 80 | blkif_vdev_t handle; |
@@ -88,6 +88,7 @@ struct blkfront_info | |||
88 | struct blk_shadow shadow[BLK_RING_SIZE]; | 88 | struct blk_shadow shadow[BLK_RING_SIZE]; |
89 | unsigned long shadow_free; | 89 | unsigned long shadow_free; |
90 | int feature_barrier; | 90 | int feature_barrier; |
91 | int is_ready; | ||
91 | 92 | ||
92 | /** | 93 | /** |
93 | * The number of people holding this device open. We won't allow a | 94 | * The number of people holding this device open. We won't allow a |
@@ -614,6 +615,12 @@ again: | |||
614 | message = "writing event-channel"; | 615 | message = "writing event-channel"; |
615 | goto abort_transaction; | 616 | goto abort_transaction; |
616 | } | 617 | } |
618 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | ||
619 | XEN_IO_PROTO_ABI_NATIVE); | ||
620 | if (err) { | ||
621 | message = "writing protocol"; | ||
622 | goto abort_transaction; | ||
623 | } | ||
617 | 624 | ||
618 | err = xenbus_transaction_end(xbt, 0); | 625 | err = xenbus_transaction_end(xbt, 0); |
619 | if (err) { | 626 | if (err) { |
@@ -833,6 +840,8 @@ static void blkfront_connect(struct blkfront_info *info) | |||
833 | spin_unlock_irq(&blkif_io_lock); | 840 | spin_unlock_irq(&blkif_io_lock); |
834 | 841 | ||
835 | add_disk(info->gd); | 842 | add_disk(info->gd); |
843 | |||
844 | info->is_ready = 1; | ||
836 | } | 845 | } |
837 | 846 | ||
838 | /** | 847 | /** |
@@ -896,7 +905,7 @@ static void backend_changed(struct xenbus_device *dev, | |||
896 | break; | 905 | break; |
897 | 906 | ||
898 | case XenbusStateClosing: | 907 | case XenbusStateClosing: |
899 | bd = bdget(info->dev); | 908 | bd = bdget_disk(info->gd, 0); |
900 | if (bd == NULL) | 909 | if (bd == NULL) |
901 | xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); | 910 | xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); |
902 | 911 | ||
@@ -925,6 +934,13 @@ static int blkfront_remove(struct xenbus_device *dev) | |||
925 | return 0; | 934 | return 0; |
926 | } | 935 | } |
927 | 936 | ||
937 | static int blkfront_is_ready(struct xenbus_device *dev) | ||
938 | { | ||
939 | struct blkfront_info *info = dev->dev.driver_data; | ||
940 | |||
941 | return info->is_ready; | ||
942 | } | ||
943 | |||
928 | static int blkif_open(struct inode *inode, struct file *filep) | 944 | static int blkif_open(struct inode *inode, struct file *filep) |
929 | { | 945 | { |
930 | struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; | 946 | struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; |
@@ -971,6 +987,7 @@ static struct xenbus_driver blkfront = { | |||
971 | .remove = blkfront_remove, | 987 | .remove = blkfront_remove, |
972 | .resume = blkfront_resume, | 988 | .resume = blkfront_resume, |
973 | .otherend_changed = backend_changed, | 989 | .otherend_changed = backend_changed, |
990 | .is_ready = blkfront_is_ready, | ||
974 | }; | 991 | }; |
975 | 992 | ||
976 | static int __init xlblk_init(void) | 993 | static int __init xlblk_init(void) |
@@ -998,3 +1015,5 @@ module_exit(xlblk_exit); | |||
998 | MODULE_DESCRIPTION("Xen virtual block device frontend"); | 1015 | MODULE_DESCRIPTION("Xen virtual block device frontend"); |
999 | MODULE_LICENSE("GPL"); | 1016 | MODULE_LICENSE("GPL"); |
1000 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); | 1017 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); |
1018 | MODULE_ALIAS("xen:vbd"); | ||
1019 | MODULE_ALIAS("xenblk"); | ||
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 9dea14db724c..5f9d860925a1 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig | |||
@@ -149,6 +149,15 @@ config INPUT_APMPOWER | |||
149 | To compile this driver as a module, choose M here: the | 149 | To compile this driver as a module, choose M here: the |
150 | module will be called apm-power. | 150 | module will be called apm-power. |
151 | 151 | ||
152 | config XEN_KBDDEV_FRONTEND | ||
153 | tristate "Xen virtual keyboard and mouse support" | ||
154 | depends on XEN_FBDEV_FRONTEND | ||
155 | default y | ||
156 | help | ||
157 | This driver implements the front-end of the Xen virtual | ||
158 | keyboard and mouse device driver. It communicates with a back-end | ||
159 | in another domain. | ||
160 | |||
152 | comment "Input Device Drivers" | 161 | comment "Input Device Drivers" |
153 | 162 | ||
154 | source "drivers/input/keyboard/Kconfig" | 163 | source "drivers/input/keyboard/Kconfig" |
diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 2ae87b19caa8..98c4f9a77876 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile | |||
@@ -23,3 +23,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/ | |||
23 | obj-$(CONFIG_INPUT_MISC) += misc/ | 23 | obj-$(CONFIG_INPUT_MISC) += misc/ |
24 | 24 | ||
25 | obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o | 25 | obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o |
26 | |||
27 | obj-$(CONFIG_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o | ||
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c new file mode 100644 index 000000000000..0f47f4697cdf --- /dev/null +++ b/drivers/input/xen-kbdfront.c | |||
@@ -0,0 +1,340 @@ | |||
1 | /* | ||
2 | * Xen para-virtual input device | ||
3 | * | ||
4 | * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> | ||
5 | * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> | ||
6 | * | ||
7 | * Based on linux/drivers/input/mouse/sermouse.c | ||
8 | * | ||
9 | * This file is subject to the terms and conditions of the GNU General Public | ||
10 | * License. See the file COPYING in the main directory of this archive for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | /* | ||
15 | * TODO: | ||
16 | * | ||
17 | * Switch to grant tables together with xen-fbfront.c. | ||
18 | */ | ||
19 | |||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/input.h> | ||
24 | #include <asm/xen/hypervisor.h> | ||
25 | #include <xen/events.h> | ||
26 | #include <xen/page.h> | ||
27 | #include <xen/interface/io/fbif.h> | ||
28 | #include <xen/interface/io/kbdif.h> | ||
29 | #include <xen/xenbus.h> | ||
30 | |||
/* Per-device state of one virtual keyboard/pointer frontend. */
struct xenkbd_info {
	struct input_dev *kbd;		/* registered keyboard device */
	struct input_dev *ptr;		/* registered pointer device */
	struct xenkbd_page *page;	/* event ring page shared with backend */
	int irq;			/* bound event-channel irq, -1 if none */
	struct xenbus_device *xbdev;	/* owning xenbus device */
	char phys[32];			/* "xenbus/<nodename>" */
};
39 | |||
/* Forward declarations for functions used before their definition. */
static int xenkbd_remove(struct xenbus_device *dev);
static int xenkbd_connect_backend(struct xenbus_device *dev,
				  struct xenkbd_info *info);
static void xenkbd_disconnect_backend(struct xenkbd_info *info);
43 | |||
44 | /* | ||
45 | * Note: if you need to send out events, see xenfb_do_update() for how | ||
46 | * to do that. | ||
47 | */ | ||
48 | |||
49 | static irqreturn_t input_handler(int rq, void *dev_id) | ||
50 | { | ||
51 | struct xenkbd_info *info = dev_id; | ||
52 | struct xenkbd_page *page = info->page; | ||
53 | __u32 cons, prod; | ||
54 | |||
55 | prod = page->in_prod; | ||
56 | if (prod == page->in_cons) | ||
57 | return IRQ_HANDLED; | ||
58 | rmb(); /* ensure we see ring contents up to prod */ | ||
59 | for (cons = page->in_cons; cons != prod; cons++) { | ||
60 | union xenkbd_in_event *event; | ||
61 | struct input_dev *dev; | ||
62 | event = &XENKBD_IN_RING_REF(page, cons); | ||
63 | |||
64 | dev = info->ptr; | ||
65 | switch (event->type) { | ||
66 | case XENKBD_TYPE_MOTION: | ||
67 | input_report_rel(dev, REL_X, event->motion.rel_x); | ||
68 | input_report_rel(dev, REL_Y, event->motion.rel_y); | ||
69 | break; | ||
70 | case XENKBD_TYPE_KEY: | ||
71 | dev = NULL; | ||
72 | if (test_bit(event->key.keycode, info->kbd->keybit)) | ||
73 | dev = info->kbd; | ||
74 | if (test_bit(event->key.keycode, info->ptr->keybit)) | ||
75 | dev = info->ptr; | ||
76 | if (dev) | ||
77 | input_report_key(dev, event->key.keycode, | ||
78 | event->key.pressed); | ||
79 | else | ||
80 | printk(KERN_WARNING | ||
81 | "xenkbd: unhandled keycode 0x%x\n", | ||
82 | event->key.keycode); | ||
83 | break; | ||
84 | case XENKBD_TYPE_POS: | ||
85 | input_report_abs(dev, ABS_X, event->pos.abs_x); | ||
86 | input_report_abs(dev, ABS_Y, event->pos.abs_y); | ||
87 | break; | ||
88 | } | ||
89 | if (dev) | ||
90 | input_sync(dev); | ||
91 | } | ||
92 | mb(); /* ensure we got ring contents */ | ||
93 | page->in_cons = cons; | ||
94 | notify_remote_via_irq(info->irq); | ||
95 | |||
96 | return IRQ_HANDLED; | ||
97 | } | ||
98 | |||
99 | static int __devinit xenkbd_probe(struct xenbus_device *dev, | ||
100 | const struct xenbus_device_id *id) | ||
101 | { | ||
102 | int ret, i; | ||
103 | struct xenkbd_info *info; | ||
104 | struct input_dev *kbd, *ptr; | ||
105 | |||
106 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
107 | if (!info) { | ||
108 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | ||
109 | return -ENOMEM; | ||
110 | } | ||
111 | dev->dev.driver_data = info; | ||
112 | info->xbdev = dev; | ||
113 | info->irq = -1; | ||
114 | snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename); | ||
115 | |||
116 | info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
117 | if (!info->page) | ||
118 | goto error_nomem; | ||
119 | |||
120 | /* keyboard */ | ||
121 | kbd = input_allocate_device(); | ||
122 | if (!kbd) | ||
123 | goto error_nomem; | ||
124 | kbd->name = "Xen Virtual Keyboard"; | ||
125 | kbd->phys = info->phys; | ||
126 | kbd->id.bustype = BUS_PCI; | ||
127 | kbd->id.vendor = 0x5853; | ||
128 | kbd->id.product = 0xffff; | ||
129 | kbd->evbit[0] = BIT(EV_KEY); | ||
130 | for (i = KEY_ESC; i < KEY_UNKNOWN; i++) | ||
131 | set_bit(i, kbd->keybit); | ||
132 | for (i = KEY_OK; i < KEY_MAX; i++) | ||
133 | set_bit(i, kbd->keybit); | ||
134 | |||
135 | ret = input_register_device(kbd); | ||
136 | if (ret) { | ||
137 | input_free_device(kbd); | ||
138 | xenbus_dev_fatal(dev, ret, "input_register_device(kbd)"); | ||
139 | goto error; | ||
140 | } | ||
141 | info->kbd = kbd; | ||
142 | |||
143 | /* pointing device */ | ||
144 | ptr = input_allocate_device(); | ||
145 | if (!ptr) | ||
146 | goto error_nomem; | ||
147 | ptr->name = "Xen Virtual Pointer"; | ||
148 | ptr->phys = info->phys; | ||
149 | ptr->id.bustype = BUS_PCI; | ||
150 | ptr->id.vendor = 0x5853; | ||
151 | ptr->id.product = 0xfffe; | ||
152 | ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); | ||
153 | for (i = BTN_LEFT; i <= BTN_TASK; i++) | ||
154 | set_bit(i, ptr->keybit); | ||
155 | ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); | ||
156 | input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); | ||
157 | input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); | ||
158 | |||
159 | ret = input_register_device(ptr); | ||
160 | if (ret) { | ||
161 | input_free_device(ptr); | ||
162 | xenbus_dev_fatal(dev, ret, "input_register_device(ptr)"); | ||
163 | goto error; | ||
164 | } | ||
165 | info->ptr = ptr; | ||
166 | |||
167 | ret = xenkbd_connect_backend(dev, info); | ||
168 | if (ret < 0) | ||
169 | goto error; | ||
170 | |||
171 | return 0; | ||
172 | |||
173 | error_nomem: | ||
174 | ret = -ENOMEM; | ||
175 | xenbus_dev_fatal(dev, ret, "allocating device memory"); | ||
176 | error: | ||
177 | xenkbd_remove(dev); | ||
178 | return ret; | ||
179 | } | ||
180 | |||
181 | static int xenkbd_resume(struct xenbus_device *dev) | ||
182 | { | ||
183 | struct xenkbd_info *info = dev->dev.driver_data; | ||
184 | |||
185 | xenkbd_disconnect_backend(info); | ||
186 | memset(info->page, 0, PAGE_SIZE); | ||
187 | return xenkbd_connect_backend(dev, info); | ||
188 | } | ||
189 | |||
190 | static int xenkbd_remove(struct xenbus_device *dev) | ||
191 | { | ||
192 | struct xenkbd_info *info = dev->dev.driver_data; | ||
193 | |||
194 | xenkbd_disconnect_backend(info); | ||
195 | if (info->kbd) | ||
196 | input_unregister_device(info->kbd); | ||
197 | if (info->ptr) | ||
198 | input_unregister_device(info->ptr); | ||
199 | free_page((unsigned long)info->page); | ||
200 | kfree(info); | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | static int xenkbd_connect_backend(struct xenbus_device *dev, | ||
205 | struct xenkbd_info *info) | ||
206 | { | ||
207 | int ret, evtchn; | ||
208 | struct xenbus_transaction xbt; | ||
209 | |||
210 | ret = xenbus_alloc_evtchn(dev, &evtchn); | ||
211 | if (ret) | ||
212 | return ret; | ||
213 | ret = bind_evtchn_to_irqhandler(evtchn, input_handler, | ||
214 | 0, dev->devicetype, info); | ||
215 | if (ret < 0) { | ||
216 | xenbus_free_evtchn(dev, evtchn); | ||
217 | xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); | ||
218 | return ret; | ||
219 | } | ||
220 | info->irq = ret; | ||
221 | |||
222 | again: | ||
223 | ret = xenbus_transaction_start(&xbt); | ||
224 | if (ret) { | ||
225 | xenbus_dev_fatal(dev, ret, "starting transaction"); | ||
226 | return ret; | ||
227 | } | ||
228 | ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", | ||
229 | virt_to_mfn(info->page)); | ||
230 | if (ret) | ||
231 | goto error_xenbus; | ||
232 | ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | ||
233 | evtchn); | ||
234 | if (ret) | ||
235 | goto error_xenbus; | ||
236 | ret = xenbus_transaction_end(xbt, 0); | ||
237 | if (ret) { | ||
238 | if (ret == -EAGAIN) | ||
239 | goto again; | ||
240 | xenbus_dev_fatal(dev, ret, "completing transaction"); | ||
241 | return ret; | ||
242 | } | ||
243 | |||
244 | xenbus_switch_state(dev, XenbusStateInitialised); | ||
245 | return 0; | ||
246 | |||
247 | error_xenbus: | ||
248 | xenbus_transaction_end(xbt, 1); | ||
249 | xenbus_dev_fatal(dev, ret, "writing xenstore"); | ||
250 | return ret; | ||
251 | } | ||
252 | |||
253 | static void xenkbd_disconnect_backend(struct xenkbd_info *info) | ||
254 | { | ||
255 | if (info->irq >= 0) | ||
256 | unbind_from_irqhandler(info->irq, info); | ||
257 | info->irq = -1; | ||
258 | } | ||
259 | |||
260 | static void xenkbd_backend_changed(struct xenbus_device *dev, | ||
261 | enum xenbus_state backend_state) | ||
262 | { | ||
263 | struct xenkbd_info *info = dev->dev.driver_data; | ||
264 | int ret, val; | ||
265 | |||
266 | switch (backend_state) { | ||
267 | case XenbusStateInitialising: | ||
268 | case XenbusStateInitialised: | ||
269 | case XenbusStateUnknown: | ||
270 | case XenbusStateClosed: | ||
271 | break; | ||
272 | |||
273 | case XenbusStateInitWait: | ||
274 | InitWait: | ||
275 | ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, | ||
276 | "feature-abs-pointer", "%d", &val); | ||
277 | if (ret < 0) | ||
278 | val = 0; | ||
279 | if (val) { | ||
280 | ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, | ||
281 | "request-abs-pointer", "1"); | ||
282 | if (ret) | ||
283 | printk(KERN_WARNING | ||
284 | "xenkbd: can't request abs-pointer"); | ||
285 | } | ||
286 | xenbus_switch_state(dev, XenbusStateConnected); | ||
287 | break; | ||
288 | |||
289 | case XenbusStateConnected: | ||
290 | /* | ||
291 | * Work around xenbus race condition: If backend goes | ||
292 | * through InitWait to Connected fast enough, we can | ||
293 | * get Connected twice here. | ||
294 | */ | ||
295 | if (dev->state != XenbusStateConnected) | ||
296 | goto InitWait; /* no InitWait seen yet, fudge it */ | ||
297 | break; | ||
298 | |||
299 | case XenbusStateClosing: | ||
300 | xenbus_frontend_closed(dev); | ||
301 | break; | ||
302 | } | ||
303 | } | ||
304 | |||
305 | static struct xenbus_device_id xenkbd_ids[] = { | ||
306 | { "vkbd" }, | ||
307 | { "" } | ||
308 | }; | ||
309 | |||
310 | static struct xenbus_driver xenkbd = { | ||
311 | .name = "vkbd", | ||
312 | .owner = THIS_MODULE, | ||
313 | .ids = xenkbd_ids, | ||
314 | .probe = xenkbd_probe, | ||
315 | .remove = xenkbd_remove, | ||
316 | .resume = xenkbd_resume, | ||
317 | .otherend_changed = xenkbd_backend_changed, | ||
318 | }; | ||
319 | |||
320 | static int __init xenkbd_init(void) | ||
321 | { | ||
322 | if (!is_running_on_xen()) | ||
323 | return -ENODEV; | ||
324 | |||
325 | /* Nothing to do if running in dom0. */ | ||
326 | if (is_initial_xendomain()) | ||
327 | return -ENODEV; | ||
328 | |||
329 | return xenbus_register_frontend(&xenkbd); | ||
330 | } | ||
331 | |||
332 | static void __exit xenkbd_cleanup(void) | ||
333 | { | ||
334 | xenbus_unregister_driver(&xenkbd); | ||
335 | } | ||
336 | |||
337 | module_init(xenkbd_init); | ||
338 | module_exit(xenkbd_cleanup); | ||
339 | |||
340 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7483d45bc5bc..e62018a36133 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c | |||
@@ -1809,3 +1809,5 @@ module_exit(netif_exit); | |||
1809 | 1809 | ||
1810 | MODULE_DESCRIPTION("Xen virtual network device frontend"); | 1810 | MODULE_DESCRIPTION("Xen virtual network device frontend"); |
1811 | MODULE_LICENSE("GPL"); | 1811 | MODULE_LICENSE("GPL"); |
1812 | MODULE_ALIAS("xen:vif"); | ||
1813 | MODULE_ALIAS("xennet"); | ||
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 1bd5fb30237d..e3dc8f8d0c3e 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig | |||
@@ -1930,6 +1930,20 @@ config FB_VIRTUAL | |||
1930 | 1930 | ||
1931 | If unsure, say N. | 1931 | If unsure, say N. |
1932 | 1932 | ||
1933 | config XEN_FBDEV_FRONTEND | ||
1934 | tristate "Xen virtual frame buffer support" | ||
1935 | depends on FB && XEN | ||
1936 | select FB_SYS_FILLRECT | ||
1937 | select FB_SYS_COPYAREA | ||
1938 | select FB_SYS_IMAGEBLIT | ||
1939 | select FB_SYS_FOPS | ||
1940 | select FB_DEFERRED_IO | ||
1941 | default y | ||
1942 | help | ||
1943 | This driver implements the front-end of the Xen virtual | ||
1944 | frame buffer driver. It communicates with a back-end | ||
1945 | in another domain. | ||
1946 | |||
1933 | source "drivers/video/omap/Kconfig" | 1947 | source "drivers/video/omap/Kconfig" |
1934 | 1948 | ||
1935 | source "drivers/video/backlight/Kconfig" | 1949 | source "drivers/video/backlight/Kconfig" |
diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 11c0e5e05f21..f172b9b73314 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile | |||
@@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o | |||
114 | obj-$(CONFIG_FB_SM501) += sm501fb.o | 114 | obj-$(CONFIG_FB_SM501) += sm501fb.o |
115 | obj-$(CONFIG_FB_XILINX) += xilinxfb.o | 115 | obj-$(CONFIG_FB_XILINX) += xilinxfb.o |
116 | obj-$(CONFIG_FB_OMAP) += omap/ | 116 | obj-$(CONFIG_FB_OMAP) += omap/ |
117 | obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o | ||
117 | 118 | ||
118 | # Platform or fallback drivers go here | 119 | # Platform or fallback drivers go here |
119 | obj-$(CONFIG_FB_UVESA) += uvesafb.o | 120 | obj-$(CONFIG_FB_UVESA) += uvesafb.o |
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c new file mode 100644 index 000000000000..619a6f8d65a2 --- /dev/null +++ b/drivers/video/xen-fbfront.c | |||
@@ -0,0 +1,550 @@ | |||
1 | /* | ||
2 | * Xen para-virtual frame buffer device | ||
3 | * | ||
4 | * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com> | ||
5 | * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> | ||
6 | * | ||
7 | * Based on linux/drivers/video/q40fb.c | ||
8 | * | ||
9 | * This file is subject to the terms and conditions of the GNU General Public | ||
10 | * License. See the file COPYING in the main directory of this archive for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | /* | ||
15 | * TODO: | ||
16 | * | ||
17 | * Switch to grant tables when they become capable of dealing with the | ||
18 | * frame buffer. | ||
19 | */ | ||
20 | |||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/errno.h> | ||
23 | #include <linux/fb.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/mm.h> | ||
27 | #include <asm/xen/hypervisor.h> | ||
28 | #include <xen/events.h> | ||
29 | #include <xen/page.h> | ||
30 | #include <xen/interface/io/fbif.h> | ||
31 | #include <xen/interface/io/protocols.h> | ||
32 | #include <xen/xenbus.h> | ||
33 | |||
34 | struct xenfb_info { | ||
35 | unsigned char *fb; | ||
36 | struct fb_info *fb_info; | ||
37 | int x1, y1, x2, y2; /* dirty rectangle, | ||
38 | protected by dirty_lock */ | ||
39 | spinlock_t dirty_lock; | ||
40 | int nr_pages; | ||
41 | int irq; | ||
42 | struct xenfb_page *page; | ||
43 | unsigned long *mfns; | ||
44 | int update_wanted; /* XENFB_TYPE_UPDATE wanted */ | ||
45 | |||
46 | struct xenbus_device *xbdev; | ||
47 | }; | ||
48 | |||
49 | static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; | ||
50 | |||
51 | static int xenfb_remove(struct xenbus_device *); | ||
52 | static void xenfb_init_shared_page(struct xenfb_info *); | ||
53 | static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); | ||
54 | static void xenfb_disconnect_backend(struct xenfb_info *); | ||
55 | |||
56 | static void xenfb_do_update(struct xenfb_info *info, | ||
57 | int x, int y, int w, int h) | ||
58 | { | ||
59 | union xenfb_out_event event; | ||
60 | u32 prod; | ||
61 | |||
62 | event.type = XENFB_TYPE_UPDATE; | ||
63 | event.update.x = x; | ||
64 | event.update.y = y; | ||
65 | event.update.width = w; | ||
66 | event.update.height = h; | ||
67 | |||
68 | prod = info->page->out_prod; | ||
69 | /* caller ensures !xenfb_queue_full() */ | ||
70 | mb(); /* ensure ring space available */ | ||
71 | XENFB_OUT_RING_REF(info->page, prod) = event; | ||
72 | wmb(); /* ensure ring contents visible */ | ||
73 | info->page->out_prod = prod + 1; | ||
74 | |||
75 | notify_remote_via_irq(info->irq); | ||
76 | } | ||
77 | |||
78 | static int xenfb_queue_full(struct xenfb_info *info) | ||
79 | { | ||
80 | u32 cons, prod; | ||
81 | |||
82 | prod = info->page->out_prod; | ||
83 | cons = info->page->out_cons; | ||
84 | return prod - cons == XENFB_OUT_RING_LEN; | ||
85 | } | ||
86 | |||
87 | static void xenfb_refresh(struct xenfb_info *info, | ||
88 | int x1, int y1, int w, int h) | ||
89 | { | ||
90 | unsigned long flags; | ||
91 | int y2 = y1 + h - 1; | ||
92 | int x2 = x1 + w - 1; | ||
93 | |||
94 | if (!info->update_wanted) | ||
95 | return; | ||
96 | |||
97 | spin_lock_irqsave(&info->dirty_lock, flags); | ||
98 | |||
99 | /* Combine with dirty rectangle: */ | ||
100 | if (info->y1 < y1) | ||
101 | y1 = info->y1; | ||
102 | if (info->y2 > y2) | ||
103 | y2 = info->y2; | ||
104 | if (info->x1 < x1) | ||
105 | x1 = info->x1; | ||
106 | if (info->x2 > x2) | ||
107 | x2 = info->x2; | ||
108 | |||
109 | if (xenfb_queue_full(info)) { | ||
110 | /* Can't send right now, stash it in the dirty rectangle */ | ||
111 | info->x1 = x1; | ||
112 | info->x2 = x2; | ||
113 | info->y1 = y1; | ||
114 | info->y2 = y2; | ||
115 | spin_unlock_irqrestore(&info->dirty_lock, flags); | ||
116 | return; | ||
117 | } | ||
118 | |||
119 | /* Clear dirty rectangle: */ | ||
120 | info->x1 = info->y1 = INT_MAX; | ||
121 | info->x2 = info->y2 = 0; | ||
122 | |||
123 | spin_unlock_irqrestore(&info->dirty_lock, flags); | ||
124 | |||
125 | if (x1 <= x2 && y1 <= y2) | ||
126 | xenfb_do_update(info, x1, y1, x2 - x1 + 1, y2 - y1 + 1); | ||
127 | } | ||
128 | |||
129 | static void xenfb_deferred_io(struct fb_info *fb_info, | ||
130 | struct list_head *pagelist) | ||
131 | { | ||
132 | struct xenfb_info *info = fb_info->par; | ||
133 | struct page *page; | ||
134 | unsigned long beg, end; | ||
135 | int y1, y2, miny, maxy; | ||
136 | |||
137 | miny = INT_MAX; | ||
138 | maxy = 0; | ||
139 | list_for_each_entry(page, pagelist, lru) { | ||
140 | beg = page->index << PAGE_SHIFT; | ||
141 | end = beg + PAGE_SIZE - 1; | ||
142 | y1 = beg / fb_info->fix.line_length; | ||
143 | y2 = end / fb_info->fix.line_length; | ||
144 | if (y2 >= fb_info->var.yres) | ||
145 | y2 = fb_info->var.yres - 1; | ||
146 | if (miny > y1) | ||
147 | miny = y1; | ||
148 | if (maxy < y2) | ||
149 | maxy = y2; | ||
150 | } | ||
151 | xenfb_refresh(info, 0, miny, fb_info->var.xres, maxy - miny + 1); | ||
152 | } | ||
153 | |||
154 | static struct fb_deferred_io xenfb_defio = { | ||
155 | .delay = HZ / 20, | ||
156 | .deferred_io = xenfb_deferred_io, | ||
157 | }; | ||
158 | |||
159 | static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green, | ||
160 | unsigned blue, unsigned transp, | ||
161 | struct fb_info *info) | ||
162 | { | ||
163 | u32 v; | ||
164 | |||
165 | if (regno > info->cmap.len) | ||
166 | return 1; | ||
167 | |||
168 | #define CNVT_TOHW(val, width) ((((val)<<(width))+0x7FFF-(val))>>16) | ||
169 | red = CNVT_TOHW(red, info->var.red.length); | ||
170 | green = CNVT_TOHW(green, info->var.green.length); | ||
171 | blue = CNVT_TOHW(blue, info->var.blue.length); | ||
172 | transp = CNVT_TOHW(transp, info->var.transp.length); | ||
173 | #undef CNVT_TOHW | ||
174 | |||
175 | v = (red << info->var.red.offset) | | ||
176 | (green << info->var.green.offset) | | ||
177 | (blue << info->var.blue.offset); | ||
178 | |||
179 | switch (info->var.bits_per_pixel) { | ||
180 | case 16: | ||
181 | case 24: | ||
182 | case 32: | ||
183 | ((u32 *)info->pseudo_palette)[regno] = v; | ||
184 | break; | ||
185 | } | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect) | ||
191 | { | ||
192 | struct xenfb_info *info = p->par; | ||
193 | |||
194 | sys_fillrect(p, rect); | ||
195 | xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height); | ||
196 | } | ||
197 | |||
198 | static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image) | ||
199 | { | ||
200 | struct xenfb_info *info = p->par; | ||
201 | |||
202 | sys_imageblit(p, image); | ||
203 | xenfb_refresh(info, image->dx, image->dy, image->width, image->height); | ||
204 | } | ||
205 | |||
206 | static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) | ||
207 | { | ||
208 | struct xenfb_info *info = p->par; | ||
209 | |||
210 | sys_copyarea(p, area); | ||
211 | xenfb_refresh(info, area->dx, area->dy, area->width, area->height); | ||
212 | } | ||
213 | |||
214 | static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, | ||
215 | size_t count, loff_t *ppos) | ||
216 | { | ||
217 | struct xenfb_info *info = p->par; | ||
218 | ssize_t res; | ||
219 | |||
220 | res = fb_sys_write(p, buf, count, ppos); | ||
221 | xenfb_refresh(info, 0, 0, info->page->width, info->page->height); | ||
222 | return res; | ||
223 | } | ||
224 | |||
225 | static struct fb_ops xenfb_fb_ops = { | ||
226 | .owner = THIS_MODULE, | ||
227 | .fb_read = fb_sys_read, | ||
228 | .fb_write = xenfb_write, | ||
229 | .fb_setcolreg = xenfb_setcolreg, | ||
230 | .fb_fillrect = xenfb_fillrect, | ||
231 | .fb_copyarea = xenfb_copyarea, | ||
232 | .fb_imageblit = xenfb_imageblit, | ||
233 | }; | ||
234 | |||
235 | static irqreturn_t xenfb_event_handler(int rq, void *dev_id) | ||
236 | { | ||
237 | /* | ||
238 | * No in events recognized, simply ignore them all. | ||
239 | * If you need to recognize some, see xen-kbdfront's | ||
240 | * input_handler() for how to do that. | ||
241 | */ | ||
242 | struct xenfb_info *info = dev_id; | ||
243 | struct xenfb_page *page = info->page; | ||
244 | |||
245 | if (page->in_cons != page->in_prod) { | ||
246 | info->page->in_cons = info->page->in_prod; | ||
247 | notify_remote_via_irq(info->irq); | ||
248 | } | ||
249 | |||
250 | /* Flush dirty rectangle: */ | ||
251 | xenfb_refresh(info, INT_MAX, INT_MAX, -INT_MAX, -INT_MAX); | ||
252 | |||
253 | return IRQ_HANDLED; | ||
254 | } | ||
255 | |||
256 | static int __devinit xenfb_probe(struct xenbus_device *dev, | ||
257 | const struct xenbus_device_id *id) | ||
258 | { | ||
259 | struct xenfb_info *info; | ||
260 | struct fb_info *fb_info; | ||
261 | int ret; | ||
262 | |||
263 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
264 | if (info == NULL) { | ||
265 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | ||
266 | return -ENOMEM; | ||
267 | } | ||
268 | dev->dev.driver_data = info; | ||
269 | info->xbdev = dev; | ||
270 | info->irq = -1; | ||
271 | info->x1 = info->y1 = INT_MAX; | ||
272 | spin_lock_init(&info->dirty_lock); | ||
273 | |||
274 | info->fb = vmalloc(xenfb_mem_len); | ||
275 | if (info->fb == NULL) | ||
276 | goto error_nomem; | ||
277 | memset(info->fb, 0, xenfb_mem_len); | ||
278 | |||
279 | info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
280 | |||
281 | info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); | ||
282 | if (!info->mfns) | ||
283 | goto error_nomem; | ||
284 | |||
285 | /* set up shared page */ | ||
286 | info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
287 | if (!info->page) | ||
288 | goto error_nomem; | ||
289 | |||
290 | xenfb_init_shared_page(info); | ||
291 | |||
292 | /* abusing framebuffer_alloc() to allocate pseudo_palette */ | ||
293 | fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); | ||
294 | if (fb_info == NULL) | ||
295 | goto error_nomem; | ||
296 | |||
297 | /* complete the abuse: */ | ||
298 | fb_info->pseudo_palette = fb_info->par; | ||
299 | fb_info->par = info; | ||
300 | |||
301 | fb_info->screen_base = info->fb; | ||
302 | |||
303 | fb_info->fbops = &xenfb_fb_ops; | ||
304 | fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; | ||
305 | fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; | ||
306 | fb_info->var.bits_per_pixel = info->page->depth; | ||
307 | |||
308 | fb_info->var.red = (struct fb_bitfield){16, 8, 0}; | ||
309 | fb_info->var.green = (struct fb_bitfield){8, 8, 0}; | ||
310 | fb_info->var.blue = (struct fb_bitfield){0, 8, 0}; | ||
311 | |||
312 | fb_info->var.activate = FB_ACTIVATE_NOW; | ||
313 | fb_info->var.height = -1; | ||
314 | fb_info->var.width = -1; | ||
315 | fb_info->var.vmode = FB_VMODE_NONINTERLACED; | ||
316 | |||
317 | fb_info->fix.visual = FB_VISUAL_TRUECOLOR; | ||
318 | fb_info->fix.line_length = info->page->line_length; | ||
319 | fb_info->fix.smem_start = 0; | ||
320 | fb_info->fix.smem_len = xenfb_mem_len; | ||
321 | strcpy(fb_info->fix.id, "xen"); | ||
322 | fb_info->fix.type = FB_TYPE_PACKED_PIXELS; | ||
323 | fb_info->fix.accel = FB_ACCEL_NONE; | ||
324 | |||
325 | fb_info->flags = FBINFO_FLAG_DEFAULT; | ||
326 | |||
327 | ret = fb_alloc_cmap(&fb_info->cmap, 256, 0); | ||
328 | if (ret < 0) { | ||
329 | framebuffer_release(fb_info); | ||
330 | xenbus_dev_fatal(dev, ret, "fb_alloc_cmap"); | ||
331 | goto error; | ||
332 | } | ||
333 | |||
334 | fb_info->fbdefio = &xenfb_defio; | ||
335 | fb_deferred_io_init(fb_info); | ||
336 | |||
337 | ret = register_framebuffer(fb_info); | ||
338 | if (ret) { | ||
339 | fb_deferred_io_cleanup(fb_info); | ||
340 | fb_dealloc_cmap(&fb_info->cmap); | ||
341 | framebuffer_release(fb_info); | ||
342 | xenbus_dev_fatal(dev, ret, "register_framebuffer"); | ||
343 | goto error; | ||
344 | } | ||
345 | info->fb_info = fb_info; | ||
346 | |||
347 | ret = xenfb_connect_backend(dev, info); | ||
348 | if (ret < 0) | ||
349 | goto error; | ||
350 | |||
351 | return 0; | ||
352 | |||
353 | error_nomem: | ||
354 | ret = -ENOMEM; | ||
355 | xenbus_dev_fatal(dev, ret, "allocating device memory"); | ||
356 | error: | ||
357 | xenfb_remove(dev); | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | static int xenfb_resume(struct xenbus_device *dev) | ||
362 | { | ||
363 | struct xenfb_info *info = dev->dev.driver_data; | ||
364 | |||
365 | xenfb_disconnect_backend(info); | ||
366 | xenfb_init_shared_page(info); | ||
367 | return xenfb_connect_backend(dev, info); | ||
368 | } | ||
369 | |||
370 | static int xenfb_remove(struct xenbus_device *dev) | ||
371 | { | ||
372 | struct xenfb_info *info = dev->dev.driver_data; | ||
373 | |||
374 | xenfb_disconnect_backend(info); | ||
375 | if (info->fb_info) { | ||
376 | fb_deferred_io_cleanup(info->fb_info); | ||
377 | unregister_framebuffer(info->fb_info); | ||
378 | fb_dealloc_cmap(&info->fb_info->cmap); | ||
379 | framebuffer_release(info->fb_info); | ||
380 | } | ||
381 | free_page((unsigned long)info->page); | ||
382 | vfree(info->mfns); | ||
383 | vfree(info->fb); | ||
384 | kfree(info); | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
/* Translate a vmalloc'ed address to its machine frame number. */
static unsigned long vmalloc_to_mfn(void *address)
{
	unsigned long pfn = vmalloc_to_pfn(address);

	return pfn_to_mfn(pfn);
}
393 | |||
394 | static void xenfb_init_shared_page(struct xenfb_info *info) | ||
395 | { | ||
396 | int i; | ||
397 | |||
398 | for (i = 0; i < info->nr_pages; i++) | ||
399 | info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); | ||
400 | |||
401 | info->page->pd[0] = vmalloc_to_mfn(info->mfns); | ||
402 | info->page->pd[1] = 0; | ||
403 | info->page->width = XENFB_WIDTH; | ||
404 | info->page->height = XENFB_HEIGHT; | ||
405 | info->page->depth = XENFB_DEPTH; | ||
406 | info->page->line_length = (info->page->depth / 8) * info->page->width; | ||
407 | info->page->mem_length = xenfb_mem_len; | ||
408 | info->page->in_cons = info->page->in_prod = 0; | ||
409 | info->page->out_cons = info->page->out_prod = 0; | ||
410 | } | ||
411 | |||
412 | static int xenfb_connect_backend(struct xenbus_device *dev, | ||
413 | struct xenfb_info *info) | ||
414 | { | ||
415 | int ret, evtchn; | ||
416 | struct xenbus_transaction xbt; | ||
417 | |||
418 | ret = xenbus_alloc_evtchn(dev, &evtchn); | ||
419 | if (ret) | ||
420 | return ret; | ||
421 | ret = bind_evtchn_to_irqhandler(evtchn, xenfb_event_handler, | ||
422 | 0, dev->devicetype, info); | ||
423 | if (ret < 0) { | ||
424 | xenbus_free_evtchn(dev, evtchn); | ||
425 | xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); | ||
426 | return ret; | ||
427 | } | ||
428 | info->irq = ret; | ||
429 | |||
430 | again: | ||
431 | ret = xenbus_transaction_start(&xbt); | ||
432 | if (ret) { | ||
433 | xenbus_dev_fatal(dev, ret, "starting transaction"); | ||
434 | return ret; | ||
435 | } | ||
436 | ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", | ||
437 | virt_to_mfn(info->page)); | ||
438 | if (ret) | ||
439 | goto error_xenbus; | ||
440 | ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | ||
441 | evtchn); | ||
442 | if (ret) | ||
443 | goto error_xenbus; | ||
444 | ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | ||
445 | XEN_IO_PROTO_ABI_NATIVE); | ||
446 | if (ret) | ||
447 | goto error_xenbus; | ||
448 | ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1"); | ||
449 | if (ret) | ||
450 | goto error_xenbus; | ||
451 | ret = xenbus_transaction_end(xbt, 0); | ||
452 | if (ret) { | ||
453 | if (ret == -EAGAIN) | ||
454 | goto again; | ||
455 | xenbus_dev_fatal(dev, ret, "completing transaction"); | ||
456 | return ret; | ||
457 | } | ||
458 | |||
459 | xenbus_switch_state(dev, XenbusStateInitialised); | ||
460 | return 0; | ||
461 | |||
462 | error_xenbus: | ||
463 | xenbus_transaction_end(xbt, 1); | ||
464 | xenbus_dev_fatal(dev, ret, "writing xenstore"); | ||
465 | return ret; | ||
466 | } | ||
467 | |||
468 | static void xenfb_disconnect_backend(struct xenfb_info *info) | ||
469 | { | ||
470 | if (info->irq >= 0) | ||
471 | unbind_from_irqhandler(info->irq, info); | ||
472 | info->irq = -1; | ||
473 | } | ||
474 | |||
475 | static void xenfb_backend_changed(struct xenbus_device *dev, | ||
476 | enum xenbus_state backend_state) | ||
477 | { | ||
478 | struct xenfb_info *info = dev->dev.driver_data; | ||
479 | int val; | ||
480 | |||
481 | switch (backend_state) { | ||
482 | case XenbusStateInitialising: | ||
483 | case XenbusStateInitialised: | ||
484 | case XenbusStateUnknown: | ||
485 | case XenbusStateClosed: | ||
486 | break; | ||
487 | |||
488 | case XenbusStateInitWait: | ||
489 | InitWait: | ||
490 | xenbus_switch_state(dev, XenbusStateConnected); | ||
491 | break; | ||
492 | |||
493 | case XenbusStateConnected: | ||
494 | /* | ||
495 | * Work around xenbus race condition: If backend goes | ||
496 | * through InitWait to Connected fast enough, we can | ||
497 | * get Connected twice here. | ||
498 | */ | ||
499 | if (dev->state != XenbusStateConnected) | ||
500 | goto InitWait; /* no InitWait seen yet, fudge it */ | ||
501 | |||
502 | if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, | ||
503 | "request-update", "%d", &val) < 0) | ||
504 | val = 0; | ||
505 | if (val) | ||
506 | info->update_wanted = 1; | ||
507 | break; | ||
508 | |||
509 | case XenbusStateClosing: | ||
510 | xenbus_frontend_closed(dev); | ||
511 | break; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | static struct xenbus_device_id xenfb_ids[] = { | ||
516 | { "vfb" }, | ||
517 | { "" } | ||
518 | }; | ||
519 | |||
520 | static struct xenbus_driver xenfb = { | ||
521 | .name = "vfb", | ||
522 | .owner = THIS_MODULE, | ||
523 | .ids = xenfb_ids, | ||
524 | .probe = xenfb_probe, | ||
525 | .remove = xenfb_remove, | ||
526 | .resume = xenfb_resume, | ||
527 | .otherend_changed = xenfb_backend_changed, | ||
528 | }; | ||
529 | |||
530 | static int __init xenfb_init(void) | ||
531 | { | ||
532 | if (!is_running_on_xen()) | ||
533 | return -ENODEV; | ||
534 | |||
535 | /* Nothing to do if running in dom0. */ | ||
536 | if (is_initial_xendomain()) | ||
537 | return -ENODEV; | ||
538 | |||
539 | return xenbus_register_frontend(&xenfb); | ||
540 | } | ||
541 | |||
542 | static void __exit xenfb_cleanup(void) | ||
543 | { | ||
544 | xenbus_unregister_driver(&xenfb); | ||
545 | } | ||
546 | |||
547 | module_init(xenfb_init); | ||
548 | module_exit(xenfb_cleanup); | ||
549 | |||
550 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig new file mode 100644 index 000000000000..4b75a16de009 --- /dev/null +++ b/drivers/xen/Kconfig | |||
@@ -0,0 +1,19 @@ | |||
1 | config XEN_BALLOON | ||
2 | bool "Xen memory balloon driver" | ||
3 | depends on XEN | ||
4 | default y | ||
5 | help | ||
6 | The balloon driver allows the Xen domain to request more memory from | ||
7 | the system to expand the domain's memory allocation, or alternatively | ||
8 | return unneeded memory to the system. | ||
9 | |||
10 | config XEN_SCRUB_PAGES | ||
11 | bool "Scrub pages before returning them to system" | ||
12 | depends on XEN_BALLOON | ||
13 | default y | ||
14 | help | ||
15 | Scrub pages before returning them to the system for reuse by | ||
16 | other domains. This makes sure that any confidential data | ||
17 | is not accidentally visible to other domains. It is more | ||
18 | secure, but slightly less efficient. | ||
19 | If in doubt, say yes. | ||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 56592f0d6cef..37af04f1ffd9 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,2 +1,4 @@ | |||
1 | obj-y += grant-table.o | 1 | obj-y += grant-table.o features.o events.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | ||
4 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | ||
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c new file mode 100644 index 000000000000..ab25ba6cbbb9 --- /dev/null +++ b/drivers/xen/balloon.c | |||
@@ -0,0 +1,712 @@ | |||
1 | /****************************************************************************** | ||
2 | * balloon.c | ||
3 | * | ||
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | ||
5 | * | ||
6 | * Copyright (c) 2003, B Dragovic | ||
7 | * Copyright (c) 2003-2004, M Williamson, K Fraser | ||
8 | * Copyright (c) 2005 Dan M. Smith, IBM Corporation | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License version 2 | ||
12 | * as published by the Free Software Foundation; or, when distributed | ||
13 | * separately from the Linux kernel or incorporated into other | ||
14 | * software packages, subject to the following license: | ||
15 | * | ||
16 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
17 | * of this source file (the "Software"), to deal in the Software without | ||
18 | * restriction, including without limitation the rights to use, copy, modify, | ||
19 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
20 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
21 | * the following conditions: | ||
22 | * | ||
23 | * The above copyright notice and this permission notice shall be included in | ||
24 | * all copies or substantial portions of the Software. | ||
25 | * | ||
26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
27 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
28 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
29 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
30 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
31 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
32 | * IN THE SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/sched.h> | ||
38 | #include <linux/errno.h> | ||
39 | #include <linux/mm.h> | ||
40 | #include <linux/bootmem.h> | ||
41 | #include <linux/pagemap.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/mutex.h> | ||
44 | #include <linux/highmem.h> | ||
45 | #include <linux/list.h> | ||
46 | #include <linux/sysdev.h> | ||
47 | |||
48 | #include <asm/xen/hypervisor.h> | ||
49 | #include <asm/page.h> | ||
50 | #include <asm/pgalloc.h> | ||
51 | #include <asm/pgtable.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/tlb.h> | ||
54 | |||
55 | #include <xen/interface/memory.h> | ||
56 | #include <xen/balloon.h> | ||
57 | #include <xen/xenbus.h> | ||
58 | #include <xen/features.h> | ||
59 | #include <xen/page.h> | ||
60 | |||
61 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | ||
62 | |||
63 | #define BALLOON_CLASS_NAME "memory" | ||
64 | |||
/*
 * Balloon accounting, all in units of pages.  The driver works towards
 * current_pages == target_pages.
 */
struct balloon_stats {
	unsigned long current_pages;	/* current allocation */
	unsigned long target_pages;	/* target allocation */

	/* Remembered when we run into Xen's hard limit. */
	unsigned long hard_limit;

	/*
	 * Reservation changes made by other drivers; they must report them
	 * here so that the balloon driver avoids hitting the hard limit.
	 */
	unsigned long driver_pages;

	unsigned long balloon_low;	/* pages in the low-memory balloon */
	unsigned long balloon_high;	/* pages in the high-memory balloon */
};
80 | |||
81 | static DEFINE_MUTEX(balloon_mutex); | ||
82 | |||
83 | static struct sys_device balloon_sysdev; | ||
84 | |||
85 | static int register_balloon(struct sys_device *sysdev); | ||
86 | |||
87 | /* | ||
88 | * Protects atomic reservation decrease/increase against concurrent increases. | ||
89 | * Also protects non-atomic updates of current_pages and driver_pages, and | ||
90 | * balloon lists. | ||
91 | */ | ||
92 | static DEFINE_SPINLOCK(balloon_lock); | ||
93 | |||
94 | static struct balloon_stats balloon_stats; | ||
95 | |||
96 | /* We increase/decrease in batches which fit in a page */ | ||
97 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; | ||
98 | |||
99 | /* VM /proc information for memory */ | ||
100 | extern unsigned long totalram_pages; | ||
101 | |||
102 | #ifdef CONFIG_HIGHMEM | ||
103 | extern unsigned long totalhigh_pages; | ||
104 | #define inc_totalhigh_pages() (totalhigh_pages++) | ||
105 | #define dec_totalhigh_pages() (totalhigh_pages--) | ||
106 | #else | ||
107 | #define inc_totalhigh_pages() do {} while(0) | ||
108 | #define dec_totalhigh_pages() do {} while(0) | ||
109 | #endif | ||
110 | |||
111 | /* List of ballooned pages, threaded through the mem_map array. */ | ||
112 | static LIST_HEAD(ballooned_pages); | ||
113 | |||
114 | /* Main work function, always executed in process context. */ | ||
115 | static void balloon_process(struct work_struct *work); | ||
116 | static DECLARE_WORK(balloon_worker, balloon_process); | ||
117 | static struct timer_list balloon_timer; | ||
118 | |||
119 | /* When ballooning out (allocating memory to return to Xen) we don't really | ||
120 | want the kernel to try too hard since that can trigger the oom killer. */ | ||
121 | #define GFP_BALLOON \ | ||
122 | (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) | ||
123 | |||
/*
 * scrub_page: zero the contents of @page.  Callers in this file invoke it
 * on pages that are about to be released with XENMEM_decrease_reservation.
 * Compiles to an empty function unless CONFIG_XEN_SCRUB_PAGES is set.
 */
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
	if (PageHighMem(page)) {
		void *v = kmap(page);
		clear_page(v);
		/* kunmap() takes the struct page, not the mapped address. */
		kunmap(page);
	} else {
		void *v = page_address(page);
		clear_page(v);
	}
#endif
}
137 | |||
138 | /* balloon_append: add the given page to the balloon. */ | ||
139 | static void balloon_append(struct page *page) | ||
140 | { | ||
141 | /* Lowmem is re-populated first, so highmem pages go at list tail. */ | ||
142 | if (PageHighMem(page)) { | ||
143 | list_add_tail(&page->lru, &ballooned_pages); | ||
144 | balloon_stats.balloon_high++; | ||
145 | dec_totalhigh_pages(); | ||
146 | } else { | ||
147 | list_add(&page->lru, &ballooned_pages); | ||
148 | balloon_stats.balloon_low++; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | ||
153 | static struct page *balloon_retrieve(void) | ||
154 | { | ||
155 | struct page *page; | ||
156 | |||
157 | if (list_empty(&ballooned_pages)) | ||
158 | return NULL; | ||
159 | |||
160 | page = list_entry(ballooned_pages.next, struct page, lru); | ||
161 | list_del(&page->lru); | ||
162 | |||
163 | if (PageHighMem(page)) { | ||
164 | balloon_stats.balloon_high--; | ||
165 | inc_totalhigh_pages(); | ||
166 | } | ||
167 | else | ||
168 | balloon_stats.balloon_low--; | ||
169 | |||
170 | return page; | ||
171 | } | ||
172 | |||
173 | static struct page *balloon_first_page(void) | ||
174 | { | ||
175 | if (list_empty(&ballooned_pages)) | ||
176 | return NULL; | ||
177 | return list_entry(ballooned_pages.next, struct page, lru); | ||
178 | } | ||
179 | |||
180 | static struct page *balloon_next_page(struct page *page) | ||
181 | { | ||
182 | struct list_head *next = page->lru.next; | ||
183 | if (next == &ballooned_pages) | ||
184 | return NULL; | ||
185 | return list_entry(next, struct page, lru); | ||
186 | } | ||
187 | |||
188 | static void balloon_alarm(unsigned long unused) | ||
189 | { | ||
190 | schedule_work(&balloon_worker); | ||
191 | } | ||
192 | |||
193 | static unsigned long current_target(void) | ||
194 | { | ||
195 | unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); | ||
196 | |||
197 | target = min(target, | ||
198 | balloon_stats.current_pages + | ||
199 | balloon_stats.balloon_low + | ||
200 | balloon_stats.balloon_high); | ||
201 | |||
202 | return target; | ||
203 | } | ||
204 | |||
205 | static int increase_reservation(unsigned long nr_pages) | ||
206 | { | ||
207 | unsigned long pfn, i, flags; | ||
208 | struct page *page; | ||
209 | long rc; | ||
210 | struct xen_memory_reservation reservation = { | ||
211 | .address_bits = 0, | ||
212 | .extent_order = 0, | ||
213 | .domid = DOMID_SELF | ||
214 | }; | ||
215 | |||
216 | if (nr_pages > ARRAY_SIZE(frame_list)) | ||
217 | nr_pages = ARRAY_SIZE(frame_list); | ||
218 | |||
219 | spin_lock_irqsave(&balloon_lock, flags); | ||
220 | |||
221 | page = balloon_first_page(); | ||
222 | for (i = 0; i < nr_pages; i++) { | ||
223 | BUG_ON(page == NULL); | ||
224 | frame_list[i] = page_to_pfn(page);; | ||
225 | page = balloon_next_page(page); | ||
226 | } | ||
227 | |||
228 | reservation.extent_start = (unsigned long)frame_list; | ||
229 | reservation.nr_extents = nr_pages; | ||
230 | rc = HYPERVISOR_memory_op( | ||
231 | XENMEM_populate_physmap, &reservation); | ||
232 | if (rc < nr_pages) { | ||
233 | if (rc > 0) { | ||
234 | int ret; | ||
235 | |||
236 | /* We hit the Xen hard limit: reprobe. */ | ||
237 | reservation.nr_extents = rc; | ||
238 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
239 | &reservation); | ||
240 | BUG_ON(ret != rc); | ||
241 | } | ||
242 | if (rc >= 0) | ||
243 | balloon_stats.hard_limit = (balloon_stats.current_pages + rc - | ||
244 | balloon_stats.driver_pages); | ||
245 | goto out; | ||
246 | } | ||
247 | |||
248 | for (i = 0; i < nr_pages; i++) { | ||
249 | page = balloon_retrieve(); | ||
250 | BUG_ON(page == NULL); | ||
251 | |||
252 | pfn = page_to_pfn(page); | ||
253 | BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && | ||
254 | phys_to_machine_mapping_valid(pfn)); | ||
255 | |||
256 | set_phys_to_machine(pfn, frame_list[i]); | ||
257 | |||
258 | /* Link back into the page tables if not highmem. */ | ||
259 | if (pfn < max_low_pfn) { | ||
260 | int ret; | ||
261 | ret = HYPERVISOR_update_va_mapping( | ||
262 | (unsigned long)__va(pfn << PAGE_SHIFT), | ||
263 | mfn_pte(frame_list[i], PAGE_KERNEL), | ||
264 | 0); | ||
265 | BUG_ON(ret); | ||
266 | } | ||
267 | |||
268 | /* Relinquish the page back to the allocator. */ | ||
269 | ClearPageReserved(page); | ||
270 | init_page_count(page); | ||
271 | __free_page(page); | ||
272 | } | ||
273 | |||
274 | balloon_stats.current_pages += nr_pages; | ||
275 | totalram_pages = balloon_stats.current_pages; | ||
276 | |||
277 | out: | ||
278 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | static int decrease_reservation(unsigned long nr_pages) | ||
284 | { | ||
285 | unsigned long pfn, i, flags; | ||
286 | struct page *page; | ||
287 | int need_sleep = 0; | ||
288 | int ret; | ||
289 | struct xen_memory_reservation reservation = { | ||
290 | .address_bits = 0, | ||
291 | .extent_order = 0, | ||
292 | .domid = DOMID_SELF | ||
293 | }; | ||
294 | |||
295 | if (nr_pages > ARRAY_SIZE(frame_list)) | ||
296 | nr_pages = ARRAY_SIZE(frame_list); | ||
297 | |||
298 | for (i = 0; i < nr_pages; i++) { | ||
299 | if ((page = alloc_page(GFP_BALLOON)) == NULL) { | ||
300 | nr_pages = i; | ||
301 | need_sleep = 1; | ||
302 | break; | ||
303 | } | ||
304 | |||
305 | pfn = page_to_pfn(page); | ||
306 | frame_list[i] = pfn_to_mfn(pfn); | ||
307 | |||
308 | scrub_page(page); | ||
309 | } | ||
310 | |||
311 | /* Ensure that ballooned highmem pages don't have kmaps. */ | ||
312 | kmap_flush_unused(); | ||
313 | flush_tlb_all(); | ||
314 | |||
315 | spin_lock_irqsave(&balloon_lock, flags); | ||
316 | |||
317 | /* No more mappings: invalidate P2M and add to balloon. */ | ||
318 | for (i = 0; i < nr_pages; i++) { | ||
319 | pfn = mfn_to_pfn(frame_list[i]); | ||
320 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | ||
321 | balloon_append(pfn_to_page(pfn)); | ||
322 | } | ||
323 | |||
324 | reservation.extent_start = (unsigned long)frame_list; | ||
325 | reservation.nr_extents = nr_pages; | ||
326 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
327 | BUG_ON(ret != nr_pages); | ||
328 | |||
329 | balloon_stats.current_pages -= nr_pages; | ||
330 | totalram_pages = balloon_stats.current_pages; | ||
331 | |||
332 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
333 | |||
334 | return need_sleep; | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * We avoid multiple worker processes conflicting via the balloon mutex. | ||
339 | * We may of course race updates of the target counts (which are protected | ||
340 | * by the balloon lock), or with changes to the Xen hard limit, but we will | ||
341 | * recover from these in time. | ||
342 | */ | ||
343 | static void balloon_process(struct work_struct *work) | ||
344 | { | ||
345 | int need_sleep = 0; | ||
346 | long credit; | ||
347 | |||
348 | mutex_lock(&balloon_mutex); | ||
349 | |||
350 | do { | ||
351 | credit = current_target() - balloon_stats.current_pages; | ||
352 | if (credit > 0) | ||
353 | need_sleep = (increase_reservation(credit) != 0); | ||
354 | if (credit < 0) | ||
355 | need_sleep = (decrease_reservation(-credit) != 0); | ||
356 | |||
357 | #ifndef CONFIG_PREEMPT | ||
358 | if (need_resched()) | ||
359 | schedule(); | ||
360 | #endif | ||
361 | } while ((credit != 0) && !need_sleep); | ||
362 | |||
363 | /* Schedule more work if there is some still to be done. */ | ||
364 | if (current_target() != balloon_stats.current_pages) | ||
365 | mod_timer(&balloon_timer, jiffies + HZ); | ||
366 | |||
367 | mutex_unlock(&balloon_mutex); | ||
368 | } | ||
369 | |||
370 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | ||
371 | void balloon_set_new_target(unsigned long target) | ||
372 | { | ||
373 | /* No need for lock. Not read-modify-write updates. */ | ||
374 | balloon_stats.hard_limit = ~0UL; | ||
375 | balloon_stats.target_pages = target; | ||
376 | schedule_work(&balloon_worker); | ||
377 | } | ||
378 | |||
379 | static struct xenbus_watch target_watch = | ||
380 | { | ||
381 | .node = "memory/target" | ||
382 | }; | ||
383 | |||
384 | /* React to a change in the target key */ | ||
385 | static void watch_target(struct xenbus_watch *watch, | ||
386 | const char **vec, unsigned int len) | ||
387 | { | ||
388 | unsigned long long new_target; | ||
389 | int err; | ||
390 | |||
391 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | ||
392 | if (err != 1) { | ||
393 | /* This is ok (for domain0 at least) - so just return */ | ||
394 | return; | ||
395 | } | ||
396 | |||
397 | /* The given memory/target value is in KiB, so it needs converting to | ||
398 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | ||
399 | */ | ||
400 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | ||
401 | } | ||
402 | |||
403 | static int balloon_init_watcher(struct notifier_block *notifier, | ||
404 | unsigned long event, | ||
405 | void *data) | ||
406 | { | ||
407 | int err; | ||
408 | |||
409 | err = register_xenbus_watch(&target_watch); | ||
410 | if (err) | ||
411 | printk(KERN_ERR "Failed to set balloon watcher\n"); | ||
412 | |||
413 | return NOTIFY_DONE; | ||
414 | } | ||
415 | |||
416 | static struct notifier_block xenstore_notifier; | ||
417 | |||
418 | static int __init balloon_init(void) | ||
419 | { | ||
420 | unsigned long pfn; | ||
421 | struct page *page; | ||
422 | |||
423 | if (!is_running_on_xen()) | ||
424 | return -ENODEV; | ||
425 | |||
426 | pr_info("xen_balloon: Initialising balloon driver.\n"); | ||
427 | |||
428 | balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); | ||
429 | totalram_pages = balloon_stats.current_pages; | ||
430 | balloon_stats.target_pages = balloon_stats.current_pages; | ||
431 | balloon_stats.balloon_low = 0; | ||
432 | balloon_stats.balloon_high = 0; | ||
433 | balloon_stats.driver_pages = 0UL; | ||
434 | balloon_stats.hard_limit = ~0UL; | ||
435 | |||
436 | init_timer(&balloon_timer); | ||
437 | balloon_timer.data = 0; | ||
438 | balloon_timer.function = balloon_alarm; | ||
439 | |||
440 | register_balloon(&balloon_sysdev); | ||
441 | |||
442 | /* Initialise the balloon with excess memory space. */ | ||
443 | for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { | ||
444 | page = pfn_to_page(pfn); | ||
445 | if (!PageReserved(page)) | ||
446 | balloon_append(page); | ||
447 | } | ||
448 | |||
449 | target_watch.callback = watch_target; | ||
450 | xenstore_notifier.notifier_call = balloon_init_watcher; | ||
451 | |||
452 | register_xenstore_notifier(&xenstore_notifier); | ||
453 | |||
454 | return 0; | ||
455 | } | ||
456 | |||
457 | subsys_initcall(balloon_init); | ||
458 | |||
/* Module exit hook; currently a placeholder that does nothing. */
static void balloon_exit(void)
{
	/* XXX - release balloon here */
}
464 | |||
465 | module_exit(balloon_exit); | ||
466 | |||
467 | static void balloon_update_driver_allowance(long delta) | ||
468 | { | ||
469 | unsigned long flags; | ||
470 | |||
471 | spin_lock_irqsave(&balloon_lock, flags); | ||
472 | balloon_stats.driver_pages += delta; | ||
473 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
474 | } | ||
475 | |||
476 | static int dealloc_pte_fn( | ||
477 | pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | ||
478 | { | ||
479 | unsigned long mfn = pte_mfn(*pte); | ||
480 | int ret; | ||
481 | struct xen_memory_reservation reservation = { | ||
482 | .nr_extents = 1, | ||
483 | .extent_order = 0, | ||
484 | .domid = DOMID_SELF | ||
485 | }; | ||
486 | reservation.extent_start = (unsigned long)&mfn; | ||
487 | set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); | ||
488 | set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); | ||
489 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
490 | BUG_ON(ret != 1); | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | static struct page **alloc_empty_pages_and_pagevec(int nr_pages) | ||
495 | { | ||
496 | unsigned long vaddr, flags; | ||
497 | struct page *page, **pagevec; | ||
498 | int i, ret; | ||
499 | |||
500 | pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); | ||
501 | if (pagevec == NULL) | ||
502 | return NULL; | ||
503 | |||
504 | for (i = 0; i < nr_pages; i++) { | ||
505 | page = pagevec[i] = alloc_page(GFP_KERNEL); | ||
506 | if (page == NULL) | ||
507 | goto err; | ||
508 | |||
509 | vaddr = (unsigned long)page_address(page); | ||
510 | |||
511 | scrub_page(page); | ||
512 | |||
513 | spin_lock_irqsave(&balloon_lock, flags); | ||
514 | |||
515 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
516 | unsigned long gmfn = page_to_pfn(page); | ||
517 | struct xen_memory_reservation reservation = { | ||
518 | .nr_extents = 1, | ||
519 | .extent_order = 0, | ||
520 | .domid = DOMID_SELF | ||
521 | }; | ||
522 | reservation.extent_start = (unsigned long)&gmfn; | ||
523 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
524 | &reservation); | ||
525 | if (ret == 1) | ||
526 | ret = 0; /* success */ | ||
527 | } else { | ||
528 | ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, | ||
529 | dealloc_pte_fn, NULL); | ||
530 | } | ||
531 | |||
532 | if (ret != 0) { | ||
533 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
534 | __free_page(page); | ||
535 | goto err; | ||
536 | } | ||
537 | |||
538 | totalram_pages = --balloon_stats.current_pages; | ||
539 | |||
540 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
541 | } | ||
542 | |||
543 | out: | ||
544 | schedule_work(&balloon_worker); | ||
545 | flush_tlb_all(); | ||
546 | return pagevec; | ||
547 | |||
548 | err: | ||
549 | spin_lock_irqsave(&balloon_lock, flags); | ||
550 | while (--i >= 0) | ||
551 | balloon_append(pagevec[i]); | ||
552 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
553 | kfree(pagevec); | ||
554 | pagevec = NULL; | ||
555 | goto out; | ||
556 | } | ||
557 | |||
558 | static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) | ||
559 | { | ||
560 | unsigned long flags; | ||
561 | int i; | ||
562 | |||
563 | if (pagevec == NULL) | ||
564 | return; | ||
565 | |||
566 | spin_lock_irqsave(&balloon_lock, flags); | ||
567 | for (i = 0; i < nr_pages; i++) { | ||
568 | BUG_ON(page_count(pagevec[i]) != 1); | ||
569 | balloon_append(pagevec[i]); | ||
570 | } | ||
571 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
572 | |||
573 | kfree(pagevec); | ||
574 | |||
575 | schedule_work(&balloon_worker); | ||
576 | } | ||
577 | |||
578 | static void balloon_release_driver_page(struct page *page) | ||
579 | { | ||
580 | unsigned long flags; | ||
581 | |||
582 | spin_lock_irqsave(&balloon_lock, flags); | ||
583 | balloon_append(page); | ||
584 | balloon_stats.driver_pages--; | ||
585 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
586 | |||
587 | schedule_work(&balloon_worker); | ||
588 | } | ||
589 | |||
590 | |||
/*
 * BALLOON_SHOW(name, fmt, ...): define show_<name>(), which sprintf()s the
 * given arguments into the sysfs buffer, plus a read-only sysdev attribute
 * for it.  The terminating semicolon is supplied at the point of use.
 */
#define BALLOON_SHOW(name, fmt, ...)				\
	static ssize_t show_##name(struct sys_device *dev,	\
				   char *buf)			\
	{							\
		return sprintf(buf, fmt, ##__VA_ARGS__);	\
	}							\
	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
598 | |||
599 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | ||
600 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | ||
601 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | ||
602 | BALLOON_SHOW(hard_limit_kb, | ||
603 | (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n", | ||
604 | (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); | ||
605 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | ||
606 | |||
607 | static ssize_t show_target_kb(struct sys_device *dev, char *buf) | ||
608 | { | ||
609 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | ||
610 | } | ||
611 | |||
612 | static ssize_t store_target_kb(struct sys_device *dev, | ||
613 | const char *buf, | ||
614 | size_t count) | ||
615 | { | ||
616 | char memstring[64], *endchar; | ||
617 | unsigned long long target_bytes; | ||
618 | |||
619 | if (!capable(CAP_SYS_ADMIN)) | ||
620 | return -EPERM; | ||
621 | |||
622 | if (count <= 1) | ||
623 | return -EBADMSG; /* runt */ | ||
624 | if (count > sizeof(memstring)) | ||
625 | return -EFBIG; /* too long */ | ||
626 | strcpy(memstring, buf); | ||
627 | |||
628 | target_bytes = memparse(memstring, &endchar); | ||
629 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
630 | |||
631 | return count; | ||
632 | } | ||
633 | |||
634 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | ||
635 | show_target_kb, store_target_kb); | ||
636 | |||
637 | static struct sysdev_attribute *balloon_attrs[] = { | ||
638 | &attr_target_kb, | ||
639 | }; | ||
640 | |||
641 | static struct attribute *balloon_info_attrs[] = { | ||
642 | &attr_current_kb.attr, | ||
643 | &attr_low_kb.attr, | ||
644 | &attr_high_kb.attr, | ||
645 | &attr_hard_limit_kb.attr, | ||
646 | &attr_driver_kb.attr, | ||
647 | NULL | ||
648 | }; | ||
649 | |||
650 | static struct attribute_group balloon_info_group = { | ||
651 | .name = "info", | ||
652 | .attrs = balloon_info_attrs, | ||
653 | }; | ||
654 | |||
655 | static struct sysdev_class balloon_sysdev_class = { | ||
656 | .name = BALLOON_CLASS_NAME, | ||
657 | }; | ||
658 | |||
659 | static int register_balloon(struct sys_device *sysdev) | ||
660 | { | ||
661 | int i, error; | ||
662 | |||
663 | error = sysdev_class_register(&balloon_sysdev_class); | ||
664 | if (error) | ||
665 | return error; | ||
666 | |||
667 | sysdev->id = 0; | ||
668 | sysdev->cls = &balloon_sysdev_class; | ||
669 | |||
670 | error = sysdev_register(sysdev); | ||
671 | if (error) { | ||
672 | sysdev_class_unregister(&balloon_sysdev_class); | ||
673 | return error; | ||
674 | } | ||
675 | |||
676 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | ||
677 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | ||
678 | if (error) | ||
679 | goto fail; | ||
680 | } | ||
681 | |||
682 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | ||
683 | if (error) | ||
684 | goto fail; | ||
685 | |||
686 | return 0; | ||
687 | |||
688 | fail: | ||
689 | while (--i >= 0) | ||
690 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
691 | sysdev_unregister(sysdev); | ||
692 | sysdev_class_unregister(&balloon_sysdev_class); | ||
693 | return error; | ||
694 | } | ||
695 | |||
696 | static void unregister_balloon(struct sys_device *sysdev) | ||
697 | { | ||
698 | int i; | ||
699 | |||
700 | sysfs_remove_group(&sysdev->kobj, &balloon_info_group); | ||
701 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) | ||
702 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
703 | sysdev_unregister(sysdev); | ||
704 | sysdev_class_unregister(&balloon_sysdev_class); | ||
705 | } | ||
706 | |||
707 | static void balloon_sysfs_exit(void) | ||
708 | { | ||
709 | unregister_balloon(&balloon_sysdev); | ||
710 | } | ||
711 | |||
712 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/xen/events.c b/drivers/xen/events.c index dcf613e17581..4f0f22b020ea 100644 --- a/arch/x86/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -33,12 +33,11 @@ | |||
33 | #include <asm/xen/hypercall.h> | 33 | #include <asm/xen/hypercall.h> |
34 | #include <asm/xen/hypervisor.h> | 34 | #include <asm/xen/hypervisor.h> |
35 | 35 | ||
36 | #include <xen/xen-ops.h> | ||
36 | #include <xen/events.h> | 37 | #include <xen/events.h> |
37 | #include <xen/interface/xen.h> | 38 | #include <xen/interface/xen.h> |
38 | #include <xen/interface/event_channel.h> | 39 | #include <xen/interface/event_channel.h> |
39 | 40 | ||
40 | #include "xen-ops.h" | ||
41 | |||
42 | /* | 41 | /* |
43 | * This lock protects updates to the following mapping and reference-count | 42 | * This lock protects updates to the following mapping and reference-count |
44 | * arrays. The lock does not need to be acquired to read the mapping tables. | 43 | * arrays. The lock does not need to be acquired to read the mapping tables. |
@@ -455,6 +454,53 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) | |||
455 | notify_remote_via_irq(irq); | 454 | notify_remote_via_irq(irq); |
456 | } | 455 | } |
457 | 456 | ||
457 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id) | ||
458 | { | ||
459 | struct shared_info *sh = HYPERVISOR_shared_info; | ||
460 | int cpu = smp_processor_id(); | ||
461 | int i; | ||
462 | unsigned long flags; | ||
463 | static DEFINE_SPINLOCK(debug_lock); | ||
464 | |||
465 | spin_lock_irqsave(&debug_lock, flags); | ||
466 | |||
467 | printk("vcpu %d\n ", cpu); | ||
468 | |||
469 | for_each_online_cpu(i) { | ||
470 | struct vcpu_info *v = per_cpu(xen_vcpu, i); | ||
471 | printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, | ||
472 | (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask, | ||
473 | v->evtchn_upcall_pending, | ||
474 | v->evtchn_pending_sel); | ||
475 | } | ||
476 | printk("pending:\n "); | ||
477 | for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) | ||
478 | printk("%08lx%s", sh->evtchn_pending[i], | ||
479 | i % 8 == 0 ? "\n " : " "); | ||
480 | printk("\nmasks:\n "); | ||
481 | for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
482 | printk("%08lx%s", sh->evtchn_mask[i], | ||
483 | i % 8 == 0 ? "\n " : " "); | ||
484 | |||
485 | printk("\nunmasked:\n "); | ||
486 | for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
487 | printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], | ||
488 | i % 8 == 0 ? "\n " : " "); | ||
489 | |||
490 | printk("\npending list:\n"); | ||
491 | for(i = 0; i < NR_EVENT_CHANNELS; i++) { | ||
492 | if (sync_test_bit(i, sh->evtchn_pending)) { | ||
493 | printk(" %d: event %d -> irq %d\n", | ||
494 | cpu_evtchn[i], i, | ||
495 | evtchn_to_irq[i]); | ||
496 | } | ||
497 | } | ||
498 | |||
499 | spin_unlock_irqrestore(&debug_lock, flags); | ||
500 | |||
501 | return IRQ_HANDLED; | ||
502 | } | ||
503 | |||
458 | 504 | ||
459 | /* | 505 | /* |
460 | * Search the CPUs pending events bitmasks. For each one found, map | 506 | * Search the CPUs pending events bitmasks. For each one found, map |
@@ -470,29 +516,44 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) | |||
470 | int cpu = get_cpu(); | 516 | int cpu = get_cpu(); |
471 | struct shared_info *s = HYPERVISOR_shared_info; | 517 | struct shared_info *s = HYPERVISOR_shared_info; |
472 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | 518 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); |
473 | unsigned long pending_words; | 519 | static DEFINE_PER_CPU(unsigned, nesting_count); |
520 | unsigned count; | ||
474 | 521 | ||
475 | vcpu_info->evtchn_upcall_pending = 0; | 522 | do { |
523 | unsigned long pending_words; | ||
476 | 524 | ||
477 | /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ | 525 | vcpu_info->evtchn_upcall_pending = 0; |
478 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); | ||
479 | while (pending_words != 0) { | ||
480 | unsigned long pending_bits; | ||
481 | int word_idx = __ffs(pending_words); | ||
482 | pending_words &= ~(1UL << word_idx); | ||
483 | 526 | ||
484 | while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { | 527 | if (__get_cpu_var(nesting_count)++) |
485 | int bit_idx = __ffs(pending_bits); | 528 | goto out; |
486 | int port = (word_idx * BITS_PER_LONG) + bit_idx; | ||
487 | int irq = evtchn_to_irq[port]; | ||
488 | 529 | ||
489 | if (irq != -1) { | 530 | #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ |
490 | regs->orig_ax = ~irq; | 531 | /* Clear master flag /before/ clearing selector flag. */ |
491 | do_IRQ(regs); | 532 | rmb(); |
533 | #endif | ||
534 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); | ||
535 | while (pending_words != 0) { | ||
536 | unsigned long pending_bits; | ||
537 | int word_idx = __ffs(pending_words); | ||
538 | pending_words &= ~(1UL << word_idx); | ||
539 | |||
540 | while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { | ||
541 | int bit_idx = __ffs(pending_bits); | ||
542 | int port = (word_idx * BITS_PER_LONG) + bit_idx; | ||
543 | int irq = evtchn_to_irq[port]; | ||
544 | |||
545 | if (irq != -1) | ||
546 | xen_do_IRQ(irq, regs); | ||
492 | } | 547 | } |
493 | } | 548 | } |
494 | } | ||
495 | 549 | ||
550 | BUG_ON(!irqs_disabled()); | ||
551 | |||
552 | count = __get_cpu_var(nesting_count); | ||
553 | __get_cpu_var(nesting_count) = 0; | ||
554 | } while(count != 1); | ||
555 | |||
556 | out: | ||
496 | put_cpu(); | 557 | put_cpu(); |
497 | } | 558 | } |
498 | 559 | ||
@@ -525,6 +586,22 @@ static void set_affinity_irq(unsigned irq, cpumask_t dest) | |||
525 | rebind_irq_to_cpu(irq, tcpu); | 586 | rebind_irq_to_cpu(irq, tcpu); |
526 | } | 587 | } |
527 | 588 | ||
589 | int resend_irq_on_evtchn(unsigned int irq) | ||
590 | { | ||
591 | int masked, evtchn = evtchn_from_irq(irq); | ||
592 | struct shared_info *s = HYPERVISOR_shared_info; | ||
593 | |||
594 | if (!VALID_EVTCHN(evtchn)) | ||
595 | return 1; | ||
596 | |||
597 | masked = sync_test_and_set_bit(evtchn, s->evtchn_mask); | ||
598 | sync_set_bit(evtchn, s->evtchn_pending); | ||
599 | if (!masked) | ||
600 | unmask_evtchn(evtchn); | ||
601 | |||
602 | return 1; | ||
603 | } | ||
604 | |||
528 | static void enable_dynirq(unsigned int irq) | 605 | static void enable_dynirq(unsigned int irq) |
529 | { | 606 | { |
530 | int evtchn = evtchn_from_irq(irq); | 607 | int evtchn = evtchn_from_irq(irq); |
@@ -554,10 +631,16 @@ static void ack_dynirq(unsigned int irq) | |||
554 | static int retrigger_dynirq(unsigned int irq) | 631 | static int retrigger_dynirq(unsigned int irq) |
555 | { | 632 | { |
556 | int evtchn = evtchn_from_irq(irq); | 633 | int evtchn = evtchn_from_irq(irq); |
634 | struct shared_info *sh = HYPERVISOR_shared_info; | ||
557 | int ret = 0; | 635 | int ret = 0; |
558 | 636 | ||
559 | if (VALID_EVTCHN(evtchn)) { | 637 | if (VALID_EVTCHN(evtchn)) { |
560 | set_evtchn(evtchn); | 638 | int masked; |
639 | |||
640 | masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); | ||
641 | sync_set_bit(evtchn, sh->evtchn_pending); | ||
642 | if (!masked) | ||
643 | unmask_evtchn(evtchn); | ||
561 | ret = 1; | 644 | ret = 1; |
562 | } | 645 | } |
563 | 646 | ||
diff --git a/arch/x86/xen/features.c b/drivers/xen/features.c index 0707714e40d6..0707714e40d6 100644 --- a/arch/x86/xen/features.c +++ b/drivers/xen/features.c | |||
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index d85dc6d41c2a..52b6b41b909d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void) | |||
439 | return xen_max; | 439 | return xen_max; |
440 | } | 440 | } |
441 | 441 | ||
442 | static int map_pte_fn(pte_t *pte, struct page *pmd_page, | ||
443 | unsigned long addr, void *data) | ||
444 | { | ||
445 | unsigned long **frames = (unsigned long **)data; | ||
446 | |||
447 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
448 | (*frames)++; | ||
449 | return 0; | ||
450 | } | ||
451 | |||
452 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | ||
453 | unsigned long addr, void *data) | ||
454 | { | ||
455 | |||
456 | set_pte_at(&init_mm, addr, pte, __pte(0)); | ||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | 442 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) |
461 | { | 443 | { |
462 | struct gnttab_setup_table setup; | 444 | struct gnttab_setup_table setup; |
@@ -470,7 +452,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |||
470 | 452 | ||
471 | setup.dom = DOMID_SELF; | 453 | setup.dom = DOMID_SELF; |
472 | setup.nr_frames = nr_gframes; | 454 | setup.nr_frames = nr_gframes; |
473 | setup.frame_list = frames; | 455 | set_xen_guest_handle(setup.frame_list, frames); |
474 | 456 | ||
475 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); | 457 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); |
476 | if (rc == -ENOSYS) { | 458 | if (rc == -ENOSYS) { |
@@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |||
480 | 462 | ||
481 | BUG_ON(rc || setup.status); | 463 | BUG_ON(rc || setup.status); |
482 | 464 | ||
483 | if (shared == NULL) { | 465 | rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), |
484 | struct vm_struct *area; | 466 | &shared); |
485 | area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); | ||
486 | BUG_ON(area == NULL); | ||
487 | shared = area->addr; | ||
488 | } | ||
489 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
490 | PAGE_SIZE * nr_gframes, | ||
491 | map_pte_fn, &frames); | ||
492 | BUG_ON(rc); | 467 | BUG_ON(rc); |
493 | frames -= nr_gframes; /* adjust after map_pte_fn() */ | ||
494 | 468 | ||
495 | kfree(frames); | 469 | kfree(frames); |
496 | 470 | ||
@@ -506,10 +480,7 @@ static int gnttab_resume(void) | |||
506 | 480 | ||
507 | static int gnttab_suspend(void) | 481 | static int gnttab_suspend(void) |
508 | { | 482 | { |
509 | apply_to_page_range(&init_mm, (unsigned long)shared, | 483 | arch_gnttab_unmap_shared(shared, nr_grant_frames); |
510 | PAGE_SIZE * nr_grant_frames, | ||
511 | unmap_pte_fn, NULL); | ||
512 | |||
513 | return 0; | 484 | return 0; |
514 | } | 485 | } |
515 | 486 | ||
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 9fd2f70ab46d..0f86b0ff7879 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c | |||
@@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) | |||
399 | 399 | ||
400 | *vaddr = NULL; | 400 | *vaddr = NULL; |
401 | 401 | ||
402 | area = alloc_vm_area(PAGE_SIZE); | 402 | area = xen_alloc_vm_area(PAGE_SIZE); |
403 | if (!area) | 403 | if (!area) |
404 | return -ENOMEM; | 404 | return -ENOMEM; |
405 | 405 | ||
@@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) | |||
409 | BUG(); | 409 | BUG(); |
410 | 410 | ||
411 | if (op.status != GNTST_okay) { | 411 | if (op.status != GNTST_okay) { |
412 | free_vm_area(area); | 412 | xen_free_vm_area(area); |
413 | xenbus_dev_fatal(dev, op.status, | 413 | xenbus_dev_fatal(dev, op.status, |
414 | "mapping in shared page %d from domain %d", | 414 | "mapping in shared page %d from domain %d", |
415 | gnt_ref, dev->otherend_id); | 415 | gnt_ref, dev->otherend_id); |
@@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) | |||
508 | BUG(); | 508 | BUG(); |
509 | 509 | ||
510 | if (op.status == GNTST_okay) | 510 | if (op.status == GNTST_okay) |
511 | free_vm_area(area); | 511 | xen_free_vm_area(area); |
512 | else | 512 | else |
513 | xenbus_dev_error(dev, op.status, | 513 | xenbus_dev_error(dev, op.status, |
514 | "unmapping page at handle %d error %d", | 514 | "unmapping page at handle %d error %d", |
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4750de316ad3..57ceb5346b74 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -88,6 +88,16 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) | |||
88 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; | 88 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; |
89 | } | 89 | } |
90 | 90 | ||
91 | static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) | ||
92 | { | ||
93 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
94 | |||
95 | if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) | ||
96 | return -ENOMEM; | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
91 | /* device/<type>/<id> => <type>-<id> */ | 101 | /* device/<type>/<id> => <type>-<id> */ |
92 | static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) | 102 | static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) |
93 | { | 103 | { |
@@ -166,6 +176,7 @@ static struct xen_bus_type xenbus_frontend = { | |||
166 | .bus = { | 176 | .bus = { |
167 | .name = "xen", | 177 | .name = "xen", |
168 | .match = xenbus_match, | 178 | .match = xenbus_match, |
179 | .uevent = xenbus_uevent, | ||
169 | .probe = xenbus_dev_probe, | 180 | .probe = xenbus_dev_probe, |
170 | .remove = xenbus_dev_remove, | 181 | .remove = xenbus_dev_remove, |
171 | .shutdown = xenbus_dev_shutdown, | 182 | .shutdown = xenbus_dev_shutdown, |
@@ -438,6 +449,12 @@ static ssize_t xendev_show_devtype(struct device *dev, | |||
438 | } | 449 | } |
439 | DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); | 450 | DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); |
440 | 451 | ||
452 | static ssize_t xendev_show_modalias(struct device *dev, | ||
453 | struct device_attribute *attr, char *buf) | ||
454 | { | ||
455 | return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); | ||
456 | } | ||
457 | DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); | ||
441 | 458 | ||
442 | int xenbus_probe_node(struct xen_bus_type *bus, | 459 | int xenbus_probe_node(struct xen_bus_type *bus, |
443 | const char *type, | 460 | const char *type, |
@@ -492,10 +509,16 @@ int xenbus_probe_node(struct xen_bus_type *bus, | |||
492 | 509 | ||
493 | err = device_create_file(&xendev->dev, &dev_attr_devtype); | 510 | err = device_create_file(&xendev->dev, &dev_attr_devtype); |
494 | if (err) | 511 | if (err) |
495 | goto fail_remove_file; | 512 | goto fail_remove_nodename; |
513 | |||
514 | err = device_create_file(&xendev->dev, &dev_attr_modalias); | ||
515 | if (err) | ||
516 | goto fail_remove_devtype; | ||
496 | 517 | ||
497 | return 0; | 518 | return 0; |
498 | fail_remove_file: | 519 | fail_remove_devtype: |
520 | device_remove_file(&xendev->dev, &dev_attr_devtype); | ||
521 | fail_remove_nodename: | ||
499 | device_remove_file(&xendev->dev, &dev_attr_nodename); | 522 | device_remove_file(&xendev->dev, &dev_attr_nodename); |
500 | fail_unregister: | 523 | fail_unregister: |
501 | device_unregister(&xendev->dev); | 524 | device_unregister(&xendev->dev); |
@@ -846,6 +869,7 @@ static int is_disconnected_device(struct device *dev, void *data) | |||
846 | { | 869 | { |
847 | struct xenbus_device *xendev = to_xenbus_device(dev); | 870 | struct xenbus_device *xendev = to_xenbus_device(dev); |
848 | struct device_driver *drv = data; | 871 | struct device_driver *drv = data; |
872 | struct xenbus_driver *xendrv; | ||
849 | 873 | ||
850 | /* | 874 | /* |
851 | * A device with no driver will never connect. We care only about | 875 | * A device with no driver will never connect. We care only about |
@@ -858,7 +882,9 @@ static int is_disconnected_device(struct device *dev, void *data) | |||
858 | if (drv && (dev->driver != drv)) | 882 | if (drv && (dev->driver != drv)) |
859 | return 0; | 883 | return 0; |
860 | 884 | ||
861 | return (xendev->state != XenbusStateConnected); | 885 | xendrv = to_xenbus_driver(dev->driver); |
886 | return (xendev->state != XenbusStateConnected || | ||
887 | (xendrv->is_ready && !xendrv->is_ready(xendev))); | ||
862 | } | 888 | } |
863 | 889 | ||
864 | static int exists_disconnected_device(struct device_driver *drv) | 890 | static int exists_disconnected_device(struct device_driver *drv) |
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c new file mode 100644 index 000000000000..797cb4e31f07 --- /dev/null +++ b/drivers/xen/xencomm.c | |||
@@ -0,0 +1,232 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
15 | * | ||
16 | * Copyright (C) IBM Corp. 2006 | ||
17 | * | ||
18 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
19 | */ | ||
20 | |||
21 | #include <linux/gfp.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <asm/page.h> | ||
24 | #include <xen/xencomm.h> | ||
25 | #include <xen/interface/xen.h> | ||
26 | #ifdef __ia64__ | ||
27 | #include <asm/xen/xencomm.h> /* for is_kern_addr() */ | ||
28 | #endif | ||
29 | |||
30 | #ifdef HAVE_XEN_PLATFORM_COMPAT_H | ||
31 | #include <xen/platform-compat.h> | ||
32 | #endif | ||
33 | |||
34 | static int xencomm_init(struct xencomm_desc *desc, | ||
35 | void *buffer, unsigned long bytes) | ||
36 | { | ||
37 | unsigned long recorded = 0; | ||
38 | int i = 0; | ||
39 | |||
40 | while ((recorded < bytes) && (i < desc->nr_addrs)) { | ||
41 | unsigned long vaddr = (unsigned long)buffer + recorded; | ||
42 | unsigned long paddr; | ||
43 | int offset; | ||
44 | int chunksz; | ||
45 | |||
46 | offset = vaddr % PAGE_SIZE; /* handle partial pages */ | ||
47 | chunksz = min(PAGE_SIZE - offset, bytes - recorded); | ||
48 | |||
49 | paddr = xencomm_vtop(vaddr); | ||
50 | if (paddr == ~0UL) { | ||
51 | printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n", | ||
52 | __func__, vaddr); | ||
53 | return -EINVAL; | ||
54 | } | ||
55 | |||
56 | desc->address[i++] = paddr; | ||
57 | recorded += chunksz; | ||
58 | } | ||
59 | |||
60 | if (recorded < bytes) { | ||
61 | printk(KERN_DEBUG | ||
62 | "%s: could only translate %ld of %ld bytes\n", | ||
63 | __func__, recorded, bytes); | ||
64 | return -ENOSPC; | ||
65 | } | ||
66 | |||
67 | /* mark remaining addresses invalid (just for safety) */ | ||
68 | while (i < desc->nr_addrs) | ||
69 | desc->address[i++] = XENCOMM_INVALID; | ||
70 | |||
71 | desc->magic = XENCOMM_MAGIC; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, | ||
77 | void *buffer, unsigned long bytes) | ||
78 | { | ||
79 | struct xencomm_desc *desc; | ||
80 | unsigned long buffer_ulong = (unsigned long)buffer; | ||
81 | unsigned long start = buffer_ulong & PAGE_MASK; | ||
82 | unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; | ||
83 | unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; | ||
84 | unsigned long size = sizeof(*desc) + | ||
85 | sizeof(desc->address[0]) * nr_addrs; | ||
86 | |||
87 | /* | ||
88 | * slab allocator returns at least sizeof(void*) aligned pointer. | ||
89 | * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might | ||
90 | * cross page boundary. | ||
91 | */ | ||
92 | if (sizeof(*desc) > sizeof(void *)) { | ||
93 | unsigned long order = get_order(size); | ||
94 | desc = (struct xencomm_desc *)__get_free_pages(gfp_mask, | ||
95 | order); | ||
96 | if (desc == NULL) | ||
97 | return NULL; | ||
98 | |||
99 | desc->nr_addrs = | ||
100 | ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) / | ||
101 | sizeof(*desc->address); | ||
102 | } else { | ||
103 | desc = kmalloc(size, gfp_mask); | ||
104 | if (desc == NULL) | ||
105 | return NULL; | ||
106 | |||
107 | desc->nr_addrs = nr_addrs; | ||
108 | } | ||
109 | return desc; | ||
110 | } | ||
111 | |||
112 | void xencomm_free(struct xencomm_handle *desc) | ||
113 | { | ||
114 | if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) { | ||
115 | struct xencomm_desc *desc__ = (struct xencomm_desc *)desc; | ||
116 | if (sizeof(*desc__) > sizeof(void *)) { | ||
117 | unsigned long size = sizeof(*desc__) + | ||
118 | sizeof(desc__->address[0]) * desc__->nr_addrs; | ||
119 | unsigned long order = get_order(size); | ||
120 | free_pages((unsigned long)__va(desc), order); | ||
121 | } else | ||
122 | kfree(__va(desc)); | ||
123 | } | ||
124 | } | ||
125 | |||
126 | static int xencomm_create(void *buffer, unsigned long bytes, | ||
127 | struct xencomm_desc **ret, gfp_t gfp_mask) | ||
128 | { | ||
129 | struct xencomm_desc *desc; | ||
130 | int rc; | ||
131 | |||
132 | pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes); | ||
133 | |||
134 | if (bytes == 0) { | ||
135 | /* don't create a descriptor; Xen recognizes NULL. */ | ||
136 | BUG_ON(buffer != NULL); | ||
137 | *ret = NULL; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | BUG_ON(buffer == NULL); /* 'bytes' is non-zero */ | ||
142 | |||
143 | desc = xencomm_alloc(gfp_mask, buffer, bytes); | ||
144 | if (!desc) { | ||
145 | printk(KERN_DEBUG "%s failure\n", "xencomm_alloc"); | ||
146 | return -ENOMEM; | ||
147 | } | ||
148 | |||
149 | rc = xencomm_init(desc, buffer, bytes); | ||
150 | if (rc) { | ||
151 | printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc); | ||
152 | xencomm_free((struct xencomm_handle *)__pa(desc)); | ||
153 | return rc; | ||
154 | } | ||
155 | |||
156 | *ret = desc; | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | /* check if memory address is within VMALLOC region */ | ||
161 | static int is_phys_contiguous(unsigned long addr) | ||
162 | { | ||
163 | if (!is_kernel_addr(addr)) | ||
164 | return 0; | ||
165 | |||
166 | return (addr < VMALLOC_START) || (addr >= VMALLOC_END); | ||
167 | } | ||
168 | |||
169 | static struct xencomm_handle *xencomm_create_inline(void *ptr) | ||
170 | { | ||
171 | unsigned long paddr; | ||
172 | |||
173 | BUG_ON(!is_phys_contiguous((unsigned long)ptr)); | ||
174 | |||
175 | paddr = (unsigned long)xencomm_pa(ptr); | ||
176 | BUG_ON(paddr & XENCOMM_INLINE_FLAG); | ||
177 | return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); | ||
178 | } | ||
179 | |||
180 | /* "mini" routine, for stack-based communications: */ | ||
181 | static int xencomm_create_mini(void *buffer, | ||
182 | unsigned long bytes, struct xencomm_mini *xc_desc, | ||
183 | struct xencomm_desc **ret) | ||
184 | { | ||
185 | int rc = 0; | ||
186 | struct xencomm_desc *desc; | ||
187 | BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0); | ||
188 | |||
189 | desc = (void *)xc_desc; | ||
190 | |||
191 | desc->nr_addrs = XENCOMM_MINI_ADDRS; | ||
192 | |||
193 | rc = xencomm_init(desc, buffer, bytes); | ||
194 | if (!rc) | ||
195 | *ret = desc; | ||
196 | |||
197 | return rc; | ||
198 | } | ||
199 | |||
200 | struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes) | ||
201 | { | ||
202 | int rc; | ||
203 | struct xencomm_desc *desc; | ||
204 | |||
205 | if (is_phys_contiguous((unsigned long)ptr)) | ||
206 | return xencomm_create_inline(ptr); | ||
207 | |||
208 | rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL); | ||
209 | |||
210 | if (rc || desc == NULL) | ||
211 | return NULL; | ||
212 | |||
213 | return xencomm_pa(desc); | ||
214 | } | ||
215 | |||
216 | struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes, | ||
217 | struct xencomm_mini *xc_desc) | ||
218 | { | ||
219 | int rc; | ||
220 | struct xencomm_desc *desc = NULL; | ||
221 | |||
222 | if (is_phys_contiguous((unsigned long)ptr)) | ||
223 | return xencomm_create_inline(ptr); | ||
224 | |||
225 | rc = xencomm_create_mini(ptr, bytes, xc_desc, | ||
226 | &desc); | ||
227 | |||
228 | if (rc) | ||
229 | return NULL; | ||
230 | |||
231 | return xencomm_pa(desc); | ||
232 | } | ||
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 3d419398499b..0f13b945e240 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h | |||
@@ -220,11 +220,13 @@ struct pv_mmu_ops { | |||
220 | unsigned long va); | 220 | unsigned long va); |
221 | 221 | ||
222 | /* Hooks for allocating/releasing pagetable pages */ | 222 | /* Hooks for allocating/releasing pagetable pages */ |
223 | void (*alloc_pt)(struct mm_struct *mm, u32 pfn); | 223 | void (*alloc_pte)(struct mm_struct *mm, u32 pfn); |
224 | void (*alloc_pd)(struct mm_struct *mm, u32 pfn); | 224 | void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); |
225 | void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); | 225 | void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); |
226 | void (*release_pt)(u32 pfn); | 226 | void (*alloc_pud)(struct mm_struct *mm, u32 pfn); |
227 | void (*release_pd)(u32 pfn); | 227 | void (*release_pte)(u32 pfn); |
228 | void (*release_pmd)(u32 pfn); | ||
229 | void (*release_pud)(u32 pfn); | ||
228 | 230 | ||
229 | /* Pagetable manipulation functions */ | 231 | /* Pagetable manipulation functions */ |
230 | void (*set_pte)(pte_t *ptep, pte_t pteval); | 232 | void (*set_pte)(pte_t *ptep, pte_t pteval); |
@@ -910,28 +912,37 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
910 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); | 912 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); |
911 | } | 913 | } |
912 | 914 | ||
913 | static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn) | 915 | static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) |
914 | { | 916 | { |
915 | PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn); | 917 | PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); |
916 | } | 918 | } |
917 | static inline void paravirt_release_pt(unsigned pfn) | 919 | static inline void paravirt_release_pte(unsigned pfn) |
918 | { | 920 | { |
919 | PVOP_VCALL1(pv_mmu_ops.release_pt, pfn); | 921 | PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); |
920 | } | 922 | } |
921 | 923 | ||
922 | static inline void paravirt_alloc_pd(struct mm_struct *mm, unsigned pfn) | 924 | static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn) |
923 | { | 925 | { |
924 | PVOP_VCALL2(pv_mmu_ops.alloc_pd, mm, pfn); | 926 | PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); |
925 | } | 927 | } |
926 | 928 | ||
927 | static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, | 929 | static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn, |
928 | unsigned start, unsigned count) | 930 | unsigned start, unsigned count) |
929 | { | 931 | { |
930 | PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count); | 932 | PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); |
931 | } | 933 | } |
932 | static inline void paravirt_release_pd(unsigned pfn) | 934 | static inline void paravirt_release_pmd(unsigned pfn) |
933 | { | 935 | { |
934 | PVOP_VCALL1(pv_mmu_ops.release_pd, pfn); | 936 | PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); |
937 | } | ||
938 | |||
939 | static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn) | ||
940 | { | ||
941 | PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); | ||
942 | } | ||
943 | static inline void paravirt_release_pud(unsigned pfn) | ||
944 | { | ||
945 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); | ||
935 | } | 946 | } |
936 | 947 | ||
937 | #ifdef CONFIG_HIGHPTE | 948 | #ifdef CONFIG_HIGHPTE |
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h index 5886eed05886..91e4641f3f31 100644 --- a/include/asm-x86/pgalloc.h +++ b/include/asm-x86/pgalloc.h | |||
@@ -1,5 +1,110 @@ | |||
1 | #ifdef CONFIG_X86_32 | 1 | #ifndef _ASM_X86_PGALLOC_H |
2 | # include "pgalloc_32.h" | 2 | #define _ASM_X86_PGALLOC_H |
3 | |||
4 | #include <linux/threads.h> | ||
5 | #include <linux/mm.h> /* for struct page */ | ||
6 | #include <linux/pagemap.h> | ||
7 | |||
8 | #ifdef CONFIG_PARAVIRT | ||
9 | #include <asm/paravirt.h> | ||
3 | #else | 10 | #else |
4 | # include "pgalloc_64.h" | 11 | static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} |
12 | static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} | ||
13 | static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, | ||
14 | unsigned long start, unsigned long count) {} | ||
15 | static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {} | ||
16 | static inline void paravirt_release_pte(unsigned long pfn) {} | ||
17 | static inline void paravirt_release_pmd(unsigned long pfn) {} | ||
18 | static inline void paravirt_release_pud(unsigned long pfn) {} | ||
5 | #endif | 19 | #endif |
20 | |||
21 | /* | ||
22 | * Allocate and free page tables. | ||
23 | */ | ||
24 | extern pgd_t *pgd_alloc(struct mm_struct *); | ||
25 | extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); | ||
26 | |||
27 | extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); | ||
28 | extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); | ||
29 | |||
30 | /* Should really implement gc for free page table pages. This could be | ||
31 | done with a reference count in struct page. */ | ||
32 | |||
33 | static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | ||
34 | { | ||
35 | BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); | ||
36 | free_page((unsigned long)pte); | ||
37 | } | ||
38 | |||
39 | static inline void pte_free(struct mm_struct *mm, struct page *pte) | ||
40 | { | ||
41 | __free_page(pte); | ||
42 | } | ||
43 | |||
44 | extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte); | ||
45 | |||
46 | static inline void pmd_populate_kernel(struct mm_struct *mm, | ||
47 | pmd_t *pmd, pte_t *pte) | ||
48 | { | ||
49 | paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); | ||
50 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | ||
51 | } | ||
52 | |||
53 | static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, | ||
54 | struct page *pte) | ||
55 | { | ||
56 | unsigned long pfn = page_to_pfn(pte); | ||
57 | |||
58 | paravirt_alloc_pte(mm, pfn); | ||
59 | set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); | ||
60 | } | ||
61 | |||
62 | #define pmd_pgtable(pmd) pmd_page(pmd) | ||
63 | |||
64 | #if PAGETABLE_LEVELS > 2 | ||
65 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | ||
66 | { | ||
67 | return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
68 | } | ||
69 | |||
70 | static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | ||
71 | { | ||
72 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | ||
73 | free_page((unsigned long)pmd); | ||
74 | } | ||
75 | |||
76 | extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); | ||
77 | |||
78 | #ifdef CONFIG_X86_PAE | ||
79 | extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); | ||
80 | #else /* !CONFIG_X86_PAE */ | ||
81 | static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | ||
82 | { | ||
83 | paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); | ||
84 | set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); | ||
85 | } | ||
86 | #endif /* CONFIG_X86_PAE */ | ||
87 | |||
88 | #if PAGETABLE_LEVELS > 3 | ||
89 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | ||
90 | { | ||
91 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | ||
92 | set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud))); | ||
93 | } | ||
94 | |||
95 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) | ||
96 | { | ||
97 | return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
98 | } | ||
99 | |||
100 | static inline void pud_free(struct mm_struct *mm, pud_t *pud) | ||
101 | { | ||
102 | BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); | ||
103 | free_page((unsigned long)pud); | ||
104 | } | ||
105 | |||
106 | extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); | ||
107 | #endif /* PAGETABLE_LEVELS > 3 */ | ||
108 | #endif /* PAGETABLE_LEVELS > 2 */ | ||
109 | |||
110 | #endif /* _ASM_X86_PGALLOC_H */ | ||
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h deleted file mode 100644 index 6bea6e5b5ee5..000000000000 --- a/include/asm-x86/pgalloc_32.h +++ /dev/null | |||
@@ -1,95 +0,0 @@ | |||
1 | #ifndef _I386_PGALLOC_H | ||
2 | #define _I386_PGALLOC_H | ||
3 | |||
4 | #include <linux/threads.h> | ||
5 | #include <linux/mm.h> /* for struct page */ | ||
6 | #include <linux/pagemap.h> | ||
7 | #include <asm/tlb.h> | ||
8 | #include <asm-generic/tlb.h> | ||
9 | |||
10 | #ifdef CONFIG_PARAVIRT | ||
11 | #include <asm/paravirt.h> | ||
12 | #else | ||
13 | #define paravirt_alloc_pt(mm, pfn) do { } while (0) | ||
14 | #define paravirt_alloc_pd(mm, pfn) do { } while (0) | ||
15 | #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) | ||
16 | #define paravirt_release_pt(pfn) do { } while (0) | ||
17 | #define paravirt_release_pd(pfn) do { } while (0) | ||
18 | #endif | ||
19 | |||
20 | static inline void pmd_populate_kernel(struct mm_struct *mm, | ||
21 | pmd_t *pmd, pte_t *pte) | ||
22 | { | ||
23 | paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); | ||
24 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | ||
25 | } | ||
26 | |||
27 | static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) | ||
28 | { | ||
29 | unsigned long pfn = page_to_pfn(pte); | ||
30 | |||
31 | paravirt_alloc_pt(mm, pfn); | ||
32 | set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); | ||
33 | } | ||
34 | #define pmd_pgtable(pmd) pmd_page(pmd) | ||
35 | |||
36 | /* | ||
37 | * Allocate and free page tables. | ||
38 | */ | ||
39 | extern pgd_t *pgd_alloc(struct mm_struct *); | ||
40 | extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); | ||
41 | |||
42 | extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); | ||
43 | extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); | ||
44 | |||
45 | static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | ||
46 | { | ||
47 | free_page((unsigned long)pte); | ||
48 | } | ||
49 | |||
50 | static inline void pte_free(struct mm_struct *mm, pgtable_t pte) | ||
51 | { | ||
52 | pgtable_page_dtor(pte); | ||
53 | __free_page(pte); | ||
54 | } | ||
55 | |||
56 | |||
57 | extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte); | ||
58 | |||
59 | #ifdef CONFIG_X86_PAE | ||
60 | /* | ||
61 | * In the PAE case we free the pmds as part of the pgd. | ||
62 | */ | ||
63 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | ||
64 | { | ||
65 | return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
66 | } | ||
67 | |||
68 | static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | ||
69 | { | ||
70 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | ||
71 | free_page((unsigned long)pmd); | ||
72 | } | ||
73 | |||
74 | extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); | ||
75 | |||
76 | static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | ||
77 | { | ||
78 | paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT); | ||
79 | |||
80 | /* Note: almost everything apart from _PAGE_PRESENT is | ||
81 | reserved at the pmd (PDPT) level. */ | ||
82 | set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); | ||
83 | |||
84 | /* | ||
85 | * According to Intel App note "TLBs, Paging-Structure Caches, | ||
86 | * and Their Invalidation", April 2007, document 317080-001, | ||
87 | * section 8.1: in PAE mode we explicitly have to flush the | ||
88 | * TLB via cr3 if the top-level pgd is changed... | ||
89 | */ | ||
90 | if (mm == current->active_mm) | ||
91 | write_cr3(read_cr3()); | ||
92 | } | ||
93 | #endif /* CONFIG_X86_PAE */ | ||
94 | |||
95 | #endif /* _I386_PGALLOC_H */ | ||
diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h deleted file mode 100644 index 8d6722320dcc..000000000000 --- a/include/asm-x86/pgalloc_64.h +++ /dev/null | |||
@@ -1,133 +0,0 @@ | |||
1 | #ifndef _X86_64_PGALLOC_H | ||
2 | #define _X86_64_PGALLOC_H | ||
3 | |||
4 | #include <asm/pda.h> | ||
5 | #include <linux/threads.h> | ||
6 | #include <linux/mm.h> | ||
7 | |||
8 | #define pmd_populate_kernel(mm, pmd, pte) \ | ||
9 | set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) | ||
10 | #define pud_populate(mm, pud, pmd) \ | ||
11 | set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))) | ||
12 | #define pgd_populate(mm, pgd, pud) \ | ||
13 | set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud))) | ||
14 | |||
15 | #define pmd_pgtable(pmd) pmd_page(pmd) | ||
16 | |||
17 | static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) | ||
18 | { | ||
19 | set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); | ||
20 | } | ||
21 | |||
22 | static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | ||
23 | { | ||
24 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | ||
25 | free_page((unsigned long)pmd); | ||
26 | } | ||
27 | |||
28 | static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) | ||
29 | { | ||
30 | return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
31 | } | ||
32 | |||
33 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) | ||
34 | { | ||
35 | return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
36 | } | ||
37 | |||
38 | static inline void pud_free(struct mm_struct *mm, pud_t *pud) | ||
39 | { | ||
40 | BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); | ||
41 | free_page((unsigned long)pud); | ||
42 | } | ||
43 | |||
44 | static inline void pgd_list_add(pgd_t *pgd) | ||
45 | { | ||
46 | struct page *page = virt_to_page(pgd); | ||
47 | unsigned long flags; | ||
48 | |||
49 | spin_lock_irqsave(&pgd_lock, flags); | ||
50 | list_add(&page->lru, &pgd_list); | ||
51 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
52 | } | ||
53 | |||
54 | static inline void pgd_list_del(pgd_t *pgd) | ||
55 | { | ||
56 | struct page *page = virt_to_page(pgd); | ||
57 | unsigned long flags; | ||
58 | |||
59 | spin_lock_irqsave(&pgd_lock, flags); | ||
60 | list_del(&page->lru); | ||
61 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
62 | } | ||
63 | |||
64 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) | ||
65 | { | ||
66 | unsigned boundary; | ||
67 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); | ||
68 | if (!pgd) | ||
69 | return NULL; | ||
70 | pgd_list_add(pgd); | ||
71 | /* | ||
72 | * Copy kernel pointers in from init. | ||
73 | * Could keep a freelist or slab cache of those because the kernel | ||
74 | * part never changes. | ||
75 | */ | ||
76 | boundary = pgd_index(__PAGE_OFFSET); | ||
77 | memset(pgd, 0, boundary * sizeof(pgd_t)); | ||
78 | memcpy(pgd + boundary, | ||
79 | init_level4_pgt + boundary, | ||
80 | (PTRS_PER_PGD - boundary) * sizeof(pgd_t)); | ||
81 | return pgd; | ||
82 | } | ||
83 | |||
84 | static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
85 | { | ||
86 | BUG_ON((unsigned long)pgd & (PAGE_SIZE-1)); | ||
87 | pgd_list_del(pgd); | ||
88 | free_page((unsigned long)pgd); | ||
89 | } | ||
90 | |||
91 | static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | ||
92 | { | ||
93 | return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
94 | } | ||
95 | |||
96 | static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
97 | { | ||
98 | struct page *page; | ||
99 | void *p; | ||
100 | |||
101 | p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | ||
102 | if (!p) | ||
103 | return NULL; | ||
104 | page = virt_to_page(p); | ||
105 | pgtable_page_ctor(page); | ||
106 | return page; | ||
107 | } | ||
108 | |||
109 | /* Should really implement gc for free page table pages. This could be | ||
110 | done with a reference count in struct page. */ | ||
111 | |||
112 | static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) | ||
113 | { | ||
114 | BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); | ||
115 | free_page((unsigned long)pte); | ||
116 | } | ||
117 | |||
118 | static inline void pte_free(struct mm_struct *mm, pgtable_t pte) | ||
119 | { | ||
120 | pgtable_page_dtor(pte); | ||
121 | __free_page(pte); | ||
122 | } | ||
123 | |||
124 | #define __pte_free_tlb(tlb,pte) \ | ||
125 | do { \ | ||
126 | pgtable_page_dtor((pte)); \ | ||
127 | tlb_remove_page((tlb), (pte)); \ | ||
128 | } while (0) | ||
129 | |||
130 | #define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) | ||
131 | #define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) | ||
132 | |||
133 | #endif /* _X86_64_PGALLOC_H */ | ||
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index f1d9f4a03f6f..b8a08bd7bd48 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_PGTABLE_H | 1 | #ifndef _ASM_X86_PGTABLE_H |
2 | #define _ASM_X86_PGTABLE_H | 2 | #define _ASM_X86_PGTABLE_H |
3 | 3 | ||
4 | #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) | ||
5 | #define FIRST_USER_ADDRESS 0 | 4 | #define FIRST_USER_ADDRESS 0 |
6 | 5 | ||
7 | #define _PAGE_BIT_PRESENT 0 /* is present */ | 6 | #define _PAGE_BIT_PRESENT 0 /* is present */ |
@@ -330,6 +329,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
330 | # include "pgtable_64.h" | 329 | # include "pgtable_64.h" |
331 | #endif | 330 | #endif |
332 | 331 | ||
332 | #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) | ||
333 | #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) | ||
334 | |||
333 | #ifndef __ASSEMBLY__ | 335 | #ifndef __ASSEMBLY__ |
334 | 336 | ||
335 | enum { | 337 | enum { |
@@ -389,37 +391,17 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
389 | * bit at the same time. | 391 | * bit at the same time. |
390 | */ | 392 | */ |
391 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | 393 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
392 | #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ | 394 | extern int ptep_set_access_flags(struct vm_area_struct *vma, |
393 | ({ \ | 395 | unsigned long address, pte_t *ptep, |
394 | int __changed = !pte_same(*(ptep), entry); \ | 396 | pte_t entry, int dirty); |
395 | if (__changed && dirty) { \ | ||
396 | *ptep = entry; \ | ||
397 | pte_update_defer((vma)->vm_mm, (address), (ptep)); \ | ||
398 | flush_tlb_page(vma, address); \ | ||
399 | } \ | ||
400 | __changed; \ | ||
401 | }) | ||
402 | 397 | ||
403 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | 398 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
404 | #define ptep_test_and_clear_young(vma, addr, ptep) ({ \ | 399 | extern int ptep_test_and_clear_young(struct vm_area_struct *vma, |
405 | int __ret = 0; \ | 400 | unsigned long addr, pte_t *ptep); |
406 | if (pte_young(*(ptep))) \ | ||
407 | __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ | ||
408 | &(ptep)->pte); \ | ||
409 | if (__ret) \ | ||
410 | pte_update((vma)->vm_mm, addr, ptep); \ | ||
411 | __ret; \ | ||
412 | }) | ||
413 | 401 | ||
414 | #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH | 402 | #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH |
415 | #define ptep_clear_flush_young(vma, address, ptep) \ | 403 | extern int ptep_clear_flush_young(struct vm_area_struct *vma, |
416 | ({ \ | 404 | unsigned long address, pte_t *ptep); |
417 | int __young; \ | ||
418 | __young = ptep_test_and_clear_young((vma), (address), (ptep)); \ | ||
419 | if (__young) \ | ||
420 | flush_tlb_page(vma, address); \ | ||
421 | __young; \ | ||
422 | }) | ||
423 | 405 | ||
424 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR | 406 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
425 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | 407 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
@@ -456,6 +438,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, | |||
456 | pte_update(mm, addr, ptep); | 438 | pte_update(mm, addr, ptep); |
457 | } | 439 | } |
458 | 440 | ||
441 | /* | ||
442 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); | ||
443 | * | ||
444 | * dst - pointer to pgd range anywhere on a pgd page | ||
445 | * src - "" | ||
446 | * count - the number of pgds to copy. | ||
447 | * | ||
448 | * dst and src can be on the same page, but the range must not overlap, | ||
449 | * and must not cross a page boundary. | ||
450 | */ | ||
451 | static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) | ||
452 | { | ||
453 | memcpy(dst, src, count * sizeof(pgd_t)); | ||
454 | } | ||
455 | |||
456 | |||
459 | #include <asm-generic/pgtable.h> | 457 | #include <asm-generic/pgtable.h> |
460 | #endif /* __ASSEMBLY__ */ | 458 | #endif /* __ASSEMBLY__ */ |
461 | 459 | ||
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index c4a643674458..168b6447cf18 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h | |||
@@ -48,9 +48,6 @@ void paging_init(void); | |||
48 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) | 48 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) |
49 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | 49 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) |
50 | 50 | ||
51 | #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) | ||
52 | #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) | ||
53 | |||
54 | /* Just any arbitrary offset to the start of the vmalloc VM area: the | 51 | /* Just any arbitrary offset to the start of the vmalloc VM area: the |
55 | * current 8MB value just means that there will be a 8MB "hole" after the | 52 | * current 8MB value just means that there will be a 8MB "hole" after the |
56 | * physical memory until the kernel virtual memory starts. That means that | 53 | * physical memory until the kernel virtual memory starts. That means that |
@@ -109,21 +106,6 @@ extern int pmd_bad(pmd_t pmd); | |||
109 | #endif | 106 | #endif |
110 | 107 | ||
111 | /* | 108 | /* |
112 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); | ||
113 | * | ||
114 | * dst - pointer to pgd range anwhere on a pgd page | ||
115 | * src - "" | ||
116 | * count - the number of pgds to copy. | ||
117 | * | ||
118 | * dst and src can be on the same page, but the range must not overlap, | ||
119 | * and must not cross a page boundary. | ||
120 | */ | ||
121 | static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) | ||
122 | { | ||
123 | memcpy(dst, src, count * sizeof(pgd_t)); | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Macro to mark a page protection value as "uncacheable". | 109 | * Macro to mark a page protection value as "uncacheable". |
128 | * On processors which do not support it, this is a no-op. | 110 | * On processors which do not support it, this is a no-op. |
129 | */ | 111 | */ |
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 9fd87d0b6477..a3bbf8766c1d 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h | |||
@@ -24,7 +24,7 @@ extern void paging_init(void); | |||
24 | 24 | ||
25 | #endif /* !__ASSEMBLY__ */ | 25 | #endif /* !__ASSEMBLY__ */ |
26 | 26 | ||
27 | #define SHARED_KERNEL_PMD 1 | 27 | #define SHARED_KERNEL_PMD 0 |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * PGDIR_SHIFT determines what a top-level page table entry can map | 30 | * PGDIR_SHIFT determines what a top-level page table entry can map |
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h new file mode 100644 index 000000000000..596312a7bfc9 --- /dev/null +++ b/include/asm-x86/xen/events.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef __XEN_EVENTS_H | ||
2 | #define __XEN_EVENTS_H | ||
3 | |||
4 | enum ipi_vector { | ||
5 | XEN_RESCHEDULE_VECTOR, | ||
6 | XEN_CALL_FUNCTION_VECTOR, | ||
7 | |||
8 | XEN_NR_IPIS, | ||
9 | }; | ||
10 | |||
11 | static inline int xen_irqs_disabled(struct pt_regs *regs) | ||
12 | { | ||
13 | return raw_irqs_disabled_flags(regs->flags); | ||
14 | } | ||
15 | |||
16 | static inline void xen_do_IRQ(int irq, struct pt_regs *regs) | ||
17 | { | ||
18 | regs->orig_ax = ~irq; | ||
19 | do_IRQ(regs); | ||
20 | } | ||
21 | |||
22 | #endif /* __XEN_EVENTS_H */ | ||
diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h new file mode 100644 index 000000000000..2444d4593a3b --- /dev/null +++ b/include/asm-x86/xen/grant_table.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef __XEN_GRANT_TABLE_H | ||
2 | #define __XEN_GRANT_TABLE_H | ||
3 | |||
4 | #define xen_alloc_vm_area(size) alloc_vm_area(size) | ||
5 | #define xen_free_vm_area(area) free_vm_area(area) | ||
6 | |||
7 | #endif /* __XEN_GRANT_TABLE_H */ | ||
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index bc0ee7d961ca..c2ccd997ed35 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h | |||
@@ -164,6 +164,12 @@ HYPERVISOR_set_callbacks(unsigned long event_selector, | |||
164 | } | 164 | } |
165 | 165 | ||
166 | static inline int | 166 | static inline int |
167 | HYPERVISOR_callback_op(int cmd, void *arg) | ||
168 | { | ||
169 | return _hypercall2(int, callback_op, cmd, arg); | ||
170 | } | ||
171 | |||
172 | static inline int | ||
167 | HYPERVISOR_fpu_taskswitch(int set) | 173 | HYPERVISOR_fpu_taskswitch(int set) |
168 | { | 174 | { |
169 | return _hypercall1(int, fpu_taskswitch, set); | 175 | return _hypercall1(int, fpu_taskswitch, set); |
diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h index 165c3968e138..6227000a1e84 100644 --- a/include/asm-x86/xen/interface.h +++ b/include/asm-x86/xen/interface.h | |||
@@ -22,6 +22,30 @@ | |||
22 | #define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) | 22 | #define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) |
23 | #define GUEST_HANDLE(name) __guest_handle_ ## name | 23 | #define GUEST_HANDLE(name) __guest_handle_ ## name |
24 | 24 | ||
25 | #ifdef __XEN__ | ||
26 | #if defined(__i386__) | ||
27 | #define set_xen_guest_handle(hnd, val) \ | ||
28 | do { \ | ||
29 | if (sizeof(hnd) == 8) \ | ||
30 | *(uint64_t *)&(hnd) = 0; \ | ||
31 | (hnd).p = val; \ | ||
32 | } while (0) | ||
33 | #elif defined(__x86_64__) | ||
34 | #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) | ||
35 | #endif | ||
36 | #else | ||
37 | #if defined(__i386__) | ||
38 | #define set_xen_guest_handle(hnd, val) \ | ||
39 | do { \ | ||
40 | if (sizeof(hnd) == 8) \ | ||
41 | *(uint64_t *)&(hnd) = 0; \ | ||
42 | (hnd) = val; \ | ||
43 | } while (0) | ||
44 | #elif defined(__x86_64__) | ||
45 | #define set_xen_guest_handle(hnd, val) do { (hnd) = val; } while (0) | ||
46 | #endif | ||
47 | #endif | ||
48 | |||
25 | #ifndef __ASSEMBLY__ | 49 | #ifndef __ASSEMBLY__ |
26 | /* Guest handles for primitive C types. */ | 50 | /* Guest handles for primitive C types. */ |
27 | __DEFINE_GUEST_HANDLE(uchar, unsigned char); | 51 | __DEFINE_GUEST_HANDLE(uchar, unsigned char); |
@@ -171,6 +195,10 @@ struct arch_vcpu_info { | |||
171 | unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ | 195 | unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ |
172 | }; | 196 | }; |
173 | 197 | ||
198 | struct xen_callback { | ||
199 | unsigned long cs; | ||
200 | unsigned long eip; | ||
201 | }; | ||
174 | #endif /* !__ASSEMBLY__ */ | 202 | #endif /* !__ASSEMBLY__ */ |
175 | 203 | ||
176 | /* | 204 | /* |
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h new file mode 100644 index 000000000000..01799305f02a --- /dev/null +++ b/include/asm-x86/xen/page.h | |||
@@ -0,0 +1,168 @@ | |||
1 | #ifndef __XEN_PAGE_H | ||
2 | #define __XEN_PAGE_H | ||
3 | |||
4 | #include <linux/pfn.h> | ||
5 | |||
6 | #include <asm/uaccess.h> | ||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | #include <xen/features.h> | ||
10 | |||
11 | /* Xen machine address */ | ||
12 | typedef struct xmaddr { | ||
13 | phys_addr_t maddr; | ||
14 | } xmaddr_t; | ||
15 | |||
16 | /* Xen pseudo-physical address */ | ||
17 | typedef struct xpaddr { | ||
18 | phys_addr_t paddr; | ||
19 | } xpaddr_t; | ||
20 | |||
21 | #define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) | ||
22 | #define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) | ||
23 | |||
24 | /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ | ||
25 | #define INVALID_P2M_ENTRY (~0UL) | ||
26 | #define FOREIGN_FRAME_BIT (1UL<<31) | ||
27 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) | ||
28 | |||
29 | extern unsigned long *phys_to_machine_mapping; | ||
30 | |||
31 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | ||
32 | { | ||
33 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
34 | return pfn; | ||
35 | |||
36 | return phys_to_machine_mapping[(unsigned int)(pfn)] & | ||
37 | ~FOREIGN_FRAME_BIT; | ||
38 | } | ||
39 | |||
40 | static inline int phys_to_machine_mapping_valid(unsigned long pfn) | ||
41 | { | ||
42 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
43 | return 1; | ||
44 | |||
45 | return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); | ||
46 | } | ||
47 | |||
48 | static inline unsigned long mfn_to_pfn(unsigned long mfn) | ||
49 | { | ||
50 | unsigned long pfn; | ||
51 | |||
52 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
53 | return mfn; | ||
54 | |||
55 | #if 0 | ||
56 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | ||
57 | return max_mapnr; | ||
58 | #endif | ||
59 | |||
60 | pfn = 0; | ||
61 | /* | ||
62 | * The array access can fail (e.g., device space beyond end of RAM). | ||
63 | * In such cases it doesn't matter what we return (we return garbage), | ||
64 | * but we must handle the fault without crashing! | ||
65 | */ | ||
66 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
67 | |||
68 | return pfn; | ||
69 | } | ||
70 | |||
71 | static inline xmaddr_t phys_to_machine(xpaddr_t phys) | ||
72 | { | ||
73 | unsigned offset = phys.paddr & ~PAGE_MASK; | ||
74 | return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); | ||
75 | } | ||
76 | |||
77 | static inline xpaddr_t machine_to_phys(xmaddr_t machine) | ||
78 | { | ||
79 | unsigned offset = machine.maddr & ~PAGE_MASK; | ||
80 | return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * We detect special mappings in one of two ways: | ||
85 | * 1. If the MFN is an I/O page then Xen will set the m2p entry | ||
86 | * to be outside our maximum possible pseudophys range. | ||
87 | * 2. If the MFN belongs to a different domain then we will certainly | ||
88 | * not have MFN in our p2m table. Conversely, if the page is ours, | ||
89 | * then we'll have p2m(m2p(MFN))==MFN. | ||
90 | * If we detect a special mapping then it doesn't have a 'struct page'. | ||
91 | * We force !pfn_valid() by returning an out-of-range pointer. | ||
92 | * | ||
93 | * NB. These checks require that, for any MFN that is not in our reservation, | ||
94 | * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if | ||
95 | * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN. | ||
96 | * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. | ||
97 | * | ||
98 | * NB2. When deliberately mapping foreign pages into the p2m table, you *must* | ||
99 | * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we | ||
100 | * require. In all the cases we care about, the FOREIGN_FRAME bit is | ||
101 | * masked (e.g., pfn_to_mfn()) so behaviour there is correct. | ||
102 | */ | ||
103 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | ||
104 | { | ||
105 | extern unsigned long max_mapnr; | ||
106 | unsigned long pfn = mfn_to_pfn(mfn); | ||
107 | if ((pfn < max_mapnr) | ||
108 | && !xen_feature(XENFEAT_auto_translated_physmap) | ||
109 | && (phys_to_machine_mapping[pfn] != mfn)) | ||
110 | return max_mapnr; /* force !pfn_valid() */ | ||
111 | return pfn; | ||
112 | } | ||
113 | |||
114 | static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
115 | { | ||
116 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
117 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
118 | return; | ||
119 | } | ||
120 | phys_to_machine_mapping[pfn] = mfn; | ||
121 | } | ||
122 | |||
123 | /* VIRT <-> MACHINE conversion */ | ||
124 | #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) | ||
125 | #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) | ||
126 | #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) | ||
127 | |||
128 | static inline unsigned long pte_mfn(pte_t pte) | ||
129 | { | ||
130 | return (pte.pte & ~_PAGE_NX) >> PAGE_SHIFT; | ||
131 | } | ||
132 | |||
133 | static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) | ||
134 | { | ||
135 | pte_t pte; | ||
136 | |||
137 | pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | | ||
138 | (pgprot_val(pgprot) & __supported_pte_mask); | ||
139 | |||
140 | return pte; | ||
141 | } | ||
142 | |||
143 | static inline pteval_t pte_val_ma(pte_t pte) | ||
144 | { | ||
145 | return pte.pte; | ||
146 | } | ||
147 | |||
148 | static inline pte_t __pte_ma(pteval_t x) | ||
149 | { | ||
150 | return (pte_t) { .pte = x }; | ||
151 | } | ||
152 | |||
153 | #ifdef CONFIG_X86_PAE | ||
154 | #define pmd_val_ma(v) ((v).pmd) | ||
155 | #define pud_val_ma(v) ((v).pgd.pgd) | ||
156 | #define __pmd_ma(x) ((pmd_t) { (x) } ) | ||
157 | #else /* !X86_PAE */ | ||
158 | #define pmd_val_ma(v) ((v).pud.pgd.pgd) | ||
159 | #endif /* CONFIG_X86_PAE */ | ||
160 | |||
161 | #define pgd_val_ma(x) ((x).pgd) | ||
162 | |||
163 | |||
164 | xmaddr_t arbitrary_virt_to_machine(unsigned long address); | ||
165 | void make_lowmem_page_readonly(void *vaddr); | ||
166 | void make_lowmem_page_readwrite(void *vaddr); | ||
167 | |||
168 | #endif /* __XEN_PAGE_H */ | ||
diff --git a/include/xen/balloon.h b/include/xen/balloon.h new file mode 100644 index 000000000000..fe43b0f3c86a --- /dev/null +++ b/include/xen/balloon.h | |||
@@ -0,0 +1,61 @@ | |||
1 | /****************************************************************************** | ||
2 | * balloon.h | ||
3 | * | ||
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | ||
5 | * | ||
6 | * Copyright (c) 2003, B Dragovic | ||
7 | * Copyright (c) 2003-2004, M Williamson, K Fraser | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #ifndef __XEN_BALLOON_H__ | ||
35 | #define __XEN_BALLOON_H__ | ||
36 | |||
37 | #include <linux/spinlock.h> | ||
38 | |||
39 | #if 0 | ||
40 | /* | ||
41 | * Inform the balloon driver that it should allow some slop for device-driver | ||
42 | * memory activities. | ||
43 | */ | ||
44 | void balloon_update_driver_allowance(long delta); | ||
45 | |||
46 | /* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */ | ||
47 | struct page **alloc_empty_pages_and_pagevec(int nr_pages); | ||
48 | void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages); | ||
49 | |||
50 | void balloon_release_driver_page(struct page *page); | ||
51 | |||
52 | /* | ||
53 | * Prevent the balloon driver from changing the memory reservation during | ||
54 | * a driver critical region. | ||
55 | */ | ||
56 | extern spinlock_t balloon_lock; | ||
57 | #define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags) | ||
58 | #define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags) | ||
59 | #endif | ||
60 | |||
61 | #endif /* __XEN_BALLOON_H__ */ | ||
diff --git a/include/xen/events.h b/include/xen/events.h index 2bde54d29be5..acd8e062c85f 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -5,13 +5,7 @@ | |||
5 | 5 | ||
6 | #include <xen/interface/event_channel.h> | 6 | #include <xen/interface/event_channel.h> |
7 | #include <asm/xen/hypercall.h> | 7 | #include <asm/xen/hypercall.h> |
8 | 8 | #include <asm/xen/events.h> | |
9 | enum ipi_vector { | ||
10 | XEN_RESCHEDULE_VECTOR, | ||
11 | XEN_CALL_FUNCTION_VECTOR, | ||
12 | |||
13 | XEN_NR_IPIS, | ||
14 | }; | ||
15 | 9 | ||
16 | int bind_evtchn_to_irq(unsigned int evtchn); | 10 | int bind_evtchn_to_irq(unsigned int evtchn); |
17 | int bind_evtchn_to_irqhandler(unsigned int evtchn, | 11 | int bind_evtchn_to_irqhandler(unsigned int evtchn, |
@@ -37,6 +31,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, | |||
37 | void unbind_from_irqhandler(unsigned int irq, void *dev_id); | 31 | void unbind_from_irqhandler(unsigned int irq, void *dev_id); |
38 | 32 | ||
39 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); | 33 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); |
34 | int resend_irq_on_evtchn(unsigned int irq); | ||
40 | 35 | ||
41 | static inline void notify_remote_via_evtchn(int port) | 36 | static inline void notify_remote_via_evtchn(int port) |
42 | { | 37 | { |
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 761c83498e03..466204846121 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h | |||
@@ -39,6 +39,7 @@ | |||
39 | 39 | ||
40 | #include <asm/xen/hypervisor.h> | 40 | #include <asm/xen/hypervisor.h> |
41 | #include <xen/interface/grant_table.h> | 41 | #include <xen/interface/grant_table.h> |
42 | #include <asm/xen/grant_table.h> | ||
42 | 43 | ||
43 | /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ | 44 | /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ |
44 | #define NR_GRANT_FRAMES 4 | 45 | #define NR_GRANT_FRAMES 4 |
@@ -102,6 +103,12 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, | |||
102 | void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, | 103 | void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, |
103 | unsigned long pfn); | 104 | unsigned long pfn); |
104 | 105 | ||
106 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | ||
107 | unsigned long max_nr_gframes, | ||
108 | struct grant_entry **__shared); | ||
109 | void arch_gnttab_unmap_shared(struct grant_entry *shared, | ||
110 | unsigned long nr_gframes); | ||
111 | |||
105 | #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) | 112 | #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) |
106 | 113 | ||
107 | #endif /* __ASM_GNTTAB_H__ */ | 114 | #endif /* __ASM_GNTTAB_H__ */ |
diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h new file mode 100644 index 000000000000..4aadcba31af9 --- /dev/null +++ b/include/xen/interface/callback.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /****************************************************************************** | ||
2 | * callback.h | ||
3 | * | ||
4 | * Register guest OS callbacks with Xen. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
7 | * of this software and associated documentation files (the "Software"), to | ||
8 | * deal in the Software without restriction, including without limitation the | ||
9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
10 | * sell copies of the Software, and to permit persons to whom the Software is | ||
11 | * furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | * | ||
24 | * Copyright (c) 2006, Ian Campbell | ||
25 | */ | ||
26 | |||
27 | #ifndef __XEN_PUBLIC_CALLBACK_H__ | ||
28 | #define __XEN_PUBLIC_CALLBACK_H__ | ||
29 | |||
30 | #include "xen.h" | ||
31 | |||
32 | /* | ||
33 | * Prototype for this hypercall is: | ||
34 | * long callback_op(int cmd, void *extra_args) | ||
35 | * @cmd == CALLBACKOP_??? (callback operation). | ||
36 | * @extra_args == Operation-specific extra arguments (NULL if none). | ||
37 | */ | ||
38 | |||
39 | /* ia64, x86: Callback for event delivery. */ | ||
40 | #define CALLBACKTYPE_event 0 | ||
41 | |||
42 | /* x86: Failsafe callback when guest state cannot be restored by Xen. */ | ||
43 | #define CALLBACKTYPE_failsafe 1 | ||
44 | |||
45 | /* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ | ||
46 | #define CALLBACKTYPE_syscall 2 | ||
47 | |||
48 | /* | ||
49 | * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel | ||
50 | * feature is enabled. Do not use this callback type in new code. | ||
51 | */ | ||
52 | #define CALLBACKTYPE_sysenter_deprecated 3 | ||
53 | |||
54 | /* x86: Callback for NMI delivery. */ | ||
55 | #define CALLBACKTYPE_nmi 4 | ||
56 | |||
57 | /* | ||
58 | * x86: sysenter is only available as follows: | ||
59 | * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled | ||
60 | * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs | ||
61 | * ('32-on-32-on-64', '32-on-64-on-64') | ||
62 | * [nb. also 64-bit guest applications on Intel CPUs | ||
63 | * ('64-on-64-on-64'), but syscall is preferred] | ||
64 | */ | ||
65 | #define CALLBACKTYPE_sysenter 5 | ||
66 | |||
67 | /* | ||
68 | * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs | ||
69 | * ('32-on-32-on-64', '32-on-64-on-64') | ||
70 | */ | ||
71 | #define CALLBACKTYPE_syscall32 7 | ||
72 | |||
73 | /* | ||
74 | * Disable event delivery during callback? This flag is ignored for event and | ||
75 | * NMI callbacks: event delivery is unconditionally disabled. | ||
76 | */ | ||
77 | #define _CALLBACKF_mask_events 0 | ||
78 | #define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) | ||
79 | |||
80 | /* | ||
81 | * Register a callback. | ||
82 | */ | ||
83 | #define CALLBACKOP_register 0 | ||
84 | struct callback_register { | ||
85 | uint16_t type; | ||
86 | uint16_t flags; | ||
87 | struct xen_callback address; | ||
88 | }; | ||
89 | |||
90 | /* | ||
91 | * Unregister a callback. | ||
92 | * | ||
93 | * Not all callbacks can be unregistered. -EINVAL will be returned if | ||
94 | * you attempt to unregister such a callback. | ||
95 | */ | ||
96 | #define CALLBACKOP_unregister 1 | ||
97 | struct callback_unregister { | ||
98 | uint16_t type; | ||
99 | uint16_t _unused; | ||
100 | }; | ||
101 | |||
102 | #endif /* __XEN_PUBLIC_CALLBACK_H__ */ | ||
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index 219049802cf2..39da93c21de0 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h | |||
@@ -185,6 +185,7 @@ struct gnttab_map_grant_ref { | |||
185 | grant_handle_t handle; | 185 | grant_handle_t handle; |
186 | uint64_t dev_bus_addr; | 186 | uint64_t dev_bus_addr; |
187 | }; | 187 | }; |
188 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); | ||
188 | 189 | ||
189 | /* | 190 | /* |
190 | * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings | 191 | * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings |
@@ -206,6 +207,7 @@ struct gnttab_unmap_grant_ref { | |||
206 | /* OUT parameters. */ | 207 | /* OUT parameters. */ |
207 | int16_t status; /* GNTST_* */ | 208 | int16_t status; /* GNTST_* */ |
208 | }; | 209 | }; |
210 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); | ||
209 | 211 | ||
210 | /* | 212 | /* |
211 | * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least | 213 | * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least |
@@ -223,8 +225,9 @@ struct gnttab_setup_table { | |||
223 | uint32_t nr_frames; | 225 | uint32_t nr_frames; |
224 | /* OUT parameters. */ | 226 | /* OUT parameters. */ |
225 | int16_t status; /* GNTST_* */ | 227 | int16_t status; /* GNTST_* */ |
226 | ulong *frame_list; | 228 | GUEST_HANDLE(ulong) frame_list; |
227 | }; | 229 | }; |
230 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); | ||
228 | 231 | ||
229 | /* | 232 | /* |
230 | * GNTTABOP_dump_table: Dump the contents of the grant table to the | 233 | * GNTTABOP_dump_table: Dump the contents of the grant table to the |
@@ -237,6 +240,7 @@ struct gnttab_dump_table { | |||
237 | /* OUT parameters. */ | 240 | /* OUT parameters. */ |
238 | int16_t status; /* GNTST_* */ | 241 | int16_t status; /* GNTST_* */ |
239 | }; | 242 | }; |
243 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); | ||
240 | 244 | ||
241 | /* | 245 | /* |
242 | * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The | 246 | * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The |
@@ -255,7 +259,7 @@ struct gnttab_transfer { | |||
255 | /* OUT parameters. */ | 259 | /* OUT parameters. */ |
256 | int16_t status; | 260 | int16_t status; |
257 | }; | 261 | }; |
258 | 262 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); | |
259 | 263 | ||
260 | /* | 264 | /* |
261 | * GNTTABOP_copy: Hypervisor based copy | 265 | * GNTTABOP_copy: Hypervisor based copy |
@@ -296,6 +300,7 @@ struct gnttab_copy { | |||
296 | /* OUT parameters. */ | 300 | /* OUT parameters. */ |
297 | int16_t status; | 301 | int16_t status; |
298 | }; | 302 | }; |
303 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); | ||
299 | 304 | ||
300 | /* | 305 | /* |
301 | * GNTTABOP_query_size: Query the current and maximum sizes of the shared | 306 | * GNTTABOP_query_size: Query the current and maximum sizes of the shared |
@@ -313,7 +318,7 @@ struct gnttab_query_size { | |||
313 | uint32_t max_nr_frames; | 318 | uint32_t max_nr_frames; |
314 | int16_t status; /* GNTST_* */ | 319 | int16_t status; /* GNTST_* */ |
315 | }; | 320 | }; |
316 | 321 | DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); | |
317 | 322 | ||
318 | /* | 323 | /* |
319 | * Bitfield values for update_pin_status.flags. | 324 | * Bitfield values for update_pin_status.flags. |
diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h new file mode 100644 index 000000000000..5a934dd7796d --- /dev/null +++ b/include/xen/interface/io/fbif.h | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * fbif.h -- Xen virtual frame buffer device | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
5 | * of this software and associated documentation files (the "Software"), to | ||
6 | * deal in the Software without restriction, including without limitation the | ||
7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
8 | * sell copies of the Software, and to permit persons to whom the Software is | ||
9 | * furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> | ||
23 | * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> | ||
24 | */ | ||
25 | |||
26 | #ifndef __XEN_PUBLIC_IO_FBIF_H__ | ||
27 | #define __XEN_PUBLIC_IO_FBIF_H__ | ||
28 | |||
29 | /* Out events (frontend -> backend) */ | ||
30 | |||
31 | /* | ||
32 | * Out events may be sent only when requested by backend, and receipt | ||
33 | * of an unknown out event is an error. | ||
34 | */ | ||
35 | |||
36 | /* Event type 1 currently not used */ | ||
37 | /* | ||
38 | * Framebuffer update notification event | ||
39 | * Capable frontend sets feature-update in xenstore. | ||
40 | * Backend requests it by setting request-update in xenstore. | ||
41 | */ | ||
42 | #define XENFB_TYPE_UPDATE 2 | ||
43 | |||
44 | struct xenfb_update { | ||
45 | uint8_t type; /* XENFB_TYPE_UPDATE */ | ||
46 | int32_t x; /* source x */ | ||
47 | int32_t y; /* source y */ | ||
48 | int32_t width; /* rect width */ | ||
49 | int32_t height; /* rect height */ | ||
50 | }; | ||
51 | |||
52 | #define XENFB_OUT_EVENT_SIZE 40 | ||
53 | |||
54 | union xenfb_out_event { | ||
55 | uint8_t type; | ||
56 | struct xenfb_update update; | ||
57 | char pad[XENFB_OUT_EVENT_SIZE]; | ||
58 | }; | ||
59 | |||
60 | /* In events (backend -> frontend) */ | ||
61 | |||
62 | /* | ||
63 | * Frontends should ignore unknown in events. | ||
64 | * No in events currently defined. | ||
65 | */ | ||
66 | |||
67 | #define XENFB_IN_EVENT_SIZE 40 | ||
68 | |||
69 | union xenfb_in_event { | ||
70 | uint8_t type; | ||
71 | char pad[XENFB_IN_EVENT_SIZE]; | ||
72 | }; | ||
73 | |||
74 | /* shared page */ | ||
75 | |||
76 | #define XENFB_IN_RING_SIZE 1024 | ||
77 | #define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) | ||
78 | #define XENFB_IN_RING_OFFS 1024 | ||
79 | #define XENFB_IN_RING(page) \ | ||
80 | ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) | ||
81 | #define XENFB_IN_RING_REF(page, idx) \ | ||
82 | (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) | ||
83 | |||
84 | #define XENFB_OUT_RING_SIZE 2048 | ||
85 | #define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) | ||
86 | #define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) | ||
87 | #define XENFB_OUT_RING(page) \ | ||
88 | ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) | ||
89 | #define XENFB_OUT_RING_REF(page, idx) \ | ||
90 | (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) | ||
91 | |||
92 | struct xenfb_page { | ||
93 | uint32_t in_cons, in_prod; | ||
94 | uint32_t out_cons, out_prod; | ||
95 | |||
96 | int32_t width; /* width of the framebuffer (in pixels) */ | ||
97 | int32_t height; /* height of the framebuffer (in pixels) */ | ||
98 | uint32_t line_length; /* length of a row of pixels (in bytes) */ | ||
99 | uint32_t mem_length; /* length of the framebuffer (in bytes) */ | ||
100 | uint8_t depth; /* depth of a pixel (in bits) */ | ||
101 | |||
102 | /* | ||
103 | * Framebuffer page directory | ||
104 | * | ||
105 | * Each directory page holds PAGE_SIZE / sizeof(*pd) | ||
106 | * framebuffer pages, and can thus map up to PAGE_SIZE * | ||
107 | * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and | ||
108 | * sizeof(unsigned long) == 4, that's 4 Megs. Two directory | ||
109 | * pages should be enough for a while. | ||
110 | */ | ||
111 | unsigned long pd[2]; | ||
112 | }; | ||
113 | |||
114 | /* | ||
115 | * Wart: xenkbd needs to know resolution. Put it here until a better | ||
116 | * solution is found, but don't leak it to the backend. | ||
117 | */ | ||
118 | #ifdef __KERNEL__ | ||
119 | #define XENFB_WIDTH 800 | ||
120 | #define XENFB_HEIGHT 600 | ||
121 | #define XENFB_DEPTH 32 | ||
122 | #endif | ||
123 | |||
124 | #endif | ||
diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h new file mode 100644 index 000000000000..fb97f4284ffd --- /dev/null +++ b/include/xen/interface/io/kbdif.h | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * kbdif.h -- Xen virtual keyboard/mouse | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
5 | * of this software and associated documentation files (the "Software"), to | ||
6 | * deal in the Software without restriction, including without limitation the | ||
7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
8 | * sell copies of the Software, and to permit persons to whom the Software is | ||
9 | * furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> | ||
23 | * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> | ||
24 | */ | ||
25 | |||
26 | #ifndef __XEN_PUBLIC_IO_KBDIF_H__ | ||
27 | #define __XEN_PUBLIC_IO_KBDIF_H__ | ||
28 | |||
29 | /* In events (backend -> frontend) */ | ||
30 | |||
31 | /* | ||
32 | * Frontends should ignore unknown in events. | ||
33 | */ | ||
34 | |||
35 | /* Pointer movement event */ | ||
36 | #define XENKBD_TYPE_MOTION 1 | ||
37 | /* Event type 2 currently not used */ | ||
38 | /* Key event (includes pointer buttons) */ | ||
39 | #define XENKBD_TYPE_KEY 3 | ||
40 | /* | ||
41 | * Pointer position event | ||
42 | * Capable backend sets feature-abs-pointer in xenstore. | ||
43 | * Frontend requests it instead of XENKBD_TYPE_MOTION by setting | ||
44 | * request-abs-pointer in xenstore. | ||
45 | */ | ||
46 | #define XENKBD_TYPE_POS 4 | ||
47 | |||
48 | struct xenkbd_motion { | ||
49 | uint8_t type; /* XENKBD_TYPE_MOTION */ | ||
50 | int32_t rel_x; /* relative X motion */ | ||
51 | int32_t rel_y; /* relative Y motion */ | ||
52 | }; | ||
53 | |||
54 | struct xenkbd_key { | ||
55 | uint8_t type; /* XENKBD_TYPE_KEY */ | ||
56 | uint8_t pressed; /* 1 if pressed; 0 otherwise */ | ||
57 | uint32_t keycode; /* KEY_* from linux/input.h */ | ||
58 | }; | ||
59 | |||
60 | struct xenkbd_position { | ||
61 | uint8_t type; /* XENKBD_TYPE_POS */ | ||
62 | int32_t abs_x; /* absolute X position (in FB pixels) */ | ||
63 | int32_t abs_y; /* absolute Y position (in FB pixels) */ | ||
64 | }; | ||
65 | |||
66 | #define XENKBD_IN_EVENT_SIZE 40 | ||
67 | |||
68 | union xenkbd_in_event { | ||
69 | uint8_t type; | ||
70 | struct xenkbd_motion motion; | ||
71 | struct xenkbd_key key; | ||
72 | struct xenkbd_position pos; | ||
73 | char pad[XENKBD_IN_EVENT_SIZE]; | ||
74 | }; | ||
75 | |||
76 | /* Out events (frontend -> backend) */ | ||
77 | |||
78 | /* | ||
79 | * Out events may be sent only when requested by backend, and receipt | ||
80 | * of an unknown out event is an error. | ||
81 | * No out events currently defined. | ||
82 | */ | ||
83 | |||
84 | #define XENKBD_OUT_EVENT_SIZE 40 | ||
85 | |||
86 | union xenkbd_out_event { | ||
87 | uint8_t type; | ||
88 | char pad[XENKBD_OUT_EVENT_SIZE]; | ||
89 | }; | ||
90 | |||
91 | /* shared page */ | ||
92 | |||
93 | #define XENKBD_IN_RING_SIZE 2048 | ||
94 | #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) | ||
95 | #define XENKBD_IN_RING_OFFS 1024 | ||
96 | #define XENKBD_IN_RING(page) \ | ||
97 | ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) | ||
98 | #define XENKBD_IN_RING_REF(page, idx) \ | ||
99 | (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) | ||
100 | |||
101 | #define XENKBD_OUT_RING_SIZE 1024 | ||
102 | #define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) | ||
103 | #define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) | ||
104 | #define XENKBD_OUT_RING(page) \ | ||
105 | ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) | ||
106 | #define XENKBD_OUT_RING_REF(page, idx) \ | ||
107 | (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) | ||
108 | |||
109 | struct xenkbd_page { | ||
110 | uint32_t in_cons, in_prod; | ||
111 | uint32_t out_cons, out_prod; | ||
112 | }; | ||
113 | |||
114 | #endif | ||
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h new file mode 100644 index 000000000000..01fc8ae5f0b0 --- /dev/null +++ b/include/xen/interface/io/protocols.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef __XEN_PROTOCOLS_H__ | ||
2 | #define __XEN_PROTOCOLS_H__ | ||
3 | |||
4 | #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" | ||
5 | #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" | ||
6 | #define XEN_IO_PROTO_ABI_IA64 "ia64-abi" | ||
7 | #define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" | ||
8 | |||
9 | #if defined(__i386__) | ||
10 | # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 | ||
11 | #elif defined(__x86_64__) | ||
12 | # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 | ||
13 | #elif defined(__ia64__) | ||
14 | # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 | ||
15 | #elif defined(__powerpc64__) | ||
16 | # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 | ||
17 | #else | ||
18 | # error arch fixup needed here | ||
19 | #endif | ||
20 | |||
21 | #endif | ||
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index af36ead16817..da768469aa92 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h | |||
@@ -29,7 +29,7 @@ struct xen_memory_reservation { | |||
29 | * OUT: GMFN bases of extents that were allocated | 29 | * OUT: GMFN bases of extents that were allocated |
30 | * (NB. This command also updates the mach_to_phys translation table) | 30 | * (NB. This command also updates the mach_to_phys translation table) |
31 | */ | 31 | */ |
32 | GUEST_HANDLE(ulong) extent_start; | 32 | ulong extent_start; |
33 | 33 | ||
34 | /* Number of extents, and size/alignment of each (2^extent_order pages). */ | 34 | /* Number of extents, and size/alignment of each (2^extent_order pages). */ |
35 | unsigned long nr_extents; | 35 | unsigned long nr_extents; |
@@ -50,7 +50,6 @@ struct xen_memory_reservation { | |||
50 | domid_t domid; | 50 | domid_t domid; |
51 | 51 | ||
52 | }; | 52 | }; |
53 | DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); | ||
54 | 53 | ||
55 | /* | 54 | /* |
56 | * Returns the maximum machine frame number of mapped RAM in this system. | 55 | * Returns the maximum machine frame number of mapped RAM in this system. |
@@ -86,7 +85,7 @@ struct xen_machphys_mfn_list { | |||
86 | * any large discontiguities in the machine address space, 2MB gaps in | 85 | * any large discontiguities in the machine address space, 2MB gaps in |
87 | * the machphys table will be represented by an MFN base of zero. | 86 | * the machphys table will be represented by an MFN base of zero. |
88 | */ | 87 | */ |
89 | GUEST_HANDLE(ulong) extent_start; | 88 | ulong extent_start; |
90 | 89 | ||
91 | /* | 90 | /* |
92 | * Number of extents written to the above array. This will be smaller | 91 | * Number of extents written to the above array. This will be smaller |
@@ -94,7 +93,6 @@ struct xen_machphys_mfn_list { | |||
94 | */ | 93 | */ |
95 | unsigned int nr_extents; | 94 | unsigned int nr_extents; |
96 | }; | 95 | }; |
97 | DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); | ||
98 | 96 | ||
99 | /* | 97 | /* |
100 | * Sets the GPFN at which a particular page appears in the specified guest's | 98 | * Sets the GPFN at which a particular page appears in the specified guest's |
@@ -117,7 +115,6 @@ struct xen_add_to_physmap { | |||
117 | /* GPFN where the source mapping page should appear. */ | 115 | /* GPFN where the source mapping page should appear. */ |
118 | unsigned long gpfn; | 116 | unsigned long gpfn; |
119 | }; | 117 | }; |
120 | DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); | ||
121 | 118 | ||
122 | /* | 119 | /* |
123 | * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error | 120 | * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error |
@@ -132,14 +129,13 @@ struct xen_translate_gpfn_list { | |||
132 | unsigned long nr_gpfns; | 129 | unsigned long nr_gpfns; |
133 | 130 | ||
134 | /* List of GPFNs to translate. */ | 131 | /* List of GPFNs to translate. */ |
135 | GUEST_HANDLE(ulong) gpfn_list; | 132 | ulong gpfn_list; |
136 | 133 | ||
137 | /* | 134 | /* |
138 | * Output list to contain MFN translations. May be the same as the input | 135 | * Output list to contain MFN translations. May be the same as the input |
139 | * list (in which case each input GPFN is overwritten with the output MFN). | 136 | * list (in which case each input GPFN is overwritten with the output MFN). |
140 | */ | 137 | */ |
141 | GUEST_HANDLE(ulong) mfn_list; | 138 | ulong mfn_list; |
142 | }; | 139 | }; |
143 | DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); | ||
144 | 140 | ||
145 | #endif /* __XEN_PUBLIC_MEMORY_H__ */ | 141 | #endif /* __XEN_PUBLIC_MEMORY_H__ */ |
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index b05d8a6d9143..87e6f8a48661 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h | |||
@@ -85,6 +85,7 @@ struct vcpu_runstate_info { | |||
85 | */ | 85 | */ |
86 | uint64_t time[4]; | 86 | uint64_t time[4]; |
87 | }; | 87 | }; |
88 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); | ||
88 | 89 | ||
89 | /* VCPU is currently running on a physical CPU. */ | 90 | /* VCPU is currently running on a physical CPU. */ |
90 | #define RUNSTATE_running 0 | 91 | #define RUNSTATE_running 0 |
@@ -119,6 +120,7 @@ struct vcpu_runstate_info { | |||
119 | #define VCPUOP_register_runstate_memory_area 5 | 120 | #define VCPUOP_register_runstate_memory_area 5 |
120 | struct vcpu_register_runstate_memory_area { | 121 | struct vcpu_register_runstate_memory_area { |
121 | union { | 122 | union { |
123 | GUEST_HANDLE(vcpu_runstate_info) h; | ||
122 | struct vcpu_runstate_info *v; | 124 | struct vcpu_runstate_info *v; |
123 | uint64_t p; | 125 | uint64_t p; |
124 | } addr; | 126 | } addr; |
@@ -134,6 +136,7 @@ struct vcpu_register_runstate_memory_area { | |||
134 | struct vcpu_set_periodic_timer { | 136 | struct vcpu_set_periodic_timer { |
135 | uint64_t period_ns; | 137 | uint64_t period_ns; |
136 | }; | 138 | }; |
139 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer); | ||
137 | 140 | ||
138 | /* | 141 | /* |
139 | * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot | 142 | * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot |
@@ -145,6 +148,7 @@ struct vcpu_set_singleshot_timer { | |||
145 | uint64_t timeout_abs_ns; | 148 | uint64_t timeout_abs_ns; |
146 | uint32_t flags; /* VCPU_SSHOTTMR_??? */ | 149 | uint32_t flags; /* VCPU_SSHOTTMR_??? */ |
147 | }; | 150 | }; |
151 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer); | ||
148 | 152 | ||
149 | /* Flags to VCPUOP_set_singleshot_timer. */ | 153 | /* Flags to VCPUOP_set_singleshot_timer. */ |
150 | /* Require the timeout to be in the future (return -ETIME if it's passed). */ | 154 | /* Require the timeout to be in the future (return -ETIME if it's passed). */ |
@@ -164,5 +168,6 @@ struct vcpu_register_vcpu_info { | |||
164 | uint32_t offset; /* offset within page */ | 168 | uint32_t offset; /* offset within page */ |
165 | uint32_t rsvd; /* unused */ | 169 | uint32_t rsvd; /* unused */ |
166 | }; | 170 | }; |
171 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); | ||
167 | 172 | ||
168 | #endif /* __XEN_PUBLIC_VCPU_H__ */ | 173 | #endif /* __XEN_PUBLIC_VCPU_H__ */ |
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 518a5bf79ed3..9b018da48cf3 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h | |||
@@ -58,6 +58,16 @@ | |||
58 | #define __HYPERVISOR_physdev_op 33 | 58 | #define __HYPERVISOR_physdev_op 33 |
59 | #define __HYPERVISOR_hvm_op 34 | 59 | #define __HYPERVISOR_hvm_op 34 |
60 | 60 | ||
61 | /* Architecture-specific hypercall definitions. */ | ||
62 | #define __HYPERVISOR_arch_0 48 | ||
63 | #define __HYPERVISOR_arch_1 49 | ||
64 | #define __HYPERVISOR_arch_2 50 | ||
65 | #define __HYPERVISOR_arch_3 51 | ||
66 | #define __HYPERVISOR_arch_4 52 | ||
67 | #define __HYPERVISOR_arch_5 53 | ||
68 | #define __HYPERVISOR_arch_6 54 | ||
69 | #define __HYPERVISOR_arch_7 55 | ||
70 | |||
61 | /* | 71 | /* |
62 | * VIRTUAL INTERRUPTS | 72 | * VIRTUAL INTERRUPTS |
63 | * | 73 | * |
@@ -68,8 +78,18 @@ | |||
68 | #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ | 78 | #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ |
69 | #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ | 79 | #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ |
70 | #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ | 80 | #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ |
71 | #define NR_VIRQS 8 | ||
72 | 81 | ||
82 | /* Architecture-specific VIRQ definitions. */ | ||
83 | #define VIRQ_ARCH_0 16 | ||
84 | #define VIRQ_ARCH_1 17 | ||
85 | #define VIRQ_ARCH_2 18 | ||
86 | #define VIRQ_ARCH_3 19 | ||
87 | #define VIRQ_ARCH_4 20 | ||
88 | #define VIRQ_ARCH_5 21 | ||
89 | #define VIRQ_ARCH_6 22 | ||
90 | #define VIRQ_ARCH_7 23 | ||
91 | |||
92 | #define NR_VIRQS 24 | ||
73 | /* | 93 | /* |
74 | * MMU-UPDATE REQUESTS | 94 | * MMU-UPDATE REQUESTS |
75 | * | 95 | * |
diff --git a/include/xen/interface/xencomm.h b/include/xen/interface/xencomm.h new file mode 100644 index 000000000000..ac45e0712afa --- /dev/null +++ b/include/xen/interface/xencomm.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
3 | * of this software and associated documentation files (the "Software"), to | ||
4 | * deal in the Software without restriction, including without limitation the | ||
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
6 | * sell copies of the Software, and to permit persons to whom the Software is | ||
7 | * furnished to do so, subject to the following conditions: | ||
8 | * | ||
9 | * The above copyright notice and this permission notice shall be included in | ||
10 | * all copies or substantial portions of the Software. | ||
11 | * | ||
12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
18 | * DEALINGS IN THE SOFTWARE. | ||
19 | * | ||
20 | * Copyright (C) IBM Corp. 2006 | ||
21 | */ | ||
22 | |||
23 | #ifndef _XEN_XENCOMM_H_ | ||
24 | #define _XEN_XENCOMM_H_ | ||
25 | |||
26 | /* A xencomm descriptor is a scatter/gather list containing physical | ||
27 | * addresses corresponding to a virtually contiguous memory area. The | ||
28 | * hypervisor translates these physical addresses to machine addresses to copy | ||
29 | * to and from the virtually contiguous area. | ||
30 | */ | ||
31 | |||
32 | #define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ | ||
33 | #define XENCOMM_INVALID (~0UL) | ||
34 | |||
35 | struct xencomm_desc { | ||
36 | uint32_t magic; | ||
37 | uint32_t nr_addrs; /* the number of entries in address[] */ | ||
38 | uint64_t address[0]; | ||
39 | }; | ||
40 | |||
41 | #endif /* _XEN_XENCOMM_H_ */ | ||
diff --git a/include/xen/page.h b/include/xen/page.h index 031ef22a971e..eaf85fab1263 100644 --- a/include/xen/page.h +++ b/include/xen/page.h | |||
@@ -1,180 +1 @@ | |||
1 | #ifndef __XEN_PAGE_H | #include <asm/xen/page.h> | |
2 | #define __XEN_PAGE_H | ||
3 | |||
4 | #include <linux/pfn.h> | ||
5 | |||
6 | #include <asm/uaccess.h> | ||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | #include <xen/features.h> | ||
10 | |||
11 | #ifdef CONFIG_X86_PAE | ||
12 | /* Xen machine address */ | ||
13 | typedef struct xmaddr { | ||
14 | unsigned long long maddr; | ||
15 | } xmaddr_t; | ||
16 | |||
17 | /* Xen pseudo-physical address */ | ||
18 | typedef struct xpaddr { | ||
19 | unsigned long long paddr; | ||
20 | } xpaddr_t; | ||
21 | #else | ||
22 | /* Xen machine address */ | ||
23 | typedef struct xmaddr { | ||
24 | unsigned long maddr; | ||
25 | } xmaddr_t; | ||
26 | |||
27 | /* Xen pseudo-physical address */ | ||
28 | typedef struct xpaddr { | ||
29 | unsigned long paddr; | ||
30 | } xpaddr_t; | ||
31 | #endif | ||
32 | |||
33 | #define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) | ||
34 | #define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) | ||
35 | |||
36 | /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ | ||
37 | #define INVALID_P2M_ENTRY (~0UL) | ||
38 | #define FOREIGN_FRAME_BIT (1UL<<31) | ||
39 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) | ||
40 | |||
41 | extern unsigned long *phys_to_machine_mapping; | ||
42 | |||
43 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | ||
44 | { | ||
45 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
46 | return pfn; | ||
47 | |||
48 | return phys_to_machine_mapping[(unsigned int)(pfn)] & | ||
49 | ~FOREIGN_FRAME_BIT; | ||
50 | } | ||
51 | |||
52 | static inline int phys_to_machine_mapping_valid(unsigned long pfn) | ||
53 | { | ||
54 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
55 | return 1; | ||
56 | |||
57 | return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); | ||
58 | } | ||
59 | |||
60 | static inline unsigned long mfn_to_pfn(unsigned long mfn) | ||
61 | { | ||
62 | unsigned long pfn; | ||
63 | |||
64 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
65 | return mfn; | ||
66 | |||
67 | #if 0 | ||
68 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | ||
69 | return max_mapnr; | ||
70 | #endif | ||
71 | |||
72 | pfn = 0; | ||
73 | /* | ||
74 | * The array access can fail (e.g., device space beyond end of RAM). | ||
75 | * In such cases it doesn't matter what we return (we return garbage), | ||
76 | * but we must handle the fault without crashing! | ||
77 | */ | ||
78 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
79 | |||
80 | return pfn; | ||
81 | } | ||
82 | |||
83 | static inline xmaddr_t phys_to_machine(xpaddr_t phys) | ||
84 | { | ||
85 | unsigned offset = phys.paddr & ~PAGE_MASK; | ||
86 | return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); | ||
87 | } | ||
88 | |||
89 | static inline xpaddr_t machine_to_phys(xmaddr_t machine) | ||
90 | { | ||
91 | unsigned offset = machine.maddr & ~PAGE_MASK; | ||
92 | return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * We detect special mappings in one of two ways: | ||
97 | * 1. If the MFN is an I/O page then Xen will set the m2p entry | ||
98 | * to be outside our maximum possible pseudophys range. | ||
99 | * 2. If the MFN belongs to a different domain then we will certainly | ||
100 | * not have MFN in our p2m table. Conversely, if the page is ours, | ||
101 | * then we'll have p2m(m2p(MFN))==MFN. | ||
102 | * If we detect a special mapping then it doesn't have a 'struct page'. | ||
103 | * We force !pfn_valid() by returning an out-of-range pointer. | ||
104 | * | ||
105 | * NB. These checks require that, for any MFN that is not in our reservation, | ||
106 | * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if | ||
107 | * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN. | ||
108 | * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. | ||
109 | * | ||
110 | * NB2. When deliberately mapping foreign pages into the p2m table, you *must* | ||
111 | * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we | ||
112 | * require. In all the cases we care about, the FOREIGN_FRAME bit is | ||
113 | * masked (e.g., pfn_to_mfn()) so behaviour there is correct. | ||
114 | */ | ||
115 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | ||
116 | { | ||
117 | extern unsigned long max_mapnr; | ||
118 | unsigned long pfn = mfn_to_pfn(mfn); | ||
119 | if ((pfn < max_mapnr) | ||
120 | && !xen_feature(XENFEAT_auto_translated_physmap) | ||
121 | && (phys_to_machine_mapping[pfn] != mfn)) | ||
122 | return max_mapnr; /* force !pfn_valid() */ | ||
123 | return pfn; | ||
124 | } | ||
125 | |||
126 | static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
127 | { | ||
128 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
129 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
130 | return; | ||
131 | } | ||
132 | phys_to_machine_mapping[pfn] = mfn; | ||
133 | } | ||
134 | |||
135 | /* VIRT <-> MACHINE conversion */ | ||
136 | #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) | ||
137 | #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) | ||
138 | #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) | ||
139 | |||
140 | #ifdef CONFIG_X86_PAE | ||
141 | #define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ | ||
142 | (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) | ||
143 | |||
144 | static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) | ||
145 | { | ||
146 | pte_t pte; | ||
147 | |||
148 | pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | | ||
149 | (pgprot_val(pgprot) >> 32); | ||
150 | pte.pte_high &= (__supported_pte_mask >> 32); | ||
151 | pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); | ||
152 | pte.pte_low &= __supported_pte_mask; | ||
153 | |||
154 | return pte; | ||
155 | } | ||
156 | |||
157 | static inline unsigned long long pte_val_ma(pte_t x) | ||
158 | { | ||
159 | return x.pte; | ||
160 | } | ||
161 | #define pmd_val_ma(v) ((v).pmd) | ||
162 | #define pud_val_ma(v) ((v).pgd.pgd) | ||
163 | #define __pte_ma(x) ((pte_t) { .pte = (x) }) | ||
164 | #define __pmd_ma(x) ((pmd_t) { (x) } ) | ||
165 | #else /* !X86_PAE */ | ||
166 | #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) | ||
167 | #define mfn_pte(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) | ||
168 | #define pte_val_ma(x) ((x).pte) | ||
169 | #define pmd_val_ma(v) ((v).pud.pgd.pgd) | ||
170 | #define __pte_ma(x) ((pte_t) { (x) } ) | ||
171 | #endif /* CONFIG_X86_PAE */ | ||
172 | |||
173 | #define pgd_val_ma(x) ((x).pgd) | ||
174 | |||
175 | |||
176 | xmaddr_t arbitrary_virt_to_machine(unsigned long address); | ||
177 | void make_lowmem_page_readonly(void *vaddr); | ||
178 | void make_lowmem_page_readwrite(void *vaddr); | ||
179 | |||
180 | #endif /* __XEN_PAGE_H */ | ||
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h new file mode 100644 index 000000000000..10ddfe0142d0 --- /dev/null +++ b/include/xen/xen-ops.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef INCLUDE_XEN_OPS_H | ||
2 | #define INCLUDE_XEN_OPS_H | ||
3 | |||
4 | #include <linux/percpu.h> | ||
5 | |||
6 | DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); | ||
7 | |||
8 | #endif /* INCLUDE_XEN_OPS_H */ | ||
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 6f7c290651ae..6369d89c25d5 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h | |||
@@ -97,6 +97,7 @@ struct xenbus_driver { | |||
97 | int (*uevent)(struct xenbus_device *, char **, int, char *, int); | 97 | int (*uevent)(struct xenbus_device *, char **, int, char *, int); |
98 | struct device_driver driver; | 98 | struct device_driver driver; |
99 | int (*read_otherend_details)(struct xenbus_device *dev); | 99 | int (*read_otherend_details)(struct xenbus_device *dev); |
100 | int (*is_ready)(struct xenbus_device *dev); | ||
100 | }; | 101 | }; |
101 | 102 | ||
102 | static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) | 103 | static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) |
diff --git a/include/xen/xencomm.h b/include/xen/xencomm.h new file mode 100644 index 000000000000..e43b039be112 --- /dev/null +++ b/include/xen/xencomm.h | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
15 | * | ||
16 | * Copyright (C) IBM Corp. 2006 | ||
17 | * | ||
18 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
19 | * Jerone Young <jyoung5@us.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #ifndef _LINUX_XENCOMM_H_ | ||
23 | #define _LINUX_XENCOMM_H_ | ||
24 | |||
25 | #include <xen/interface/xencomm.h> | ||
26 | |||
27 | #define XENCOMM_MINI_ADDRS 3 | ||
28 | struct xencomm_mini { | ||
29 | struct xencomm_desc _desc; | ||
30 | uint64_t address[XENCOMM_MINI_ADDRS]; | ||
31 | }; | ||
32 | |||
33 | /* To avoid additional virt to phys conversion, an opaque structure is | ||
34 | presented. */ | ||
35 | struct xencomm_handle; | ||
36 | |||
37 | extern void xencomm_free(struct xencomm_handle *desc); | ||
38 | extern struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes); | ||
39 | extern struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, | ||
40 | unsigned long bytes, struct xencomm_mini *xc_area); | ||
41 | |||
42 | #if 0 | ||
43 | #define XENCOMM_MINI_ALIGNED(xc_desc, n) \ | ||
44 | struct xencomm_mini xc_desc ## _base[(n)] \ | ||
45 | __attribute__((__aligned__(sizeof(struct xencomm_mini)))); \ | ||
46 | struct xencomm_mini *xc_desc = &xc_desc ## _base[0]; | ||
47 | #else | ||
48 | /* | ||
49 | * gcc bug workaround: | ||
50 | * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16660 | ||
51 | * gcc doesn't properly handle stack variables declared with | ||
52 | * __attribute__((__aligned__(sizeof(struct xencomm_mini)))) | ||
53 | */ | ||
54 | #define XENCOMM_MINI_ALIGNED(xc_desc, n) \ | ||
55 | unsigned char xc_desc ## _base[((n) + 1 ) * \ | ||
56 | sizeof(struct xencomm_mini)]; \ | ||
57 | struct xencomm_mini *xc_desc = (struct xencomm_mini *) \ | ||
58 | ((unsigned long)xc_desc ## _base + \ | ||
59 | (sizeof(struct xencomm_mini) - \ | ||
60 | ((unsigned long)xc_desc ## _base) % \ | ||
61 | sizeof(struct xencomm_mini))); | ||
62 | #endif | ||
63 | #define xencomm_map_no_alloc(ptr, bytes) \ | ||
64 | ({ XENCOMM_MINI_ALIGNED(xc_desc, 1); \ | ||
65 | __xencomm_map_no_alloc(ptr, bytes, xc_desc); }) | ||
66 | |||
67 | /* provided by architecture code: */ | ||
68 | extern unsigned long xencomm_vtop(unsigned long vaddr); | ||
69 | |||
70 | static inline void *xencomm_pa(void *ptr) | ||
71 | { | ||
72 | return (void *)xencomm_vtop((unsigned long)ptr); | ||
73 | } | ||
74 | |||
75 | #define xen_guest_handle(hnd) ((hnd).p) | ||
76 | |||
77 | #endif /* _LINUX_XENCOMM_H_ */ | ||