Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--  arch/x86/xen/mmu.c  293
1 file changed, 288 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 413b19b3d0fe..42086ac406af 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
 #include <linux/highmem.h>
 #include <linux/debugfs.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
 
@@ -51,15 +52,19 @@
 #include <asm/mmu_context.h>
 #include <asm/setup.h>
 #include <asm/paravirt.h>
+#include <asm/e820.h>
 #include <asm/linkage.h>
+#include <asm/page.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/version.h>
+#include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 
 #include "multicalls.h"
@@ -68,6 +73,13 @@
 
 #define MMU_UPDATE_HISTO 30
 
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+DEFINE_SPINLOCK(xen_reservation_lock);
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct {
@@ -378,6 +390,28 @@ static bool xen_page_pinned(void *ptr)
         return PagePinned(page);
 }
 
+static bool xen_iomap_pte(pte_t pte)
+{
+        return pte_flags(pte) & _PAGE_IOMAP;
+}
+
+static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+{
+        struct multicall_space mcs;
+        struct mmu_update *u;
+
+        mcs = xen_mc_entry(sizeof(*u));
+        u = mcs.args;
+
+        /* ptep might be kmapped when using 32-bit HIGHPTE */
+        u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+        u->val = pte_val_ma(pteval);
+
+        MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+
+        xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
         struct multicall_space mcs;
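
Batching aside, the new xen_set_iomap_pte() boils down to a single mmu_update hypercall issued against DOMID_IO, which tells Xen that the frame in the pte belongs to I/O space rather than to this domain's pseudo-physical memory. A minimal unbatched sketch of the same operation, using the existing hypercall wrapper (illustrative only, not part of the patch):

    static void set_iomap_pte_unbatched(pte_t *ptep, pte_t pteval)
    {
        struct mmu_update u;

        /* resolve ptep to a machine address; it may be a kmap alias */
        u.ptr = arbitrary_virt_to_machine(ptep).maddr;
        u.val = pte_val_ma(pteval);

        /* DOMID_IO: the target frame is an I/O-space machine frame */
        if (HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_IO) < 0)
            BUG();
    }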
@@ -454,6 +488,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval)
 {
+        if (xen_iomap_pte(pteval)) {
+                xen_set_iomap_pte(ptep, pteval);
+                goto out;
+        }
+
         ADD_STATS(set_pte_at, 1);
 //      ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
         ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -524,8 +563,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
         return val;
 }
 
+static pteval_t iomap_pte(pteval_t val)
+{
+        if (val & _PAGE_PRESENT) {
+                unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+                pteval_t flags = val & PTE_FLAGS_MASK;
+
+                /* We assume the pte frame number is a MFN, so
+                   just use it as-is. */
+                val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
+        }
+
+        return val;
+}
+
 pteval_t xen_pte_val(pte_t pte)
 {
+        if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
+                return pte.pte;
+
         return pte_mfn_to_pfn(pte.pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
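
The iomap_pte() identity treatment is pure mask-and-reassemble: the frame field of an I/O pte already holds a machine frame number, so no p2m lookup is involved. The same arithmetic in isolation, as ordinary userspace C (the mask approximates the x86-64 layout; the pte value is made up):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT     12
    #define PTE_PFN_MASK   0x000ffffffffff000ULL  /* frame bits 12..51 */
    #define PTE_FLAGS_MASK (~PTE_PFN_MASK)

    int main(void)
    {
        uint64_t val   = 0x00000000fee00067ULL;  /* hypothetical present I/O pte */
        uint64_t frame = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
        uint64_t flags = val & PTE_FLAGS_MASK;

        /* iomap_pte() rebuilds the pte from the same frame and flags:
           the frame is taken to be an MFN and is not translated */
        printf("frame=%#llx flags=%#llx pte=%#llx\n",
               (unsigned long long)frame, (unsigned long long)flags,
               (unsigned long long)((frame << PAGE_SHIFT) | flags));
        return 0;
    }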
@@ -538,7 +594,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
 
 pte_t xen_make_pte(pteval_t pte)
 {
-        pte = pte_pfn_to_mfn(pte);
+        phys_addr_t addr = (pte & PTE_PFN_MASK);
+
+        /*
+         * Unprivileged domains are allowed to do IOMAPpings for
+         * PCI passthrough, but not map ISA space.  The ISA
+         * mappings are just dummy local mappings to keep other
+         * parts of the kernel happy.
+         */
+        if (unlikely(pte & _PAGE_IOMAP) &&
+            (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+                pte = iomap_pte(pte);
+        } else {
+                pte &= ~_PAGE_IOMAP;
+                pte = pte_pfn_to_mfn(pte);
+        }
+
         return native_make_pte(pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
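
The condition in xen_make_pte() is the subtle part: dom0 may keep an I/O mapping anywhere, while an unprivileged domain keeps one only above the legacy ISA hole (its ISA-range mappings are dummy local pages). Restated as a standalone predicate (a sketch; the bit position of _PAGE_IOMAP matches the contemporary pgtable_types.h, and the function name is invented):

    #include <stdbool.h>
    #include <stdint.h>

    #define ISA_END_ADDRESS 0x100000        /* end of legacy ISA space: 1MB */
    #define _PAGE_IOMAP     (1ULL << 10)    /* software pte bit */

    /* True when the pte keeps its raw machine frame (the iomap_pte() path);
       false when the frame is translated through pte_pfn_to_mfn(). */
    static bool keeps_machine_frame(uint64_t pte, uint64_t addr,
                                    bool initial_domain)
    {
        return (pte & _PAGE_IOMAP) &&
               (initial_domain || addr >= ISA_END_ADDRESS);
    }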
@@ -594,6 +665,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+        if (xen_iomap_pte(pte)) {
+                xen_set_iomap_pte(ptep, pte);
+                return;
+        }
+
         ADD_STATS(pte_update, 1);
 //      ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
         ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@ -610,6 +686,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+        if (xen_iomap_pte(pte)) {
+                xen_set_iomap_pte(ptep, pte);
+                return;
+        }
+
         set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
@@ -936,8 +1017,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
-        vm_unmap_aliases();
-
         xen_mc_batch();
 
         if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -1501,7 +1580,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
         if (PagePinned(virt_to_page(mm->pgd))) {
                 SetPagePinned(page);
 
-                vm_unmap_aliases();
                 if (!PageHighMem(page)) {
                         make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
                         if (level == PT_PTE && USE_SPLIT_PTLOCKS)
@@ -1812,9 +1890,16 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
                 pte = pfn_pte(phys, prot);
                 break;
 
-        default:
+        case FIX_PARAVIRT_BOOTMAP:
+                /* This is an MFN, but it isn't an IO mapping from the
+                   IO domain */
                 pte = mfn_pte(phys, prot);
                 break;
+
+        default:
+                /* By default, set_fixmap is used for hardware mappings */
+                pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+                break;
         }
 
         __native_set_fixmap(idx, pte);
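
After this hunk, fixmap slots fall into three buckets: the elided cases built with pfn_pte() on a pseudo-physical frame, FIX_PARAVIRT_BOOTMAP built from a machine frame but not tagged as I/O, and everything else, which now defaults to a hardware mapping. In pte terms (illustrative; mfn_pte() and __pgprot() are the real helpers, the frame number is made up):

    pte = mfn_pte(0xfee00, prot);                                     /* MFN, not an I/O mapping */
    pte = mfn_pte(0xfee00, __pgprot(pgprot_val(prot) | _PAGE_IOMAP)); /* hardware fixmap default */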
@@ -1940,7 +2025,205 @@ void __init xen_init_mmu_ops(void)
         x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
         x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
         pv_mmu_ops = xen_mmu_ops;
+
+        vmap_lazy_unmap = false;
+}
+
+/* Protected by xen_reservation_lock. */
+#define MAX_CONTIG_ORDER 9 /* 2MB */
+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+
+#define VOID_PTE (mfn_pte(0, __pgprot(0)))
+static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
+                              unsigned long *in_frames,
+                              unsigned long *out_frames)
+{
+        int i;
+        struct multicall_space mcs;
+
+        xen_mc_batch();
+        for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
+                mcs = __xen_mc_entry(0);
+
+                if (in_frames)
+                        in_frames[i] = virt_to_mfn(vaddr);
+
+                MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
+                set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+
+                if (out_frames)
+                        out_frames[i] = virt_to_pfn(vaddr);
+        }
+        xen_mc_issue(0);
+}
+
+/*
+ * Update the pfn-to-mfn mappings for a virtual address range, either to
+ * point to an array of mfns, or contiguously from a single starting
+ * mfn.
+ */
+static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
+                                     unsigned long *mfns,
+                                     unsigned long first_mfn)
+{
+        unsigned i, limit;
+        unsigned long mfn;
+
+        xen_mc_batch();
+
+        limit = 1u << order;
+        for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
+                struct multicall_space mcs;
+                unsigned flags;
+
+                mcs = __xen_mc_entry(0);
+                if (mfns)
+                        mfn = mfns[i];
+                else
+                        mfn = first_mfn + i;
+
+                if (i < (limit - 1))
+                        flags = 0;
+                else {
+                        if (order == 0)
+                                flags = UVMF_INVLPG | UVMF_ALL;
+                        else
+                                flags = UVMF_TLB_FLUSH | UVMF_ALL;
+                }
+
+                MULTI_update_va_mapping(mcs.mc, vaddr,
+                                mfn_pte(mfn, PAGE_KERNEL), flags);
+
+                set_phys_to_machine(virt_to_pfn(vaddr), mfn);
+        }
+
+        xen_mc_issue(0);
+}
+
+/*
+ * Perform the hypercall to exchange a region of our pfns to point to
+ * memory with the required contiguous alignment.  Takes the pfns as
+ * input, and populates mfns as output.
+ *
+ * Returns a success code indicating whether the hypervisor was able to
+ * satisfy the request or not.
+ */
+static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
+                               unsigned long *pfns_in,
+                               unsigned long extents_out,
+                               unsigned int order_out,
+                               unsigned long *mfns_out,
+                               unsigned int address_bits)
+{
+        long rc;
+        int success;
+
+        struct xen_memory_exchange exchange = {
+                .in = {
+                        .nr_extents   = extents_in,
+                        .extent_order = order_in,
+                        .extent_start = pfns_in,
+                        .domid        = DOMID_SELF
+                },
+                .out = {
+                        .nr_extents   = extents_out,
+                        .extent_order = order_out,
+                        .extent_start = mfns_out,
+                        .address_bits = address_bits,
+                        .domid        = DOMID_SELF
+                }
+        };
+
+        BUG_ON(extents_in << order_in != extents_out << order_out);
+
+        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+        success = (exchange.nr_exchanged == extents_in);
+
+        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+        BUG_ON(success && (rc != 0));
+
+        return success;
+}
+
+int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
+                                 unsigned int address_bits)
+{
+        unsigned long *in_frames = discontig_frames, out_frame;
+        unsigned long  flags;
+        int            success;
+
+        /*
+         * Currently an auto-translated guest will not perform I/O, nor will
+         * it require PAE page directories below 4GB. Therefore any calls to
+         * this function are redundant and can be ignored.
+         */
+
+        if (xen_feature(XENFEAT_auto_translated_physmap))
+                return 0;
+
+        if (unlikely(order > MAX_CONTIG_ORDER))
+                return -ENOMEM;
+
+        memset((void *) vstart, 0, PAGE_SIZE << order);
+
+        spin_lock_irqsave(&xen_reservation_lock, flags);
+
+        /* 1. Zap current PTEs, remembering MFNs. */
+        xen_zap_pfn_range(vstart, order, in_frames, NULL);
+
+        /* 2. Get a new contiguous memory extent. */
+        out_frame = virt_to_pfn(vstart);
+        success = xen_exchange_memory(1UL << order, 0, in_frames,
+                                      1, order, &out_frame,
+                                      address_bits);
+
+        /* 3. Map the new extent in place of old pages. */
+        if (success)
+                xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
+        else
+                xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
+
+        spin_unlock_irqrestore(&xen_reservation_lock, flags);
+
+        return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+        unsigned long *out_frames = discontig_frames, in_frame;
+        unsigned long  flags;
+        int success;
+
+        if (xen_feature(XENFEAT_auto_translated_physmap))
+                return;
+
+        if (unlikely(order > MAX_CONTIG_ORDER))
+                return;
+
+        memset((void *) vstart, 0, PAGE_SIZE << order);
+
+        spin_lock_irqsave(&xen_reservation_lock, flags);
+
+        /* 1. Find start MFN of contiguous extent. */
+        in_frame = virt_to_mfn(vstart);
+
+        /* 2. Zap current PTEs. */
+        xen_zap_pfn_range(vstart, order, NULL, out_frames);
+
+        /* 3. Do the exchange for non-contiguous MFNs. */
+        success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
+                                      0, out_frames, 0);
+
+        /* 4. Map new pages in place of old pages. */
+        if (success)
+                xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
+        else
+                xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
+
+        spin_unlock_irqrestore(&xen_reservation_lock, flags);
 }
+EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
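
Taken together, the new exchange path lets a PV guest trade a scattered set of machine frames for a machine-contiguous extent and hand it back later, which is exactly what DMA buffers need. A hypothetical caller (the function and buffer names are invented; the signatures are the ones exported above; note that both calls zero the region):

    /* Make an order-4 (64KB) buffer machine-contiguous below 4GB,
       e.g. for a device limited to 32-bit DMA addresses. */
    static int make_dma_contiguous(void *buf)
    {
        int rc = xen_create_contiguous_region((unsigned long)buf, 4, 32);

        if (rc)
            return rc;  /* -ENOMEM: the hypervisor refused the exchange */

        /* ... program the device with virt_to_machine(buf) ... */

        xen_destroy_contiguous_region((unsigned long)buf, 4);
        return 0;
    }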