Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--  arch/x86/xen/mmu.c  293
1 file changed, 288 insertions(+), 5 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 413b19b3d0fe..42086ac406af 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
 #include <linux/highmem.h>
 #include <linux/debugfs.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
 
@@ -51,15 +52,19 @@
 #include <asm/mmu_context.h>
 #include <asm/setup.h>
 #include <asm/paravirt.h>
+#include <asm/e820.h>
 #include <asm/linkage.h>
+#include <asm/page.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/version.h>
+#include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 
 #include "multicalls.h"
@@ -68,6 +73,13 @@
 
 #define MMU_UPDATE_HISTO        30
 
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+DEFINE_SPINLOCK(xen_reservation_lock);
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct {
@@ -378,6 +390,28 @@ static bool xen_page_pinned(void *ptr)
         return PagePinned(page);
 }
 
+static bool xen_iomap_pte(pte_t pte)
+{
+        return pte_flags(pte) & _PAGE_IOMAP;
+}
+
+static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+{
+        struct multicall_space mcs;
+        struct mmu_update *u;
+
+        mcs = xen_mc_entry(sizeof(*u));
+        u = mcs.args;
+
+        /* ptep might be kmapped when using 32-bit HIGHPTE */
+        u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+        u->val = pte_val_ma(pteval);
+
+        MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+
+        xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
         struct multicall_space mcs;
@@ -454,6 +488,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval)
 {
+        if (xen_iomap_pte(pteval)) {
+                xen_set_iomap_pte(ptep, pteval);
+                goto out;
+        }
+
         ADD_STATS(set_pte_at, 1);
 //      ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
         ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -524,8 +563,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
         return val;
 }
 
+static pteval_t iomap_pte(pteval_t val)
+{
+        if (val & _PAGE_PRESENT) {
+                unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+                pteval_t flags = val & PTE_FLAGS_MASK;
+
+                /* We assume the pte frame number is a MFN, so
+                   just use it as-is. */
+                val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
+        }
+
+        return val;
+}
+
 pteval_t xen_pte_val(pte_t pte)
 {
+        if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
+                return pte.pte;
+
         return pte_mfn_to_pfn(pte.pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -538,7 +594,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
 
 pte_t xen_make_pte(pteval_t pte)
 {
-        pte = pte_pfn_to_mfn(pte);
+        phys_addr_t addr = (pte & PTE_PFN_MASK);
+
+        /*
+         * Unprivileged domains are allowed to do IOMAPpings for
+         * PCI passthrough, but not map ISA space.  The ISA
+         * mappings are just dummy local mappings to keep other
+         * parts of the kernel happy.
+         */
+        if (unlikely(pte & _PAGE_IOMAP) &&
+            (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+                pte = iomap_pte(pte);
+        } else {
+                pte &= ~_PAGE_IOMAP;
+                pte = pte_pfn_to_mfn(pte);
+        }
+
         return native_make_pte(pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
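The hunk above carries the policy change: a pte marked _PAGE_IOMAP keeps its frame number as a raw machine frame (MFN), while everything else still goes through the usual pfn-to-mfn translation, and unprivileged domains only get dummy local mappings for ISA space. A standalone sketch of that decision, under simplified assumptions (made-up bit value, stubbed p2m lookup; not part of the patch):

/* Simplified model of the xen_make_pte() decision above; assumes 4k pages
 * and the x86-64 PTE layout, with the p2m lookup stubbed out. */
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT      12
#define PTE_PFN_MASK    0x000ffffffffff000ULL
#define PTE_FLAGS_MASK  (~PTE_PFN_MASK)
#define _PAGE_IOMAP     (1ULL << 10)    /* a software-available PTE bit */
#define ISA_END_ADDRESS 0x100000        /* end of the legacy ISA hole */

uint64_t pfn_to_mfn(uint64_t pfn);      /* p2m lookup, provided elsewhere */

static uint64_t make_pte_model(uint64_t pte, bool initial_domain)
{
        uint64_t addr = pte & PTE_PFN_MASK;

        if ((pte & _PAGE_IOMAP) &&
            (initial_domain || addr >= ISA_END_ADDRESS)) {
                /* IO mapping: the frame number is already a machine frame,
                   so keep it untranslated (this is what iomap_pte() does). */
                return pte;
        }

        /* RAM, or a dummy ISA mapping in a domU: drop the flag and run the
           frame number through the normal pfn -> mfn translation. */
        pte &= ~_PAGE_IOMAP;
        return (pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT)
                | (pte & PTE_FLAGS_MASK);
}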
@@ -594,6 +665,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+        if (xen_iomap_pte(pte)) {
+                xen_set_iomap_pte(ptep, pte);
+                return;
+        }
+
         ADD_STATS(pte_update, 1);
 //      ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
         ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@ -610,6 +686,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+        if (xen_iomap_pte(pte)) {
+                xen_set_iomap_pte(ptep, pte);
+                return;
+        }
+
         set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
@@ -936,8 +1017,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
-        vm_unmap_aliases();
-
         xen_mc_batch();
 
         if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -1501,7 +1580,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
         if (PagePinned(virt_to_page(mm->pgd))) {
                 SetPagePinned(page);
 
-                vm_unmap_aliases();
                 if (!PageHighMem(page)) {
                         make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
                         if (level == PT_PTE && USE_SPLIT_PTLOCKS)
@@ -1812,9 +1890,16 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
                 pte = pfn_pte(phys, prot);
                 break;
 
-        default:
+        case FIX_PARAVIRT_BOOTMAP:
+                /* This is an MFN, but it isn't an IO mapping from the
+                   IO domain */
                 pte = mfn_pte(phys, prot);
                 break;
+
+        default:
+                /* By default, set_fixmap is used for hardware mappings */
+                pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+                break;
         }
 
         __native_set_fixmap(idx, pte);
@@ -1940,7 +2025,205 @@ void __init xen_init_mmu_ops(void)
         x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
         x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
         pv_mmu_ops = xen_mmu_ops;
+
+        vmap_lazy_unmap = false;
+}
+
+/* Protected by xen_reservation_lock. */
+#define MAX_CONTIG_ORDER 9 /* 2MB */
+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+
+#define VOID_PTE (mfn_pte(0, __pgprot(0)))
+static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
+                              unsigned long *in_frames,
+                              unsigned long *out_frames)
+{
+        int i;
+        struct multicall_space mcs;
+
+        xen_mc_batch();
+        for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
+                mcs = __xen_mc_entry(0);
+
+                if (in_frames)
+                        in_frames[i] = virt_to_mfn(vaddr);
+
+                MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
+                set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+
+                if (out_frames)
+                        out_frames[i] = virt_to_pfn(vaddr);
+        }
+        xen_mc_issue(0);
+}
+
+/*
+ * Update the pfn-to-mfn mappings for a virtual address range, either to
+ * point to an array of mfns, or contiguously from a single starting
+ * mfn.
+ */
+static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
+                                     unsigned long *mfns,
+                                     unsigned long first_mfn)
+{
+        unsigned i, limit;
+        unsigned long mfn;
+
+        xen_mc_batch();
+
+        limit = 1u << order;
+        for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
+                struct multicall_space mcs;
+                unsigned flags;
+
+                mcs = __xen_mc_entry(0);
+                if (mfns)
+                        mfn = mfns[i];
+                else
+                        mfn = first_mfn + i;
+
+                if (i < (limit - 1))
+                        flags = 0;
+                else {
+                        if (order == 0)
+                                flags = UVMF_INVLPG | UVMF_ALL;
+                        else
+                                flags = UVMF_TLB_FLUSH | UVMF_ALL;
+                }
+
+                MULTI_update_va_mapping(mcs.mc, vaddr,
+                                mfn_pte(mfn, PAGE_KERNEL), flags);
+
+                set_phys_to_machine(virt_to_pfn(vaddr), mfn);
+        }
+
+        xen_mc_issue(0);
+}
+
+/*
+ * Perform the hypercall to exchange a region of our pfns to point to
+ * memory with the required contiguous alignment.  Takes the pfns as
+ * input, and populates mfns as output.
+ *
+ * Returns a success code indicating whether the hypervisor was able to
+ * satisfy the request or not.
+ */
+static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
+                               unsigned long *pfns_in,
+                               unsigned long extents_out,
+                               unsigned int order_out,
+                               unsigned long *mfns_out,
+                               unsigned int address_bits)
+{
+        long rc;
+        int success;
+
+        struct xen_memory_exchange exchange = {
+                .in = {
+                        .nr_extents = extents_in,
+                        .extent_order = order_in,
+                        .extent_start = pfns_in,
+                        .domid = DOMID_SELF
+                },
+                .out = {
+                        .nr_extents = extents_out,
+                        .extent_order = order_out,
+                        .extent_start = mfns_out,
+                        .address_bits = address_bits,
+                        .domid = DOMID_SELF
+                }
+        };
+
+        BUG_ON(extents_in << order_in != extents_out << order_out);
+
+        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+        success = (exchange.nr_exchanged == extents_in);
+
+        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+        BUG_ON(success && (rc != 0));
+
+        return success;
+}
+
+int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
+                                 unsigned int address_bits)
+{
+        unsigned long *in_frames = discontig_frames, out_frame;
+        unsigned long flags;
+        int success;
+
+        /*
+         * Currently an auto-translated guest will not perform I/O, nor will
+         * it require PAE page directories below 4GB. Therefore any calls to
+         * this function are redundant and can be ignored.
+         */
+
+        if (xen_feature(XENFEAT_auto_translated_physmap))
+                return 0;
+
+        if (unlikely(order > MAX_CONTIG_ORDER))
+                return -ENOMEM;
+
+        memset((void *) vstart, 0, PAGE_SIZE << order);
+
+        spin_lock_irqsave(&xen_reservation_lock, flags);
+
+        /* 1. Zap current PTEs, remembering MFNs. */
+        xen_zap_pfn_range(vstart, order, in_frames, NULL);
+
+        /* 2. Get a new contiguous memory extent. */
+        out_frame = virt_to_pfn(vstart);
+        success = xen_exchange_memory(1UL << order, 0, in_frames,
+                                      1, order, &out_frame,
+                                      address_bits);
+
+        /* 3. Map the new extent in place of old pages. */
+        if (success)
+                xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
+        else
+                xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
+
+        spin_unlock_irqrestore(&xen_reservation_lock, flags);
+
+        return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+        unsigned long *out_frames = discontig_frames, in_frame;
+        unsigned long flags;
+        int success;
+
+        if (xen_feature(XENFEAT_auto_translated_physmap))
+                return;
+
+        if (unlikely(order > MAX_CONTIG_ORDER))
+                return;
+
+        memset((void *) vstart, 0, PAGE_SIZE << order);
+
+        spin_lock_irqsave(&xen_reservation_lock, flags);
+
+        /* 1. Find start MFN of contiguous extent. */
+        in_frame = virt_to_mfn(vstart);
+
+        /* 2. Zap current PTEs. */
+        xen_zap_pfn_range(vstart, order, NULL, out_frames);
+
+        /* 3. Do the exchange for non-contiguous MFNs. */
+        success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
+                                      0, out_frames, 0);
+
+        /* 4. Map new pages in place of old pages. */
+        if (success)
+                xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
+        else
+                xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
+
+        spin_unlock_irqrestore(&xen_reservation_lock, flags);
 }
+EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
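The exported xen_create_contiguous_region()/xen_destroy_contiguous_region() pair is what DMA-oriented code (for example the Xen SWIOTLB work from the same series) is expected to call to swap an already-allocated pfn range for a single machine-contiguous extent and back. A hypothetical caller sketch follows; alloc_machine_contiguous(), free_machine_contiguous() and the 32-bit address_bits value are illustrative assumptions, not part of this patch:

#include <linux/gfp.h>
#include <xen/xen-ops.h>        /* xen_create/destroy_contiguous_region() */

/* Hypothetical helper: back an order-'order' buffer with one contiguous
   machine extent addressable with 32 bits (e.g. for 32-bit DMA). */
static void *alloc_machine_contiguous(unsigned int order)
{
        unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

        if (!vstart)
                return NULL;

        /* Returns 0 on success, -ENOMEM if the hypervisor could not
           satisfy the exchange. */
        if (xen_create_contiguous_region(vstart, order, 32)) {
                free_pages(vstart, order);
                return NULL;
        }
        return (void *)vstart;
}

static void free_machine_contiguous(void *vaddr, unsigned int order)
{
        /* Hand the contiguous machine extent back before freeing the pages. */
        xen_destroy_contiguous_region((unsigned long)vaddr, order);
        free_pages((unsigned long)vaddr, order);
}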