author     Linus Torvalds <torvalds@linux-foundation.org>    2015-09-08 14:46:48 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2015-09-08 14:46:48 -0400
commit     752240e74d650faa24425adc523f1308973ea51c (patch)
tree       47657b7d468352424f844156883302653252f70e /arch
parent     b8cb642af98216fe6eeca1525345b8a5c9d7c9a4 (diff)
parent     626d7508664c4bc8e67f496da4387ecd0c410b8c (diff)
Merge tag 'for-linus-4.3-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from David Vrabel:
"Xen features and fixes for 4.3:
- Convert xen-blkfront to the multiqueue API
- [arm] Support binding event channels to different VCPUs.
- [x86] Support > 512 GiB in PV guests (off by default as such a
guest cannot be migrated with the current toolstack).
- [x86] PMU support for PV dom0 (limited support for using perf with
Xen and other guests)"
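The event-channel change above adds a per-architecture helper, xen_support_evtchn_rebind(), visible in the events.h hunks below: ARM and ARM64 always return true, while x86 returns false for HVM guests without vector callbacks, whose platform-PCI interrupt stays routed to vcpu 0. The following is a minimal, hypothetical sketch of a caller guarding a rebind with that helper; try_rebind_to_cpu() and evtchn_bind_to_vcpu() are illustrative names, not functions from this series.

#include <linux/errno.h>
#include <asm/xen/events.h>	/* xen_support_evtchn_rebind() */

/* Illustrative stand-in for the EVTCHNOP_bind_vcpu operation issued by the
 * event-channel core; assumed for this sketch only. */
extern int evtchn_bind_to_vcpu(unsigned int evtchn, unsigned int vcpu);

static int try_rebind_to_cpu(unsigned int evtchn, unsigned int cpu)
{
	/* HVM guest without vector callbacks: the interrupt cannot move. */
	if (!xen_support_evtchn_rebind())
		return -EOPNOTSUPP;

	return evtchn_bind_to_vcpu(evtchn, cpu);
}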
* tag 'for-linus-4.3-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (33 commits)
xen: switch extra memory accounting to use pfns
xen: limit memory to architectural maximum
xen: avoid another early crash of memory limited dom0
xen: avoid early crash of memory limited dom0
arm/xen: Remove helpers which are PV specific
xen/x86: Don't try to set PCE bit in CR4
xen/PMU: PMU emulation code
xen/PMU: Intercept PMU-related MSR and APIC accesses
xen/PMU: Describe vendor-specific PMU registers
xen/PMU: Initialization code for Xen PMU
xen/PMU: Sysfs interface for setting Xen PMU mode
xen: xensyms support
xen: remove no longer needed p2m.h
xen: allow more than 512 GB of RAM for 64 bit pv-domains
xen: move p2m list if conflicting with e820 map
xen: add explicit memblock_reserve() calls for special pages
mm: provide early_memremap_ro to establish read-only mapping
xen: check for initrd conflicting with e820 map
xen: check pre-allocated page tables for conflict with memory map
xen: check for kernel memory conflicting with memory layout
...
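The >512 GiB work below describes the guest's p2m to Xen through three new arch_shared_info fields (see the interface.h hunk): p2m_vaddr points at the linear p2m list, p2m_cr3 names the address space it lives in, and p2m_generation is a seqlock-style counter that the guest bumps before and after every change to the p2m list's mappings, so an odd value marks an update in flight. Below is a sketch of the retry loop an external consumer, for example a dom0 analysis tool reading the guest's p2m through a foreign mapping, might build on that counter; the read_guest_* accessors are assumed wrappers, not part of this series.

#include <stdint.h>

/* Assumed accessors over a foreign mapping of the guest's shared info and
 * linear p2m list; illustrative only. */
extern uint64_t read_guest_p2m_generation(void);
extern uint64_t read_guest_p2m_entry(unsigned long pfn);

/* Read one p2m entry, retrying until the generation count is stable. */
uint64_t read_p2m_entry_consistent(unsigned long pfn)
{
	uint64_t gen, entry;

	for (;;) {
		gen = read_guest_p2m_generation();
		if (gen & 1)
			continue;	/* odd: a mapping update is in progress */
		entry = read_guest_p2m_entry(pfn);
		if (gen == read_guest_p2m_generation())
			return entry;	/* nothing changed while we read it */
	}
}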
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/include/asm/xen/events.h    |   6
-rw-r--r--  arch/arm/include/asm/xen/page.h      |  16
-rw-r--r--  arch/arm/xen/enlighten.c             |   7
-rw-r--r--  arch/arm64/include/asm/xen/events.h  |   6
-rw-r--r--  arch/x86/include/asm/xen/events.h    |  11
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h |   6
-rw-r--r--  arch/x86/include/asm/xen/interface.h | 219
-rw-r--r--  arch/x86/include/asm/xen/page.h      |   8
-rw-r--r--  arch/x86/xen/Kconfig                 |  21
-rw-r--r--  arch/x86/xen/Makefile                |   2
-rw-r--r--  arch/x86/xen/apic.c                  |   6
-rw-r--r--  arch/x86/xen/enlighten.c             |  20
-rw-r--r--  arch/x86/xen/mmu.c                   | 399
-rw-r--r--  arch/x86/xen/p2m.c                   |  43
-rw-r--r--  arch/x86/xen/p2m.h                   |  15
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c   |   2
-rw-r--r--  arch/x86/xen/pmu.c                   | 570
-rw-r--r--  arch/x86/xen/pmu.h                   |  15
-rw-r--r--  arch/x86/xen/setup.c                 | 496
-rw-r--r--  arch/x86/xen/smp.c                   |  29
-rw-r--r--  arch/x86/xen/suspend.c               |  23
-rw-r--r--  arch/x86/xen/xen-head.S              |   2
-rw-r--r--  arch/x86/xen/xen-ops.h               |   7
23 files changed, 1679 insertions(+), 250 deletions(-)
diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h
index 8b1f37bfeeec..71e473d05fcc 100644
--- a/arch/arm/include/asm/xen/events.h
+++ b/arch/arm/include/asm/xen/events.h
@@ -20,4 +20,10 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 					    atomic64_t, \
 					    counter), (val))
 
+/* Rebind event channel is supported by default */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return true;
+}
+
 #endif /* _ASM_ARM_XEN_EVENTS_H */
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 1bee8ca12494..98b1084f8282 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -54,26 +54,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 
 #define mfn_to_local_pfn(mfn) mfn_to_pfn(mfn)
 
-static inline xmaddr_t phys_to_machine(xpaddr_t phys)
-{
-	unsigned offset = phys.paddr & ~PAGE_MASK;
-	return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
-}
-
-static inline xpaddr_t machine_to_phys(xmaddr_t machine)
-{
-	unsigned offset = machine.maddr & ~PAGE_MASK;
-	return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
-}
 /* VIRT <-> MACHINE conversion */
-#define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
 #define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
+/* Only used in PV code. But ARM guests are always HVM. */
 static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
-	/* TODO: assuming it is mapped in the kernel 1:1 */
-	return virt_to_machine(vaddr);
+	BUG();
 }
 
 /* TODO: this shouldn't be here but it is because the frontend drivers
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 6c09cc440a2b..c50c8d33f874 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -45,13 +45,6 @@ static struct vcpu_info __percpu *xen_vcpu_info;
 unsigned long xen_released_pages;
 struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 
-/* TODO: to be removed */
-__read_mostly int xen_have_vector_callback;
-EXPORT_SYMBOL_GPL(xen_have_vector_callback);
-
-int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
-EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-
 static __read_mostly unsigned int xen_events_irq;
 
 static __initdata struct device_node *xen_node;
diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h
index 86553213c132..4318866d053c 100644
--- a/arch/arm64/include/asm/xen/events.h
+++ b/arch/arm64/include/asm/xen/events.h
@@ -18,4 +18,10 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 
 #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
 
+/* Rebind event channel is supported by default */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return true;
+}
+
 #endif /* _ASM_ARM64_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 608a79d5a466..e6911caf5bbf 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 /* No need for a barrier -- XCHG is a barrier on x86. */
 #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
 
+extern int xen_have_vector_callback;
+
+/*
+ * Events delivered via platform PCI interrupts are always
+ * routed to vcpu 0 and hence cannot be rebound.
+ */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return (!xen_hvm_domain() || xen_have_vector_callback);
+}
+
 #endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index ca08a27b90b3..83aea8055119 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
 	return _hypercall1(int, tmem_op, op);
 }
 
+static inline int
+HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
+{
+	return _hypercall2(int, xenpmu_op, op, arg);
+}
+
 static inline void
 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
 {
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 3400dbaec3c3..62ca03ef5c65 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -3,12 +3,38 @@
  *
  * Guest OS interface to x86 Xen.
  *
- * Copyright (c) 2004, K A Fraser
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
  */
 
 #ifndef _ASM_X86_XEN_INTERFACE_H
 #define _ASM_X86_XEN_INTERFACE_H
 
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
 	typedef struct { type *p; } __guest_handle_ ## name
@@ -88,13 +114,16 @@ DEFINE_GUEST_HANDLE(xen_ulong_t);
  * start of the GDT because some stupid OSes export hard-coded selector values
  * in their ABI. These hard-coded values are always near the start of the GDT,
  * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
  */
 #define FIRST_RESERVED_GDT_PAGE  14
 #define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
 #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
 
 /*
- * Send an array of these to HYPERVISOR_set_trap_table()
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
  * The privilege level specifies which modes may enter a trap via a software
  * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
  * privilege levels as follows:
@@ -118,10 +147,41 @@ struct trap_info {
 DEFINE_GUEST_HANDLE_STRUCT(trap_info);
 
 struct arch_shared_info {
-	unsigned long max_pfn;	/* max pfn that appears in table */
-	/* Frame containing list of mfns containing list of mfns containing p2m. */
-	unsigned long pfn_to_mfn_frame_list_list;
-	unsigned long nmi_reason;
+	/*
+	 * Number of valid entries in the p2m table(s) anchored at
+	 * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
+	 */
+	unsigned long max_pfn;
+	/*
+	 * Frame containing list of mfns containing list of mfns containing p2m.
+	 * A value of 0 indicates it has not yet been set up, ~0 indicates it
+	 * has been set to invalid e.g. due to the p2m being too large for the
+	 * 3-level p2m tree. In this case the linear mapper p2m list anchored
+	 * at p2m_vaddr is to be used.
+	 */
+	xen_pfn_t pfn_to_mfn_frame_list_list;
+	unsigned long nmi_reason;
+	/*
+	 * Following three fields are valid if p2m_cr3 contains a value
+	 * different from 0.
+	 * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
+	 * p2m_cr3 is in the same format as a cr3 value in the vcpu register
+	 * state and holds the folded machine frame number (via xen_pfn_to_cr3)
+	 * of a L3 or L4 page table.
+	 * p2m_vaddr holds the virtual address of the linear p2m list. All
+	 * entries in the range [0...max_pfn[ are accessible via this pointer.
+	 * p2m_generation will be incremented by the guest before and after each
+	 * change of the mappings of the p2m list. p2m_generation starts at 0
+	 * and a value with the least significant bit set indicates that a
+	 * mapping update is in progress. This allows guest external software
+	 * (e.g. in Dom0) to verify that read mappings are consistent and
+	 * whether they have changed since the last check.
+	 * Modifying a p2m element in the linear p2m list is allowed via an
+	 * atomic write only.
+	 */
+	unsigned long p2m_cr3;		/* cr3 value of the p2m address space */
+	unsigned long p2m_vaddr;	/* virtual address of the p2m list */
+	unsigned long p2m_generation;	/* generation count of p2m mapping */
 };
 #endif /* !__ASSEMBLY__ */
 
@@ -137,13 +197,31 @@ struct arch_shared_info {
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ *
+ * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
+ * for HVM and PVH guests, not all information in this structure is updated:
+ *
+ * - For HVM guests, the structures read include: fpu_ctxt (if
+ * VGCT_I387_VALID is set), flags, user_regs, debugreg[*]
+ *
+ * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
+ * set cr3. All other fields not used should be set to 0.
  */
 struct vcpu_guest_context {
 	/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
 	struct { char x[512]; } fpu_ctxt;	/* User-level FPU registers */
 #define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST  (1<<1)
-#define VGCF_IN_KERNEL  (1<<2)
+#define VGCF_IN_KERNEL (1<<2)
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events 4
+#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online 5
+#define VGCF_online (1<<_VGCF_online)
 	unsigned long flags;			/* VGCF_* flags */
 	struct cpu_user_regs user_regs;		/* User-level CPU registers */
 	struct trap_info trap_ctxt[256];	/* Virtual IDT */
@@ -172,6 +250,129 @@ struct vcpu_guest_context {
 #endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+	/*
+	 * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+	 * For PV(H) guests these fields are RO.
+	 */
+	uint32_t counters;
+	uint32_t ctrls;
+
+	/* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+	uint64_t regs[];
+#elif defined(__GNUC__)
+	uint64_t regs[0];
+#endif
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+	uint64_t counter;
+	uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+	/*
+	 * Offsets to fixed and architectural counter MSRs (relative to
+	 * xen_pmu_arch.c.intel).
+	 * For PV(H) guests these fields are RO.
+	 */
+	uint32_t fixed_counters;
+	uint32_t arch_counters;
+
+	/* PMU registers */
+	uint64_t global_ctrl;
+	uint64_t global_ovf_ctrl;
+	uint64_t global_status;
+	uint64_t fixed_ctrl;
+	uint64_t ds_area;
+	uint64_t pebs_enable;
+	uint64_t debugctl;
+
+	/* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+	uint64_t regs[];
+#elif defined(__GNUC__)
+	uint64_t regs[0];
+#endif
+};
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+	uint64_t ip;
+	uint64_t sp;
+	uint64_t flags;
+	uint16_t cs;
+	uint16_t ss;
+	uint8_t cpl;
+	uint8_t pad[3];
+};
+
+/* PMU flags */
+#define PMU_CACHED	  (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER	  (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL	  (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV	  (1<<3) /* Sample from a PV guest */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+	union {
+		/*
+		 * Processor's registers at the time of interrupt.
+		 * WO for hypervisor, RO for guests.
+		 */
+		struct xen_pmu_regs regs;
+		/*
+		 * Padding for adding new registers to xen_pmu_regs in
+		 * the future
+		 */
+#define XENPMU_REGS_PAD_SZ  64
+		uint8_t pad[XENPMU_REGS_PAD_SZ];
+	} r;
+
+	/* WO for hypervisor, RO for guest */
+	uint64_t pmu_flags;
+
+	/*
+	 * APIC LVTPC register.
+	 * RW for both hypervisor and guest.
+	 * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+	 * during XENPMU_flush or XENPMU_lvtpc_set.
+	 */
+	union {
+		uint32_t lapic_lvtpc;
+		uint64_t pad;
+	} l;
+
+	/*
+	 * Vendor-specific PMU registers.
+	 * RW for both hypervisor and guest (see exceptions above).
+	 * Guest's updates to this field are verified and then loaded by the
+	 * hypervisor into hardware during XENPMU_flush
+	 */
+	union {
+		struct xen_pmu_amd_ctxt amd;
+		struct xen_pmu_intel_ctxt intel;
+
+		/*
+		 * Padding for contexts (fixed parts only, does not include
+		 * MSR banks that are specified by offsets)
+		 */
+#define XENPMU_CTXT_PAD_SZ  128
+		uint8_t pad[XENPMU_CTXT_PAD_SZ];
+	} c;
+};
+
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c44a5d53e464..a3804fbe1f36 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -35,9 +35,7 @@ typedef struct xpaddr {
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 #define IDENTITY_FRAME(m)	((m) | IDENTITY_FRAME_BIT)
 
-/* Maximum amount of memory we can handle in a domain in pages */
-#define MAX_DOMAIN_PAGES \
-	((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+#define P2M_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 
 extern unsigned long *machine_to_phys_mapping;
 extern unsigned long machine_to_phys_nr;
@@ -48,8 +46,8 @@ extern unsigned long xen_max_p2m_pfn;
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-extern unsigned long set_phys_range_identity(unsigned long pfn_s,
-					     unsigned long pfn_e);
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+						    unsigned long pfn_e);
 
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 484145368a24..c7b15f3e2cf3 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@ config XEN
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
 	select XEN_HAVE_PVMMU
+	select XEN_HAVE_VPMU
 	depends on X86_64 || (X86_32 && X86_PAE)
 	depends on X86_LOCAL_APIC && X86_TSC
 	help
@@ -23,14 +24,18 @@ config XEN_PVHVM
 	def_bool y
 	depends on XEN && PCI && X86_LOCAL_APIC
 
-config XEN_MAX_DOMAIN_MEMORY
-	int
-	default 500 if X86_64
-	default 64 if X86_32
-	depends on XEN
-	help
-	  This only affects the sizing of some bss arrays, the unused
-	  portions of which are freed.
+config XEN_512GB
+	bool "Limit Xen pv-domain memory to 512GB"
+	depends on XEN && X86_64
+	default y
+	help
+	  Limit paravirtualized user domains to 512GB of RAM.
+
+	  The Xen tools and crash dump analysis tools might not support
+	  pv-domains with more than 512 GB of RAM. This option controls the
+	  default setting of the kernel to use only up to 512 GB or more.
+	  It is always possible to change the default via specifying the
+	  boot parameter "xen_512gb_limit".
 
 config XEN_SAVE_RESTORE
 	bool
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 4b6e29ac0968..e47e52787d32 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
 			grant-table.o suspend.o platform-pci-unplug.o \
-			p2m.o apic.o
+			p2m.o apic.o pmu.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 70e060ad879a..acda713ab5be 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -7,6 +7,7 @@
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include "xen-ops.h"
+#include "pmu.h"
 #include "smp.h"
 
 static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
@@ -72,6 +73,11 @@ static u32 xen_apic_read(u32 reg)
 
 static void xen_apic_write(u32 reg, u32 val)
 {
+	if (reg == APIC_LVTPC) {
+		(void)pmu_apic_update(reg);
+		return;
+	}
+
 	/* Warn to see if there's any stray references */
 	WARN(1,"register: %x, value: %x\n", reg, val);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d9cfa452da9d..30d12afe52ed 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -84,6 +84,7 @@
 #include "mmu.h"
 #include "smp.h"
 #include "multicalls.h"
+#include "pmu.h"
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
@@ -1010,8 +1011,7 @@ static void xen_write_cr0(unsigned long cr0)
 
 static void xen_write_cr4(unsigned long cr4)
 {
-	cr4 &= ~X86_CR4_PGE;
-	cr4 &= ~X86_CR4_PSE;
+	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);
 
 	native_write_cr4(cr4);
 }
@@ -1030,6 +1030,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 {
 	u64 val;
 
+	if (pmu_msr_read(msr, &val, err))
+		return val;
+
 	val = native_read_msr_safe(msr, err);
 	switch (msr) {
 	case MSR_IA32_APICBASE:
@@ -1076,7 +1079,8 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		   Xen console noise. */
 
 	default:
-		ret = native_write_msr_safe(msr, low, high);
+		if (!pmu_msr_write(msr, low, high, &ret))
+			ret = native_write_msr_safe(msr, low, high);
 	}
 
 	return ret;
@@ -1215,7 +1219,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_msr = xen_read_msr_safe,
 	.write_msr = xen_write_msr_safe,
 
-	.read_pmc = native_read_pmc,
+	.read_pmc = xen_read_pmc,
 
 	.iret = xen_iret,
 #ifdef CONFIG_X86_64
@@ -1264,6 +1268,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		xen_pmu_finish(cpu);
 
 	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
 		BUG();
@@ -1607,7 +1615,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
+	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
+				   xen_start_info->nr_pages);
+	xen_reserve_special_pages();
 
 	/*
 	 * Modify the cache mode translation tables to match Xen's PAT
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dd151b2045b0..2c50b445884e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -116,6 +116,7 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
 DEFINE_PER_CPU(unsigned long, xen_cr3);		/* cr3 stored as physaddr */
 DEFINE_PER_CPU(unsigned long, xen_current_cr3);	/* actual vcpu cr3 */
 
+static phys_addr_t xen_pt_base, xen_pt_size __initdata;
 
 /*
  * Just beyond the highest usermode address. STACK_TOP_MAX has a
@@ -1093,6 +1094,16 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
+static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+	struct mmuext_op op;
+
+	op.cmd = cmd;
+	op.arg1.mfn = pfn_to_mfn(pfn);
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
 #ifdef CONFIG_X86_64
 static void __init xen_cleanhighmap(unsigned long vaddr,
 				    unsigned long vaddr_end)
@@ -1114,6 +1125,83 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 	xen_mc_flush();
 }
 
+/*
+ * Make a page range writeable and free it.
+ */
+static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
+{
+	void *vaddr = __va(paddr);
+	void *vaddr_end = vaddr + size;
+
+	for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
+		make_lowmem_page_readwrite(vaddr);
+
+	memblock_free(paddr, size);
+}
+
+static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
+{
+	unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK;
+
+	if (unpin)
+		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa));
+	ClearPagePinned(virt_to_page(__va(pa)));
+	xen_free_ro_pages(pa, PAGE_SIZE);
+}
+
+/*
+ * Since it is well isolated we can (and since it is perhaps large we should)
+ * also free the page tables mapping the initial P->M table.
+ */
+static void __init xen_cleanmfnmap(unsigned long vaddr)
+{
+	unsigned long va = vaddr & PMD_MASK;
+	unsigned long pa;
+	pgd_t *pgd = pgd_offset_k(va);
+	pud_t *pud_page = pud_offset(pgd, 0);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned int i;
+	bool unpin;
+
+	unpin = (vaddr == 2 * PGDIR_SIZE);
+	set_pgd(pgd, __pgd(0));
+	do {
+		pud = pud_page + pud_index(va);
+		if (pud_none(*pud)) {
+			va += PUD_SIZE;
+		} else if (pud_large(*pud)) {
+			pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+			xen_free_ro_pages(pa, PUD_SIZE);
+			va += PUD_SIZE;
+		} else {
+			pmd = pmd_offset(pud, va);
+			if (pmd_large(*pmd)) {
+				pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+				xen_free_ro_pages(pa, PMD_SIZE);
+			} else if (!pmd_none(*pmd)) {
+				pte = pte_offset_kernel(pmd, va);
+				set_pmd(pmd, __pmd(0));
+				for (i = 0; i < PTRS_PER_PTE; ++i) {
+					if (pte_none(pte[i]))
+						break;
+					pa = pte_pfn(pte[i]) << PAGE_SHIFT;
+					xen_free_ro_pages(pa, PAGE_SIZE);
+				}
+				xen_cleanmfnmap_free_pgtbl(pte, unpin);
+			}
+			va += PMD_SIZE;
+			if (pmd_index(va))
+				continue;
+			set_pud(pud, __pud(0));
+			xen_cleanmfnmap_free_pgtbl(pmd, unpin);
+		}
+
+	} while (pud_index(va) || pmd_index(va));
+	xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+}
+
 static void __init xen_pagetable_p2m_free(void)
 {
 	unsigned long size;
@@ -1128,18 +1216,31 @@ static void __init xen_pagetable_p2m_free(void)
 	/* using __ka address and sticking INVALID_P2M_ENTRY! */
 	memset((void *)xen_start_info->mfn_list, 0xff, size);
 
-	/* We should be in __ka space. */
-	BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
 	addr = xen_start_info->mfn_list;
-	/* We roundup to the PMD, which means that if anybody at this stage is
-	 * using the __ka address of xen_start_info or xen_start_info->shared_info
-	 * they are in going to crash. Fortunatly we have already revectored
-	 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+	/*
+	 * We could be in __ka space.
+	 * We roundup to the PMD, which means that if anybody at this stage is
+	 * using the __ka address of xen_start_info or
+	 * xen_start_info->shared_info they are in going to crash. Fortunatly
+	 * we have already revectored in xen_setup_kernel_pagetable and in
+	 * xen_setup_shared_info.
+	 */
 	size = roundup(size, PMD_SIZE);
-	xen_cleanhighmap(addr, addr + size);
 
-	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-	memblock_free(__pa(xen_start_info->mfn_list), size);
+	if (addr >= __START_KERNEL_map) {
+		xen_cleanhighmap(addr, addr + size);
+		size = PAGE_ALIGN(xen_start_info->nr_pages *
+				  sizeof(unsigned long));
+		memblock_free(__pa(addr), size);
+	} else {
+		xen_cleanmfnmap(addr);
+	}
+}
+
+static void __init xen_pagetable_cleanhighmap(void)
+{
+	unsigned long size;
+	unsigned long addr;
 
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1172,6 +1273,8 @@ static void __init xen_pagetable_p2m_setup(void)
 
 #ifdef CONFIG_X86_64
 	xen_pagetable_p2m_free();
+
+	xen_pagetable_cleanhighmap();
 #endif
 	/* And revector! Bye bye old array */
 	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
@@ -1461,6 +1564,24 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 #else /* CONFIG_X86_64 */
 static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 {
+	unsigned long pfn;
+
+	if (xen_feature(XENFEAT_writable_page_tables) ||
+	    xen_feature(XENFEAT_auto_translated_physmap) ||
+	    xen_start_info->mfn_list >= __START_KERNEL_map)
+		return pte;
+
+	/*
+	 * Pages belonging to the initial p2m list mapped outside the default
+	 * address range must be mapped read-only. This region contains the
+	 * page tables for mapping the p2m list, too, and page tables MUST be
+	 * mapped read-only.
+	 */
+	pfn = pte_pfn(pte);
+	if (pfn >= xen_start_info->first_p2m_pfn &&
+	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
+		pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
+
 	return pte;
 }
 #endif /* CONFIG_X86_64 */
@@ -1489,15 +1610,6 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 		native_set_pte(ptep, pte);
 }
 
-static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
-}
-
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1815,7 +1927,10 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	 * mappings. Considering that on Xen after the kernel mappings we
 	 * have the mappings of some pages that don't exist in pfn space, we
 	 * set max_pfn_mapped to the last real pfn mapped. */
-	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
+	if (xen_start_info->mfn_list < __START_KERNEL_map)
+		max_pfn_mapped = xen_start_info->first_p2m_pfn;
+	else
+		max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
 
 	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
 	pt_end = pt_base + xen_start_info->nr_pt_frames;
@@ -1855,6 +1970,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Graft it onto L4[511][510] */
 	copy_page(level2_kernel_pgt, l2);
 
+	/* Copy the initial P->M table mappings if necessary. */
+	i = pgd_index(xen_start_info->mfn_list);
+	if (i && i < pgd_index(__START_KERNEL_map))
+		init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		/* Make pagetable pieces RO */
 		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
@@ -1894,10 +2014,192 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 		check_pt_base(&pt_base, &pt_end, addr[i]);
 
 	/* Our (by three pages) smaller Xen pagetable that we are using */
-	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
+	xen_pt_base = PFN_PHYS(pt_base);
+	xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
+	memblock_reserve(xen_pt_base, xen_pt_size);
+
 	/* Revector the xen_start_info */
 	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
 }
+
+/*
+ * Read a value from a physical address.
+ */
+static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
+{
+	unsigned long *vaddr;
+	unsigned long val;
+
+	vaddr = early_memremap_ro(addr, sizeof(val));
+	val = *vaddr;
+	early_memunmap(vaddr, sizeof(val));
+	return val;
+}
+
+/*
+ * Translate a virtual address to a physical one without relying on mapped
+ * page tables.
+ */
+static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
+{
+	phys_addr_t pa;
+	pgd_t pgd;
+	pud_t pud;
+	pmd_t pmd;
+	pte_t pte;
+
+	pa = read_cr3();
+	pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
+						       sizeof(pgd)));
+	if (!pgd_present(pgd))
+		return 0;
+
+	pa = pgd_val(pgd) & PTE_PFN_MASK;
+	pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
+						       sizeof(pud)));
+	if (!pud_present(pud))
+		return 0;
+	pa = pud_pfn(pud) << PAGE_SHIFT;
+	if (pud_large(pud))
+		return pa + (vaddr & ~PUD_MASK);
+
+	pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
+						       sizeof(pmd)));
+	if (!pmd_present(pmd))
+		return 0;
+	pa = pmd_pfn(pmd) << PAGE_SHIFT;
+	if (pmd_large(pmd))
+		return pa + (vaddr & ~PMD_MASK);
+
+	pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
+						       sizeof(pte)));
+	if (!pte_present(pte))
+		return 0;
+	pa = pte_pfn(pte) << PAGE_SHIFT;
+
+	return pa | (vaddr & ~PAGE_MASK);
+}
+
+/*
+ * Find a new area for the hypervisor supplied p2m list and relocate the p2m to
+ * this area.
+ */
+void __init xen_relocate_p2m(void)
+{
+	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
+	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
+	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
+	pte_t *pt;
+	pmd_t *pmd;
+	pud_t *pud;
+	pgd_t *pgd;
+	unsigned long *new_p2m;
+
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
+	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
+	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
+	n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
+	n_frames = n_pte + n_pt + n_pmd + n_pud;
+
+	new_area = xen_find_free_area(PFN_PHYS(n_frames));
+	if (!new_area) {
+		xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
+		BUG();
+	}
+
+	/*
+	 * Setup the page tables for addressing the new p2m list.
+	 * We have asked the hypervisor to map the p2m list at the user address
+	 * PUD_SIZE. It may have done so, or it may have used a kernel space
+	 * address depending on the Xen version.
+	 * To avoid any possible virtual address collision, just use
+	 * 2 * PUD_SIZE for the new area.
+	 */
+	pud_phys = new_area;
+	pmd_phys = pud_phys + PFN_PHYS(n_pud);
+	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
+	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
+
+	pgd = __va(read_cr3());
+	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
+	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+		pud = early_memremap(pud_phys, PAGE_SIZE);
+		clear_page(pud);
+		for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+		     idx_pmd++) {
+			pmd = early_memremap(pmd_phys, PAGE_SIZE);
+			clear_page(pmd);
+			for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+			     idx_pt++) {
+				pt = early_memremap(pt_phys, PAGE_SIZE);
+				clear_page(pt);
+				for (idx_pte = 0;
+				     idx_pte < min(n_pte, PTRS_PER_PTE);
+				     idx_pte++) {
+					set_pte(pt + idx_pte,
+						pfn_pte(p2m_pfn, PAGE_KERNEL));
+					p2m_pfn++;
+				}
+				n_pte -= PTRS_PER_PTE;
+				early_memunmap(pt, PAGE_SIZE);
+				make_lowmem_page_readonly(__va(pt_phys));
+				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+						  PFN_DOWN(pt_phys));
+				set_pmd(pmd + idx_pt,
+					__pmd(_PAGE_TABLE | pt_phys));
+				pt_phys += PAGE_SIZE;
+			}
+			n_pt -= PTRS_PER_PMD;
+			early_memunmap(pmd, PAGE_SIZE);
+			make_lowmem_page_readonly(__va(pmd_phys));
+			pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+					  PFN_DOWN(pmd_phys));
+			set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+			pmd_phys += PAGE_SIZE;
+		}
+		n_pmd -= PTRS_PER_PUD;
+		early_memunmap(pud, PAGE_SIZE);
+		make_lowmem_page_readonly(__va(pud_phys));
+		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+		pud_phys += PAGE_SIZE;
+	}
+
+	/* Now copy the old p2m info to the new area. */
+	memcpy(new_p2m, xen_p2m_addr, size);
+	xen_p2m_addr = new_p2m;
+
+	/* Release the old p2m list and set new list info. */
+	p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
+	BUG_ON(!p2m_pfn);
+	p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
+
+	if (xen_start_info->mfn_list < __START_KERNEL_map) {
+		pfn = xen_start_info->first_p2m_pfn;
+		pfn_end = xen_start_info->first_p2m_pfn +
+			  xen_start_info->nr_p2m_frames;
+		set_pgd(pgd + 1, __pgd(0));
+	} else {
+		pfn = p2m_pfn;
+		pfn_end = p2m_pfn_end;
+	}
+
+	memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
+	while (pfn < pfn_end) {
+		if (pfn == p2m_pfn) {
+			pfn = p2m_pfn_end;
+			continue;
+		}
+		make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+		pfn++;
+	}
+
+	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
+	xen_start_info->first_p2m_pfn = PFN_DOWN(new_area);
+	xen_start_info->nr_p2m_frames = n_frames;
+}
+
 #else /* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
 static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
@@ -1938,18 +2240,41 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
+/*
+ * For 32 bit domains xen_start_info->pt_base is the pgd address which might be
+ * not the first page table in the page table pool.
+ * Iterate through the initial page tables to find the real page table base.
+ */
+static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+{
+	phys_addr_t pt_base, paddr;
+	unsigned pmdidx;
+
+	pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd));
+
+	for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++)
+		if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) {
+			paddr = m2p(pmd[pmdidx].pmd);
+			pt_base = min(pt_base, paddr);
+		}
+
+	return pt_base;
+}
+
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
+	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+
+	xen_pt_base = xen_find_pt_base(kernel_pmd);
+	xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
+
 	initial_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
 
-	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
-				  xen_start_info->nr_pt_frames * PAGE_SIZE +
-				  512*1024);
+	max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
 
-	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
 	copy_page(initial_kernel_pmd, kernel_pmd);
 
 	xen_map_identity_early(initial_kernel_pmd, max_pfn);
@@ -1968,11 +2293,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 			  PFN_DOWN(__pa(initial_page_table)));
 	xen_write_cr3(__pa(initial_page_table));
 
-	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE);
+	memblock_reserve(xen_pt_base, xen_pt_size);
 }
 #endif /* CONFIG_X86_64 */
 
+void __init xen_reserve_special_pages(void)
+{
+	phys_addr_t paddr;
+
+	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
+	if (xen_start_info->store_mfn) {
+		paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
+		memblock_reserve(paddr, PAGE_SIZE);
+	}
+	if (!xen_initial_domain()) {
+		paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
+		memblock_reserve(paddr, PAGE_SIZE);
+	}
+}
+
+void __init xen_pt_check_e820(void)
+{
+	if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) {
+		xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n");
+		BUG();
+	}
+}
+
 static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
 
 static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8b7f18e200aa..bfc08b13044b 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -79,10 +79,14 @@ | |||
79 | #include <xen/balloon.h> | 79 | #include <xen/balloon.h> |
80 | #include <xen/grant_table.h> | 80 | #include <xen/grant_table.h> |
81 | 81 | ||
82 | #include "p2m.h" | ||
83 | #include "multicalls.h" | 82 | #include "multicalls.h" |
84 | #include "xen-ops.h" | 83 | #include "xen-ops.h" |
85 | 84 | ||
85 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
86 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
87 | |||
88 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
89 | |||
86 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) | 90 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) |
87 | 91 | ||
88 | unsigned long *xen_p2m_addr __read_mostly; | 92 | unsigned long *xen_p2m_addr __read_mostly; |
@@ -199,7 +203,8 @@ void __ref xen_build_mfn_list_list(void) | |||
199 | unsigned int level, topidx, mididx; | 203 | unsigned int level, topidx, mididx; |
200 | unsigned long *mid_mfn_p; | 204 | unsigned long *mid_mfn_p; |
201 | 205 | ||
202 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 206 | if (xen_feature(XENFEAT_auto_translated_physmap) || |
207 | xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) | ||
203 | return; | 208 | return; |
204 | 209 | ||
205 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 210 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
@@ -260,9 +265,16 @@ void xen_setup_mfn_list_list(void) | |||
260 | 265 | ||
261 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 266 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
262 | 267 | ||
263 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 268 | if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) |
264 | virt_to_mfn(p2m_top_mfn); | 269 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = ~0UL; |
270 | else | ||
271 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
272 | virt_to_mfn(p2m_top_mfn); | ||
265 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | 273 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; |
274 | HYPERVISOR_shared_info->arch.p2m_generation = 0; | ||
275 | HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr; | ||
276 | HYPERVISOR_shared_info->arch.p2m_cr3 = | ||
277 | xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | ||
266 | } | 278 | } |
267 | 279 | ||
268 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 280 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
@@ -478,8 +490,12 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) | |||
478 | 490 | ||
479 | ptechk = lookup_address(vaddr, &level); | 491 | ptechk = lookup_address(vaddr, &level); |
480 | if (ptechk == pte_pg) { | 492 | if (ptechk == pte_pg) { |
493 | HYPERVISOR_shared_info->arch.p2m_generation++; | ||
494 | wmb(); /* Tools are synchronizing via p2m_generation. */ | ||
481 | set_pmd(pmdp, | 495 | set_pmd(pmdp, |
482 | __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); | 496 | __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); |
497 | wmb(); /* Tools are synchronizing via p2m_generation. */ | ||
498 | HYPERVISOR_shared_info->arch.p2m_generation++; | ||
483 | pte_newpg[i] = NULL; | 499 | pte_newpg[i] = NULL; |
484 | } | 500 | } |
485 | 501 | ||
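The paired p2m_generation increments around set_pmd() above form a seqcount-like protocol: the count is odd while the p2m layout is being changed, so an external reader can detect a racing update and retry. The reader side is not part of this patch; a minimal sketch of what a toolstack-style consumer is presumably expected to do, with shared_info, pfn and read_guest_p2m() as purely hypothetical names:

	unsigned long gen, mfn;

	do {
		gen = shared_info->arch.p2m_generation;
		rmb();				/* generation before p2m data */
		mfn = read_guest_p2m(pfn);	/* hypothetical accessor */
		rmb();				/* p2m data before re-check  */
	} while (gen != shared_info->arch.p2m_generation || (gen & 1));

The same increment/wmb() pairing appears again around set_pte() in alloc_p2m() below.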
@@ -505,7 +521,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) | |||
505 | */ | 521 | */ |
506 | static bool alloc_p2m(unsigned long pfn) | 522 | static bool alloc_p2m(unsigned long pfn) |
507 | { | 523 | { |
508 | unsigned topidx, mididx; | 524 | unsigned topidx; |
509 | unsigned long *top_mfn_p, *mid_mfn; | 525 | unsigned long *top_mfn_p, *mid_mfn; |
510 | pte_t *ptep, *pte_pg; | 526 | pte_t *ptep, *pte_pg; |
511 | unsigned int level; | 527 | unsigned int level; |
@@ -513,9 +529,6 @@ static bool alloc_p2m(unsigned long pfn) | |||
513 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | 529 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); |
514 | unsigned long p2m_pfn; | 530 | unsigned long p2m_pfn; |
515 | 531 | ||
516 | topidx = p2m_top_index(pfn); | ||
517 | mididx = p2m_mid_index(pfn); | ||
518 | |||
519 | ptep = lookup_address(addr, &level); | 532 | ptep = lookup_address(addr, &level); |
520 | BUG_ON(!ptep || level != PG_LEVEL_4K); | 533 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
521 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | 534 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); |
@@ -527,7 +540,8 @@ static bool alloc_p2m(unsigned long pfn) | |||
527 | return false; | 540 | return false; |
528 | } | 541 | } |
529 | 542 | ||
530 | if (p2m_top_mfn) { | 543 | if (p2m_top_mfn && pfn < MAX_P2M_PFN) { |
544 | topidx = p2m_top_index(pfn); | ||
531 | top_mfn_p = &p2m_top_mfn[topidx]; | 545 | top_mfn_p = &p2m_top_mfn[topidx]; |
532 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | 546 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); |
533 | 547 | ||
@@ -577,10 +591,14 @@ static bool alloc_p2m(unsigned long pfn) | |||
577 | spin_lock_irqsave(&p2m_update_lock, flags); | 591 | spin_lock_irqsave(&p2m_update_lock, flags); |
578 | 592 | ||
579 | if (pte_pfn(*ptep) == p2m_pfn) { | 593 | if (pte_pfn(*ptep) == p2m_pfn) { |
594 | HYPERVISOR_shared_info->arch.p2m_generation++; | ||
595 | wmb(); /* Tools are synchronizing via p2m_generation. */ | ||
580 | set_pte(ptep, | 596 | set_pte(ptep, |
581 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | 597 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); |
598 | wmb(); /* Tools are synchronizing via p2m_generation. */ | ||
599 | HYPERVISOR_shared_info->arch.p2m_generation++; | ||
582 | if (mid_mfn) | 600 | if (mid_mfn) |
583 | mid_mfn[mididx] = virt_to_mfn(p2m); | 601 | mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m); |
584 | p2m = NULL; | 602 | p2m = NULL; |
585 | } | 603 | } |
586 | 604 | ||
@@ -630,6 +648,11 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
630 | return true; | 648 | return true; |
631 | } | 649 | } |
632 | 650 | ||
651 | /* | ||
652 | * The interface requires atomic updates on p2m elements. | ||
653 | * xen_safe_write_ulong() is using __put_user which does an atomic | ||
654 | * store via asm(). | ||
655 | */ | ||
633 | if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) | 656 | if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) |
634 | return true; | 657 | return true; |
635 | 658 | ||
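The new comment spells out the requirement on this path: a p2m entry must be updated with one naturally aligned machine-word store, so that the hypervisor or tools walking the array concurrently never observe a torn value, and xen_safe_write_ulong() (built on __put_user) additionally survives a fault if the backing page is not populated. For an entry known to be mapped, the same single-store property could be expressed as in this sketch:

	/* One aligned store; concurrent readers see the old or the new MFN. */
	WRITE_ONCE(xen_p2m_addr[pfn], mfn);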
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h deleted file mode 100644 index ad8aee24ab72..000000000000 --- a/arch/x86/xen/p2m.h +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | #ifndef _XEN_P2M_H | ||
2 | #define _XEN_P2M_H | ||
3 | |||
4 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
5 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
6 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
7 | |||
8 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
9 | |||
10 | #define MAX_REMAP_RANGES 10 | ||
11 | |||
12 | extern unsigned long __init set_phys_range_identity(unsigned long pfn_s, | ||
13 | unsigned long pfn_e); | ||
14 | |||
15 | #endif /* _XEN_P2M_H */ | ||
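The size macros formerly in p2m.h now live directly in p2m.c (see the p2m.c hunk above); P2M_PER_PAGE itself (PAGE_SIZE / sizeof(unsigned long), i.e. 512 with 4 KiB pages on 64-bit) is presumably picked up from a header changed elsewhere in this series. The resulting bound on the three-level, virtually mapped p2m tree works out as:

	MAX_P2M_PFN = P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE
	            = 512 * 512 * 512 = 2^27 PFNs
	            = 2^27 * 4 KiB    = 512 GiB

which is why the pfn < MAX_P2M_PFN check was added to alloc_p2m(): PFNs beyond that are representable in the linear p2m list but not in the mfn tree consumed by the toolstack.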
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index a8261716d58d..9586ff32810c 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -68,7 +68,7 @@ static int check_platform_magic(void) | |||
68 | return 0; | 68 | return 0; |
69 | } | 69 | } |
70 | 70 | ||
71 | bool xen_has_pv_devices() | 71 | bool xen_has_pv_devices(void) |
72 | { | 72 | { |
73 | if (!xen_domain()) | 73 | if (!xen_domain()) |
74 | return false; | 74 | return false; |
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c new file mode 100644 index 000000000000..724a08740a04 --- /dev/null +++ b/arch/x86/xen/pmu.c | |||
@@ -0,0 +1,570 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/interrupt.h> | ||
3 | |||
4 | #include <asm/xen/hypercall.h> | ||
5 | #include <xen/page.h> | ||
6 | #include <xen/interface/xen.h> | ||
7 | #include <xen/interface/vcpu.h> | ||
8 | #include <xen/interface/xenpmu.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | #include "pmu.h" | ||
12 | |||
13 | /* x86_pmu.handle_irq definition */ | ||
14 | #include "../kernel/cpu/perf_event.h" | ||
15 | |||
16 | #define XENPMU_IRQ_PROCESSING 1 | ||
17 | struct xenpmu { | ||
18 | /* Shared page between hypervisor and domain */ | ||
19 | struct xen_pmu_data *xenpmu_data; | ||
20 | |||
21 | uint8_t flags; | ||
22 | }; | ||
23 | static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared); | ||
24 | #define get_xenpmu_data() (this_cpu_ptr(&xenpmu_shared)->xenpmu_data) | ||
25 | #define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags) | ||
26 | |||
27 | /* Macro for computing address of a PMU MSR bank */ | ||
28 | #define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \ | ||
29 | (uintptr_t)ctxt->field)) | ||
30 | |||
31 | /* AMD PMU */ | ||
32 | #define F15H_NUM_COUNTERS 6 | ||
33 | #define F10H_NUM_COUNTERS 4 | ||
34 | |||
35 | static __read_mostly uint32_t amd_counters_base; | ||
36 | static __read_mostly uint32_t amd_ctrls_base; | ||
37 | static __read_mostly int amd_msr_step; | ||
38 | static __read_mostly int k7_counters_mirrored; | ||
39 | static __read_mostly int amd_num_counters; | ||
40 | |||
41 | /* Intel PMU */ | ||
42 | #define MSR_TYPE_COUNTER 0 | ||
43 | #define MSR_TYPE_CTRL 1 | ||
44 | #define MSR_TYPE_GLOBAL 2 | ||
45 | #define MSR_TYPE_ARCH_COUNTER 3 | ||
46 | #define MSR_TYPE_ARCH_CTRL 4 | ||
47 | |||
48 | /* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */ | ||
49 | #define PMU_GENERAL_NR_SHIFT 8 | ||
50 | #define PMU_GENERAL_NR_BITS 8 | ||
51 | #define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) \ | ||
52 | << PMU_GENERAL_NR_SHIFT) | ||
53 | |||
54 | /* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */ | ||
55 | #define PMU_FIXED_NR_SHIFT 0 | ||
56 | #define PMU_FIXED_NR_BITS 5 | ||
57 | #define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) - 1) \ | ||
58 | << PMU_FIXED_NR_SHIFT) | ||
59 | |||
60 | /* Alias registers (0x4c1) for full-width writes to PMCs */ | ||
61 | #define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0)) | ||
62 | |||
63 | #define INTEL_PMC_TYPE_SHIFT 30 | ||
64 | |||
65 | static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters; | ||
66 | |||
67 | |||
68 | static void xen_pmu_arch_init(void) | ||
69 | { | ||
70 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
71 | |||
72 | switch (boot_cpu_data.x86) { | ||
73 | case 0x15: | ||
74 | amd_num_counters = F15H_NUM_COUNTERS; | ||
75 | amd_counters_base = MSR_F15H_PERF_CTR; | ||
76 | amd_ctrls_base = MSR_F15H_PERF_CTL; | ||
77 | amd_msr_step = 2; | ||
78 | k7_counters_mirrored = 1; | ||
79 | break; | ||
80 | case 0x10: | ||
81 | case 0x12: | ||
82 | case 0x14: | ||
83 | case 0x16: | ||
84 | default: | ||
85 | amd_num_counters = F10H_NUM_COUNTERS; | ||
86 | amd_counters_base = MSR_K7_PERFCTR0; | ||
87 | amd_ctrls_base = MSR_K7_EVNTSEL0; | ||
88 | amd_msr_step = 1; | ||
89 | k7_counters_mirrored = 0; | ||
90 | break; | ||
91 | } | ||
92 | } else { | ||
93 | uint32_t eax, ebx, ecx, edx; | ||
94 | |||
95 | cpuid(0xa, &eax, &ebx, &ecx, &edx); | ||
96 | |||
97 | intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >> | ||
98 | PMU_GENERAL_NR_SHIFT; | ||
99 | intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >> | ||
100 | PMU_FIXED_NR_SHIFT; | ||
101 | } | ||
102 | } | ||
103 | |||
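On the Intel side the counter counts come straight from CPUID leaf 0xa: bits 15:8 of EAX give the number of general-purpose counters and bits 4:0 of EDX the number of fixed counters, which is exactly what the PMU_GENERAL_NR_* and PMU_FIXED_NR_* masks above extract. A worked example with assumed leaf values:

	/* Assumed example values, not from a real CPU dump. */
	uint32_t eax = 0x07300404, edx = 0x00000603;

	intel_num_arch_counters  = (eax & PMU_GENERAL_NR_MASK)
				   >> PMU_GENERAL_NR_SHIFT;	/* = 4 */
	intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK)
				   >> PMU_FIXED_NR_SHIFT;	/* = 3 */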
104 | static inline uint32_t get_fam15h_addr(u32 addr) | ||
105 | { | ||
106 | switch (addr) { | ||
107 | case MSR_K7_PERFCTR0: | ||
108 | case MSR_K7_PERFCTR1: | ||
109 | case MSR_K7_PERFCTR2: | ||
110 | case MSR_K7_PERFCTR3: | ||
111 | return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0); | ||
112 | case MSR_K7_EVNTSEL0: | ||
113 | case MSR_K7_EVNTSEL1: | ||
114 | case MSR_K7_EVNTSEL2: | ||
115 | case MSR_K7_EVNTSEL3: | ||
116 | return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0); | ||
117 | default: | ||
118 | break; | ||
119 | } | ||
120 | |||
121 | return addr; | ||
122 | } | ||
123 | |||
124 | static inline bool is_amd_pmu_msr(unsigned int msr) | ||
125 | { | ||
126 | if ((msr >= MSR_F15H_PERF_CTL && | ||
127 | msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) || | ||
128 | (msr >= MSR_K7_EVNTSEL0 && | ||
129 | msr < MSR_K7_PERFCTR0 + amd_num_counters)) | ||
130 | return true; | ||
131 | |||
132 | return false; | ||
133 | } | ||
134 | |||
135 | static int is_intel_pmu_msr(u32 msr_index, int *type, int *index) | ||
136 | { | ||
137 | u32 msr_index_pmc; | ||
138 | |||
139 | switch (msr_index) { | ||
140 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
141 | case MSR_IA32_DS_AREA: | ||
142 | case MSR_IA32_PEBS_ENABLE: | ||
143 | *type = MSR_TYPE_CTRL; | ||
144 | return true; | ||
145 | |||
146 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
147 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
148 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
149 | *type = MSR_TYPE_GLOBAL; | ||
150 | return true; | ||
151 | |||
152 | default: | ||
153 | |||
154 | if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) && | ||
155 | (msr_index < MSR_CORE_PERF_FIXED_CTR0 + | ||
156 | intel_num_fixed_counters)) { | ||
157 | *index = msr_index - MSR_CORE_PERF_FIXED_CTR0; | ||
158 | *type = MSR_TYPE_COUNTER; | ||
159 | return true; | ||
160 | } | ||
161 | |||
162 | if ((msr_index >= MSR_P6_EVNTSEL0) && | ||
163 | (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) { | ||
164 | *index = msr_index - MSR_P6_EVNTSEL0; | ||
165 | *type = MSR_TYPE_ARCH_CTRL; | ||
166 | return true; | ||
167 | } | ||
168 | |||
169 | msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK; | ||
170 | if ((msr_index_pmc >= MSR_IA32_PERFCTR0) && | ||
171 | (msr_index_pmc < MSR_IA32_PERFCTR0 + | ||
172 | intel_num_arch_counters)) { | ||
173 | *type = MSR_TYPE_ARCH_COUNTER; | ||
174 | *index = msr_index_pmc - MSR_IA32_PERFCTR0; | ||
175 | return true; | ||
176 | } | ||
177 | return false; | ||
178 | } | ||
179 | } | ||
180 | |||
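MSR_PMC_ALIAS_MASK relies on the architectural layout of the counter MSRs: the legacy counters start at MSR_IA32_PERFCTR0 (0xc1) and their full-width aliases at MSR_IA32_PMC0 (0x4c1), so the two ranges differ only in bit 10 and masking it out lets the final range check above cover both. For example:

	/* 0x4c1 ^ 0xc1 == 0x400, so the mask clears bit 10 only.          */
	/* 0x4c3 (IA32_PMC2) & MSR_PMC_ALIAS_MASK == 0xc3 (IA32_PERFCTR2). */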
181 | static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type, | ||
182 | int index, bool is_read) | ||
183 | { | ||
184 | uint64_t *reg = NULL; | ||
185 | struct xen_pmu_intel_ctxt *ctxt; | ||
186 | uint64_t *fix_counters; | ||
187 | struct xen_pmu_cntr_pair *arch_cntr_pair; | ||
188 | struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
189 | uint8_t xenpmu_flags = get_xenpmu_flags(); | ||
190 | |||
191 | |||
192 | if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) | ||
193 | return false; | ||
194 | |||
195 | ctxt = &xenpmu_data->pmu.c.intel; | ||
196 | |||
197 | switch (msr) { | ||
198 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
199 | reg = &ctxt->global_ovf_ctrl; | ||
200 | break; | ||
201 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
202 | reg = &ctxt->global_status; | ||
203 | break; | ||
204 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
205 | reg = &ctxt->global_ctrl; | ||
206 | break; | ||
207 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
208 | reg = &ctxt->fixed_ctrl; | ||
209 | break; | ||
210 | default: | ||
211 | switch (type) { | ||
212 | case MSR_TYPE_COUNTER: | ||
213 | fix_counters = field_offset(ctxt, fixed_counters); | ||
214 | reg = &fix_counters[index]; | ||
215 | break; | ||
216 | case MSR_TYPE_ARCH_COUNTER: | ||
217 | arch_cntr_pair = field_offset(ctxt, arch_counters); | ||
218 | reg = &arch_cntr_pair[index].counter; | ||
219 | break; | ||
220 | case MSR_TYPE_ARCH_CTRL: | ||
221 | arch_cntr_pair = field_offset(ctxt, arch_counters); | ||
222 | reg = &arch_cntr_pair[index].control; | ||
223 | break; | ||
224 | default: | ||
225 | return false; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | if (reg) { | ||
230 | if (is_read) | ||
231 | *val = *reg; | ||
232 | else { | ||
233 | *reg = *val; | ||
234 | |||
235 | if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL) | ||
236 | ctxt->global_status &= (~(*val)); | ||
237 | } | ||
238 | return true; | ||
239 | } | ||
240 | |||
241 | return false; | ||
242 | } | ||
243 | |||
244 | static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) | ||
245 | { | ||
246 | uint64_t *reg = NULL; | ||
247 | int i, off = 0; | ||
248 | struct xen_pmu_amd_ctxt *ctxt; | ||
249 | uint64_t *counter_regs, *ctrl_regs; | ||
250 | struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
251 | uint8_t xenpmu_flags = get_xenpmu_flags(); | ||
252 | |||
253 | if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) | ||
254 | return false; | ||
255 | |||
256 | if (k7_counters_mirrored && | ||
257 | ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3))) | ||
258 | msr = get_fam15h_addr(msr); | ||
259 | |||
260 | ctxt = &xenpmu_data->pmu.c.amd; | ||
261 | for (i = 0; i < amd_num_counters; i++) { | ||
262 | if (msr == amd_ctrls_base + off) { | ||
263 | ctrl_regs = field_offset(ctxt, ctrls); | ||
264 | reg = &ctrl_regs[i]; | ||
265 | break; | ||
266 | } else if (msr == amd_counters_base + off) { | ||
267 | counter_regs = field_offset(ctxt, counters); | ||
268 | reg = &counter_regs[i]; | ||
269 | break; | ||
270 | } | ||
271 | off += amd_msr_step; | ||
272 | } | ||
273 | |||
274 | if (reg) { | ||
275 | if (is_read) | ||
276 | *val = *reg; | ||
277 | else | ||
278 | *reg = *val; | ||
279 | |||
280 | return true; | ||
281 | } | ||
282 | return false; | ||
283 | } | ||
284 | |||
285 | bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) | ||
286 | { | ||
287 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
288 | if (is_amd_pmu_msr(msr)) { | ||
289 | if (!xen_amd_pmu_emulate(msr, val, 1)) | ||
290 | *val = native_read_msr_safe(msr, err); | ||
291 | return true; | ||
292 | } | ||
293 | } else { | ||
294 | int type, index; | ||
295 | |||
296 | if (is_intel_pmu_msr(msr, &type, &index)) { | ||
297 | if (!xen_intel_pmu_emulate(msr, val, type, index, 1)) | ||
298 | *val = native_read_msr_safe(msr, err); | ||
299 | return true; | ||
300 | } | ||
301 | } | ||
302 | |||
303 | return false; | ||
304 | } | ||
305 | |||
306 | bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) | ||
307 | { | ||
308 | uint64_t val = ((uint64_t)high << 32) | low; | ||
309 | |||
310 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
311 | if (is_amd_pmu_msr(msr)) { | ||
312 | if (!xen_amd_pmu_emulate(msr, &val, 0)) | ||
313 | *err = native_write_msr_safe(msr, low, high); | ||
314 | return true; | ||
315 | } | ||
316 | } else { | ||
317 | int type, index; | ||
318 | |||
319 | if (is_intel_pmu_msr(msr, &type, &index)) { | ||
320 | if (!xen_intel_pmu_emulate(msr, &val, type, index, 0)) | ||
321 | *err = native_write_msr_safe(msr, low, high); | ||
322 | return true; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | return false; | ||
327 | } | ||
328 | |||
329 | static unsigned long long xen_amd_read_pmc(int counter) | ||
330 | { | ||
331 | struct xen_pmu_amd_ctxt *ctxt; | ||
332 | uint64_t *counter_regs; | ||
333 | struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
334 | uint8_t xenpmu_flags = get_xenpmu_flags(); | ||
335 | |||
336 | if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { | ||
337 | uint32_t msr; | ||
338 | int err; | ||
339 | |||
340 | msr = amd_counters_base + (counter * amd_msr_step); | ||
341 | return native_read_msr_safe(msr, &err); | ||
342 | } | ||
343 | |||
344 | ctxt = &xenpmu_data->pmu.c.amd; | ||
345 | counter_regs = field_offset(ctxt, counters); | ||
346 | return counter_regs[counter]; | ||
347 | } | ||
348 | |||
349 | static unsigned long long xen_intel_read_pmc(int counter) | ||
350 | { | ||
351 | struct xen_pmu_intel_ctxt *ctxt; | ||
352 | uint64_t *fixed_counters; | ||
353 | struct xen_pmu_cntr_pair *arch_cntr_pair; | ||
354 | struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
355 | uint8_t xenpmu_flags = get_xenpmu_flags(); | ||
356 | |||
357 | if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { | ||
358 | uint32_t msr; | ||
359 | int err; | ||
360 | |||
361 | if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) | ||
362 | msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff); | ||
363 | else | ||
364 | msr = MSR_IA32_PERFCTR0 + counter; | ||
365 | |||
366 | return native_read_msr_safe(msr, &err); | ||
367 | } | ||
368 | |||
369 | ctxt = &xenpmu_data->pmu.c.intel; | ||
370 | if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) { | ||
371 | fixed_counters = field_offset(ctxt, fixed_counters); | ||
372 | return fixed_counters[counter & 0xffff]; | ||
373 | } | ||
374 | |||
375 | arch_cntr_pair = field_offset(ctxt, arch_counters); | ||
376 | return arch_cntr_pair[counter].counter; | ||
377 | } | ||
378 | |||
379 | unsigned long long xen_read_pmc(int counter) | ||
380 | { | ||
381 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
382 | return xen_amd_read_pmc(counter); | ||
383 | else | ||
384 | return xen_intel_read_pmc(counter); | ||
385 | } | ||
386 | |||
387 | int pmu_apic_update(uint32_t val) | ||
388 | { | ||
389 | int ret; | ||
390 | struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
391 | |||
392 | if (!xenpmu_data) { | ||
393 | pr_warn_once("%s: pmudata not initialized\n", __func__); | ||
394 | return -EINVAL; | ||
395 | } | ||
396 | |||
397 | xenpmu_data->pmu.l.lapic_lvtpc = val; | ||
398 | |||
399 | if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING) | ||
400 | return 0; | ||
401 | |||
402 | ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL); | ||
403 | |||
404 | return ret; | ||
405 | } | ||
406 | |||
407 | /* perf callbacks */ | ||
408 | static int xen_is_in_guest(void) | ||
409 | { | ||
410 | const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
411 | |||
412 | if (!xenpmu_data) { | ||
413 | pr_warn_once("%s: pmudata not initialized\n", __func__); | ||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF)) | ||
418 | return 0; | ||
419 | |||
420 | return 1; | ||
421 | } | ||
422 | |||
423 | static int xen_is_user_mode(void) | ||
424 | { | ||
425 | const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
426 | |||
427 | if (!xenpmu_data) { | ||
428 | pr_warn_once("%s: pmudata not initialized\n", __func__); | ||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) | ||
433 | return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER); | ||
434 | else | ||
435 | return !!(xenpmu_data->pmu.r.regs.cpl & 3); | ||
436 | } | ||
437 | |||
438 | static unsigned long xen_get_guest_ip(void) | ||
439 | { | ||
440 | const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
441 | |||
442 | if (!xenpmu_data) { | ||
443 | pr_warn_once("%s: pmudata not initialized\n", __func__); | ||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | return xenpmu_data->pmu.r.regs.ip; | ||
448 | } | ||
449 | |||
450 | static struct perf_guest_info_callbacks xen_guest_cbs = { | ||
451 | .is_in_guest = xen_is_in_guest, | ||
452 | .is_user_mode = xen_is_user_mode, | ||
453 | .get_guest_ip = xen_get_guest_ip, | ||
454 | }; | ||
455 | |||
456 | /* Convert registers from Xen's format to Linux' */ | ||
457 | static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, | ||
458 | struct pt_regs *regs, uint64_t pmu_flags) | ||
459 | { | ||
460 | regs->ip = xen_regs->ip; | ||
461 | regs->cs = xen_regs->cs; | ||
462 | regs->sp = xen_regs->sp; | ||
463 | |||
464 | if (pmu_flags & PMU_SAMPLE_PV) { | ||
465 | if (pmu_flags & PMU_SAMPLE_USER) | ||
466 | regs->cs |= 3; | ||
467 | else | ||
468 | regs->cs &= ~3; | ||
469 | } else { | ||
470 | if (xen_regs->cpl) | ||
471 | regs->cs |= 3; | ||
472 | else | ||
473 | regs->cs &= ~3; | ||
474 | } | ||
475 | } | ||
476 | |||
477 | irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) | ||
478 | { | ||
479 | int err, ret = IRQ_NONE; | ||
480 | struct pt_regs regs; | ||
481 | const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); | ||
482 | uint8_t xenpmu_flags = get_xenpmu_flags(); | ||
483 | |||
484 | if (!xenpmu_data) { | ||
485 | pr_warn_once("%s: pmudata not initialized\n", __func__); | ||
486 | return ret; | ||
487 | } | ||
488 | |||
489 | this_cpu_ptr(&xenpmu_shared)->flags = | ||
490 | xenpmu_flags | XENPMU_IRQ_PROCESSING; | ||
491 | xen_convert_regs(&xenpmu_data->pmu.r.regs, ®s, | ||
492 | xenpmu_data->pmu.pmu_flags); | ||
493 | if (x86_pmu.handle_irq(®s)) | ||
494 | ret = IRQ_HANDLED; | ||
495 | |||
496 | /* Write out cached context to HW */ | ||
497 | err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL); | ||
498 | this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags; | ||
499 | if (err) { | ||
500 | pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err); | ||
501 | return IRQ_NONE; | ||
502 | } | ||
503 | |||
504 | return ret; | ||
505 | } | ||
506 | |||
507 | bool is_xen_pmu(int cpu) | ||
508 | { | ||
509 | return (get_xenpmu_data() != NULL); | ||
510 | } | ||
511 | |||
512 | void xen_pmu_init(int cpu) | ||
513 | { | ||
514 | int err; | ||
515 | struct xen_pmu_params xp; | ||
516 | unsigned long pfn; | ||
517 | struct xen_pmu_data *xenpmu_data; | ||
518 | |||
519 | BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); | ||
520 | |||
521 | if (xen_hvm_domain()) | ||
522 | return; | ||
523 | |||
524 | xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); | ||
525 | if (!xenpmu_data) { | ||
526 | pr_err("VPMU init: No memory\n"); | ||
527 | return; | ||
528 | } | ||
529 | pfn = virt_to_pfn(xenpmu_data); | ||
530 | |||
531 | xp.val = pfn_to_mfn(pfn); | ||
532 | xp.vcpu = cpu; | ||
533 | xp.version.maj = XENPMU_VER_MAJ; | ||
534 | xp.version.min = XENPMU_VER_MIN; | ||
535 | err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp); | ||
536 | if (err) | ||
537 | goto fail; | ||
538 | |||
539 | per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; | ||
540 | per_cpu(xenpmu_shared, cpu).flags = 0; | ||
541 | |||
542 | if (cpu == 0) { | ||
543 | perf_register_guest_info_callbacks(&xen_guest_cbs); | ||
544 | xen_pmu_arch_init(); | ||
545 | } | ||
546 | |||
547 | return; | ||
548 | |||
549 | fail: | ||
550 | pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n", | ||
551 | cpu, err); | ||
552 | free_pages((unsigned long)xenpmu_data, 0); | ||
553 | } | ||
554 | |||
555 | void xen_pmu_finish(int cpu) | ||
556 | { | ||
557 | struct xen_pmu_params xp; | ||
558 | |||
559 | if (xen_hvm_domain()) | ||
560 | return; | ||
561 | |||
562 | xp.vcpu = cpu; | ||
563 | xp.version.maj = XENPMU_VER_MAJ; | ||
564 | xp.version.min = XENPMU_VER_MIN; | ||
565 | |||
566 | (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp); | ||
567 | |||
568 | free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0); | ||
569 | per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL; | ||
570 | } | ||
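pmu_msr_read() and pmu_msr_write() return true whenever the MSR belongs to the vPMU, whether the access was satisfied from the shared page or passed through to the real MSR; a false return tells the caller the register is not a PMU MSR at all. The callers are the PV MSR accessors added elsewhere in this series (enlighten.c, outside this excerpt); a sketch of the expected shape on the read side, with the function name and placement assumed:

	/* Sketch of the rdmsr-side hook; name and placement are assumed. */
	static u64 sketch_read_msr_safe(unsigned int msr, int *err)
	{
		u64 val;

		if (pmu_msr_read(msr, &val, err))
			return val;			/* vPMU handled it */

		return native_read_msr_safe(msr, err);	/* normal path */
	}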
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h new file mode 100644 index 000000000000..af5f0ad94078 --- /dev/null +++ b/arch/x86/xen/pmu.h | |||
@@ -0,0 +1,15 @@ | |||
1 | #ifndef __XEN_PMU_H | ||
2 | #define __XEN_PMU_H | ||
3 | |||
4 | #include <xen/interface/xenpmu.h> | ||
5 | |||
6 | irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); | ||
7 | void xen_pmu_init(int cpu); | ||
8 | void xen_pmu_finish(int cpu); | ||
9 | bool is_xen_pmu(int cpu); | ||
10 | bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); | ||
11 | bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); | ||
12 | int pmu_apic_update(uint32_t reg); | ||
13 | unsigned long long xen_read_pmc(int counter); | ||
14 | |||
15 | #endif /* __XEN_PMU_H */ | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 55f388ef481a..f5ef6746d47a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -27,17 +27,23 @@ | |||
27 | #include <xen/interface/memory.h> | 27 | #include <xen/interface/memory.h> |
28 | #include <xen/interface/physdev.h> | 28 | #include <xen/interface/physdev.h> |
29 | #include <xen/features.h> | 29 | #include <xen/features.h> |
30 | #include <xen/hvc-console.h> | ||
30 | #include "xen-ops.h" | 31 | #include "xen-ops.h" |
31 | #include "vdso.h" | 32 | #include "vdso.h" |
32 | #include "p2m.h" | ||
33 | #include "mmu.h" | 33 | #include "mmu.h" |
34 | 34 | ||
35 | #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) | ||
36 | |||
35 | /* Amount of extra memory space we add to the e820 ranges */ | 37 | /* Amount of extra memory space we add to the e820 ranges */ |
36 | struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; | 38 | struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; |
37 | 39 | ||
38 | /* Number of pages released from the initial allocation. */ | 40 | /* Number of pages released from the initial allocation. */ |
39 | unsigned long xen_released_pages; | 41 | unsigned long xen_released_pages; |
40 | 42 | ||
43 | /* E820 map used during setting up memory. */ | ||
44 | static struct e820entry xen_e820_map[E820MAX] __initdata; | ||
45 | static u32 xen_e820_map_entries __initdata; | ||
46 | |||
41 | /* | 47 | /* |
42 | * Buffer used to remap identity mapped pages. We only need the virtual space. | 48 | * Buffer used to remap identity mapped pages. We only need the virtual space. |
43 | * The physical page behind this address is remapped as needed to different | 49 | * The physical page behind this address is remapped as needed to different |
@@ -64,62 +70,89 @@ static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; | |||
64 | */ | 70 | */ |
65 | #define EXTRA_MEM_RATIO (10) | 71 | #define EXTRA_MEM_RATIO (10) |
66 | 72 | ||
67 | static void __init xen_add_extra_mem(phys_addr_t start, phys_addr_t size) | 73 | static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB); |
74 | |||
75 | static void __init xen_parse_512gb(void) | ||
76 | { | ||
77 | bool val = false; | ||
78 | char *arg; | ||
79 | |||
80 | arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit"); | ||
81 | if (!arg) | ||
82 | return; | ||
83 | |||
84 | arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit="); | ||
85 | if (!arg) | ||
86 | val = true; | ||
87 | else if (strtobool(arg + strlen("xen_512gb_limit="), &val)) | ||
88 | return; | ||
89 | |||
90 | xen_512gb_limit = val; | ||
91 | } | ||
92 | |||
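xen_512gb_limit defaults to CONFIG_XEN_512GB and is only consulted for domU (see xen_get_pages_limit() below). xen_parse_512gb() reads it from the guest kernel command line: a bare "xen_512gb_limit" enables the limit, while "xen_512gb_limit=<bool>" is fed to strtobool(). So an illustrative way to let a PV domU use more than 512 GiB is to boot it with:

	xen_512gb_limit=0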
93 | static void __init xen_add_extra_mem(unsigned long start_pfn, | ||
94 | unsigned long n_pfns) | ||
68 | { | 95 | { |
69 | int i; | 96 | int i; |
70 | 97 | ||
98 | /* | ||
99 | * No need to check for zero size, should happen rarely and will only | ||
100 | * write a new entry regarded to be unused due to zero size. | ||
101 | */ | ||
71 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | 102 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
72 | /* Add new region. */ | 103 | /* Add new region. */ |
73 | if (xen_extra_mem[i].size == 0) { | 104 | if (xen_extra_mem[i].n_pfns == 0) { |
74 | xen_extra_mem[i].start = start; | 105 | xen_extra_mem[i].start_pfn = start_pfn; |
75 | xen_extra_mem[i].size = size; | 106 | xen_extra_mem[i].n_pfns = n_pfns; |
76 | break; | 107 | break; |
77 | } | 108 | } |
78 | /* Append to existing region. */ | 109 | /* Append to existing region. */ |
79 | if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) { | 110 | if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns == |
80 | xen_extra_mem[i].size += size; | 111 | start_pfn) { |
112 | xen_extra_mem[i].n_pfns += n_pfns; | ||
81 | break; | 113 | break; |
82 | } | 114 | } |
83 | } | 115 | } |
84 | if (i == XEN_EXTRA_MEM_MAX_REGIONS) | 116 | if (i == XEN_EXTRA_MEM_MAX_REGIONS) |
85 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); | 117 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); |
86 | 118 | ||
87 | memblock_reserve(start, size); | 119 | memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); |
88 | } | 120 | } |
89 | 121 | ||
90 | static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size) | 122 | static void __init xen_del_extra_mem(unsigned long start_pfn, |
123 | unsigned long n_pfns) | ||
91 | { | 124 | { |
92 | int i; | 125 | int i; |
93 | phys_addr_t start_r, size_r; | 126 | unsigned long start_r, size_r; |
94 | 127 | ||
95 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | 128 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
96 | start_r = xen_extra_mem[i].start; | 129 | start_r = xen_extra_mem[i].start_pfn; |
97 | size_r = xen_extra_mem[i].size; | 130 | size_r = xen_extra_mem[i].n_pfns; |
98 | 131 | ||
99 | /* Start of region. */ | 132 | /* Start of region. */ |
100 | if (start_r == start) { | 133 | if (start_r == start_pfn) { |
101 | BUG_ON(size > size_r); | 134 | BUG_ON(n_pfns > size_r); |
102 | xen_extra_mem[i].start += size; | 135 | xen_extra_mem[i].start_pfn += n_pfns; |
103 | xen_extra_mem[i].size -= size; | 136 | xen_extra_mem[i].n_pfns -= n_pfns; |
104 | break; | 137 | break; |
105 | } | 138 | } |
106 | /* End of region. */ | 139 | /* End of region. */ |
107 | if (start_r + size_r == start + size) { | 140 | if (start_r + size_r == start_pfn + n_pfns) { |
108 | BUG_ON(size > size_r); | 141 | BUG_ON(n_pfns > size_r); |
109 | xen_extra_mem[i].size -= size; | 142 | xen_extra_mem[i].n_pfns -= n_pfns; |
110 | break; | 143 | break; |
111 | } | 144 | } |
112 | /* Mid of region. */ | 145 | /* Mid of region. */ |
113 | if (start > start_r && start < start_r + size_r) { | 146 | if (start_pfn > start_r && start_pfn < start_r + size_r) { |
114 | BUG_ON(start + size > start_r + size_r); | 147 | BUG_ON(start_pfn + n_pfns > start_r + size_r); |
115 | xen_extra_mem[i].size = start - start_r; | 148 | xen_extra_mem[i].n_pfns = start_pfn - start_r; |
116 | /* Calling memblock_reserve() again is okay. */ | 149 | /* Calling memblock_reserve() again is okay. */ |
117 | xen_add_extra_mem(start + size, start_r + size_r - | 150 | xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r - |
118 | (start + size)); | 151 | (start_pfn + n_pfns)); |
119 | break; | 152 | break; |
120 | } | 153 | } |
121 | } | 154 | } |
122 | memblock_free(start, size); | 155 | memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); |
123 | } | 156 | } |
124 | 157 | ||
125 | /* | 158 | /* |
@@ -130,11 +163,10 @@ static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size) | |||
130 | unsigned long __ref xen_chk_extra_mem(unsigned long pfn) | 163 | unsigned long __ref xen_chk_extra_mem(unsigned long pfn) |
131 | { | 164 | { |
132 | int i; | 165 | int i; |
133 | phys_addr_t addr = PFN_PHYS(pfn); | ||
134 | 166 | ||
135 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | 167 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
136 | if (addr >= xen_extra_mem[i].start && | 168 | if (pfn >= xen_extra_mem[i].start_pfn && |
137 | addr < xen_extra_mem[i].start + xen_extra_mem[i].size) | 169 | pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns) |
138 | return INVALID_P2M_ENTRY; | 170 | return INVALID_P2M_ENTRY; |
139 | } | 171 | } |
140 | 172 | ||
@@ -150,10 +182,10 @@ void __init xen_inv_extra_mem(void) | |||
150 | int i; | 182 | int i; |
151 | 183 | ||
152 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | 184 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { |
153 | if (!xen_extra_mem[i].size) | 185 | if (!xen_extra_mem[i].n_pfns) |
154 | continue; | 186 | continue; |
155 | pfn_s = PFN_DOWN(xen_extra_mem[i].start); | 187 | pfn_s = xen_extra_mem[i].start_pfn; |
156 | pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); | 188 | pfn_e = pfn_s + xen_extra_mem[i].n_pfns; |
157 | for (pfn = pfn_s; pfn < pfn_e; pfn++) | 189 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
158 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 190 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
159 | } | 191 | } |
@@ -164,15 +196,13 @@ void __init xen_inv_extra_mem(void) | |||
164 | * This function updates min_pfn with the pfn found and returns | 196 | * This function updates min_pfn with the pfn found and returns |
165 | * the size of that range or zero if not found. | 197 | * the size of that range or zero if not found. |
166 | */ | 198 | */ |
167 | static unsigned long __init xen_find_pfn_range( | 199 | static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn) |
168 | const struct e820entry *list, size_t map_size, | ||
169 | unsigned long *min_pfn) | ||
170 | { | 200 | { |
171 | const struct e820entry *entry; | 201 | const struct e820entry *entry = xen_e820_map; |
172 | unsigned int i; | 202 | unsigned int i; |
173 | unsigned long done = 0; | 203 | unsigned long done = 0; |
174 | 204 | ||
175 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 205 | for (i = 0; i < xen_e820_map_entries; i++, entry++) { |
176 | unsigned long s_pfn; | 206 | unsigned long s_pfn; |
177 | unsigned long e_pfn; | 207 | unsigned long e_pfn; |
178 | 208 | ||
@@ -221,7 +251,7 @@ static int __init xen_free_mfn(unsigned long mfn) | |||
221 | * as a fallback if the remapping fails. | 251 | * as a fallback if the remapping fails. |
222 | */ | 252 | */ |
223 | static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, | 253 | static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, |
224 | unsigned long end_pfn, unsigned long nr_pages, unsigned long *released) | 254 | unsigned long end_pfn, unsigned long nr_pages) |
225 | { | 255 | { |
226 | unsigned long pfn, end; | 256 | unsigned long pfn, end; |
227 | int ret; | 257 | int ret; |
@@ -241,7 +271,7 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, | |||
241 | WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); | 271 | WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); |
242 | 272 | ||
243 | if (ret == 1) { | 273 | if (ret == 1) { |
244 | (*released)++; | 274 | xen_released_pages++; |
245 | if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) | 275 | if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) |
246 | break; | 276 | break; |
247 | } else | 277 | } else |
@@ -356,9 +386,8 @@ static void __init xen_do_set_identity_and_remap_chunk( | |||
356 | * to Xen and not remapped. | 386 | * to Xen and not remapped. |
357 | */ | 387 | */ |
358 | static unsigned long __init xen_set_identity_and_remap_chunk( | 388 | static unsigned long __init xen_set_identity_and_remap_chunk( |
359 | const struct e820entry *list, size_t map_size, unsigned long start_pfn, | 389 | unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, |
360 | unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, | 390 | unsigned long remap_pfn) |
361 | unsigned long *released, unsigned long *remapped) | ||
362 | { | 391 | { |
363 | unsigned long pfn; | 392 | unsigned long pfn; |
364 | unsigned long i = 0; | 393 | unsigned long i = 0; |
@@ -379,12 +408,11 @@ static unsigned long __init xen_set_identity_and_remap_chunk( | |||
379 | if (cur_pfn + size > nr_pages) | 408 | if (cur_pfn + size > nr_pages) |
380 | size = nr_pages - cur_pfn; | 409 | size = nr_pages - cur_pfn; |
381 | 410 | ||
382 | remap_range_size = xen_find_pfn_range(list, map_size, | 411 | remap_range_size = xen_find_pfn_range(&remap_pfn); |
383 | &remap_pfn); | ||
384 | if (!remap_range_size) { | 412 | if (!remap_range_size) { |
385 | pr_warning("Unable to find available pfn range, not remapping identity pages\n"); | 413 | pr_warning("Unable to find available pfn range, not remapping identity pages\n"); |
386 | xen_set_identity_and_release_chunk(cur_pfn, | 414 | xen_set_identity_and_release_chunk(cur_pfn, |
387 | cur_pfn + left, nr_pages, released); | 415 | cur_pfn + left, nr_pages); |
388 | break; | 416 | break; |
389 | } | 417 | } |
390 | /* Adjust size to fit in current e820 RAM region */ | 418 | /* Adjust size to fit in current e820 RAM region */ |
@@ -396,7 +424,6 @@ static unsigned long __init xen_set_identity_and_remap_chunk( | |||
396 | /* Update variables to reflect new mappings. */ | 424 | /* Update variables to reflect new mappings. */ |
397 | i += size; | 425 | i += size; |
398 | remap_pfn += size; | 426 | remap_pfn += size; |
399 | *remapped += size; | ||
400 | } | 427 | } |
401 | 428 | ||
402 | /* | 429 | /* |
@@ -411,15 +438,11 @@ static unsigned long __init xen_set_identity_and_remap_chunk( | |||
411 | return remap_pfn; | 438 | return remap_pfn; |
412 | } | 439 | } |
413 | 440 | ||
414 | static void __init xen_set_identity_and_remap( | 441 | static void __init xen_set_identity_and_remap(unsigned long nr_pages) |
415 | const struct e820entry *list, size_t map_size, unsigned long nr_pages, | ||
416 | unsigned long *released, unsigned long *remapped) | ||
417 | { | 442 | { |
418 | phys_addr_t start = 0; | 443 | phys_addr_t start = 0; |
419 | unsigned long last_pfn = nr_pages; | 444 | unsigned long last_pfn = nr_pages; |
420 | const struct e820entry *entry; | 445 | const struct e820entry *entry = xen_e820_map; |
421 | unsigned long num_released = 0; | ||
422 | unsigned long num_remapped = 0; | ||
423 | int i; | 446 | int i; |
424 | 447 | ||
425 | /* | 448 | /* |
@@ -433,9 +456,9 @@ static void __init xen_set_identity_and_remap( | |||
433 | * example) the DMI tables in a reserved region that begins on | 456 | * example) the DMI tables in a reserved region that begins on |
434 | * a non-page boundary. | 457 | * a non-page boundary. |
435 | */ | 458 | */ |
436 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 459 | for (i = 0; i < xen_e820_map_entries; i++, entry++) { |
437 | phys_addr_t end = entry->addr + entry->size; | 460 | phys_addr_t end = entry->addr + entry->size; |
438 | if (entry->type == E820_RAM || i == map_size - 1) { | 461 | if (entry->type == E820_RAM || i == xen_e820_map_entries - 1) { |
439 | unsigned long start_pfn = PFN_DOWN(start); | 462 | unsigned long start_pfn = PFN_DOWN(start); |
440 | unsigned long end_pfn = PFN_UP(end); | 463 | unsigned long end_pfn = PFN_UP(end); |
441 | 464 | ||
@@ -444,17 +467,13 @@ static void __init xen_set_identity_and_remap( | |||
444 | 467 | ||
445 | if (start_pfn < end_pfn) | 468 | if (start_pfn < end_pfn) |
446 | last_pfn = xen_set_identity_and_remap_chunk( | 469 | last_pfn = xen_set_identity_and_remap_chunk( |
447 | list, map_size, start_pfn, | 470 | start_pfn, end_pfn, nr_pages, |
448 | end_pfn, nr_pages, last_pfn, | 471 | last_pfn); |
449 | &num_released, &num_remapped); | ||
450 | start = end; | 472 | start = end; |
451 | } | 473 | } |
452 | } | 474 | } |
453 | 475 | ||
454 | *released = num_released; | 476 | pr_info("Released %ld page(s)\n", xen_released_pages); |
455 | *remapped = num_remapped; | ||
456 | |||
457 | pr_info("Released %ld page(s)\n", num_released); | ||
458 | } | 477 | } |
459 | 478 | ||
460 | /* | 479 | /* |
@@ -494,7 +513,7 @@ void __init xen_remap_memory(void) | |||
494 | } else if (pfn_s + len == xen_remap_buf.target_pfn) { | 513 | } else if (pfn_s + len == xen_remap_buf.target_pfn) { |
495 | len += xen_remap_buf.size; | 514 | len += xen_remap_buf.size; |
496 | } else { | 515 | } else { |
497 | xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); | 516 | xen_del_extra_mem(pfn_s, len); |
498 | pfn_s = xen_remap_buf.target_pfn; | 517 | pfn_s = xen_remap_buf.target_pfn; |
499 | len = xen_remap_buf.size; | 518 | len = xen_remap_buf.size; |
500 | } | 519 | } |
@@ -504,19 +523,36 @@ void __init xen_remap_memory(void) | |||
504 | } | 523 | } |
505 | 524 | ||
506 | if (pfn_s != ~0UL && len) | 525 | if (pfn_s != ~0UL && len) |
507 | xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len)); | 526 | xen_del_extra_mem(pfn_s, len); |
508 | 527 | ||
509 | set_pte_mfn(buf, mfn_save, PAGE_KERNEL); | 528 | set_pte_mfn(buf, mfn_save, PAGE_KERNEL); |
510 | 529 | ||
511 | pr_info("Remapped %ld page(s)\n", remapped); | 530 | pr_info("Remapped %ld page(s)\n", remapped); |
512 | } | 531 | } |
513 | 532 | ||
533 | static unsigned long __init xen_get_pages_limit(void) | ||
534 | { | ||
535 | unsigned long limit; | ||
536 | |||
537 | #ifdef CONFIG_X86_32 | ||
538 | limit = GB(64) / PAGE_SIZE; | ||
539 | #else | ||
540 | limit = MAXMEM / PAGE_SIZE; | ||
541 | if (!xen_initial_domain() && xen_512gb_limit) | ||
542 | limit = GB(512) / PAGE_SIZE; | ||
543 | #endif | ||
544 | return limit; | ||
545 | } | ||
546 | |||
514 | static unsigned long __init xen_get_max_pages(void) | 547 | static unsigned long __init xen_get_max_pages(void) |
515 | { | 548 | { |
516 | unsigned long max_pages = MAX_DOMAIN_PAGES; | 549 | unsigned long max_pages, limit; |
517 | domid_t domid = DOMID_SELF; | 550 | domid_t domid = DOMID_SELF; |
518 | int ret; | 551 | int ret; |
519 | 552 | ||
553 | limit = xen_get_pages_limit(); | ||
554 | max_pages = limit; | ||
555 | |||
520 | /* | 556 | /* |
521 | * For the initial domain we use the maximum reservation as | 557 | * For the initial domain we use the maximum reservation as |
522 | * the maximum page. | 558 | * the maximum page. |
@@ -532,7 +568,7 @@ static unsigned long __init xen_get_max_pages(void) | |||
532 | max_pages = ret; | 568 | max_pages = ret; |
533 | } | 569 | } |
534 | 570 | ||
535 | return min(max_pages, MAX_DOMAIN_PAGES); | 571 | return min(max_pages, limit); |
536 | } | 572 | } |
537 | 573 | ||
538 | static void __init xen_align_and_add_e820_region(phys_addr_t start, | 574 | static void __init xen_align_and_add_e820_region(phys_addr_t start, |
@@ -549,39 +585,188 @@ static void __init xen_align_and_add_e820_region(phys_addr_t start, | |||
549 | e820_add_region(start, end - start, type); | 585 | e820_add_region(start, end - start, type); |
550 | } | 586 | } |
551 | 587 | ||
552 | static void __init xen_ignore_unusable(struct e820entry *list, size_t map_size) | 588 | static void __init xen_ignore_unusable(void) |
553 | { | 589 | { |
554 | struct e820entry *entry; | 590 | struct e820entry *entry = xen_e820_map; |
555 | unsigned int i; | 591 | unsigned int i; |
556 | 592 | ||
557 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 593 | for (i = 0; i < xen_e820_map_entries; i++, entry++) { |
558 | if (entry->type == E820_UNUSABLE) | 594 | if (entry->type == E820_UNUSABLE) |
559 | entry->type = E820_RAM; | 595 | entry->type = E820_RAM; |
560 | } | 596 | } |
561 | } | 597 | } |
562 | 598 | ||
599 | static unsigned long __init xen_count_remap_pages(unsigned long max_pfn) | ||
600 | { | ||
601 | unsigned long extra = 0; | ||
602 | unsigned long start_pfn, end_pfn; | ||
603 | const struct e820entry *entry = xen_e820_map; | ||
604 | int i; | ||
605 | |||
606 | end_pfn = 0; | ||
607 | for (i = 0; i < xen_e820_map_entries; i++, entry++) { | ||
608 | start_pfn = PFN_DOWN(entry->addr); | ||
609 | /* Adjacent regions on non-page boundaries handling! */ | ||
610 | end_pfn = min(end_pfn, start_pfn); | ||
611 | |||
612 | if (start_pfn >= max_pfn) | ||
613 | return extra + max_pfn - end_pfn; | ||
614 | |||
615 | /* Add any holes in map to result. */ | ||
616 | extra += start_pfn - end_pfn; | ||
617 | |||
618 | end_pfn = PFN_UP(entry->addr + entry->size); | ||
619 | end_pfn = min(end_pfn, max_pfn); | ||
620 | |||
621 | if (entry->type != E820_RAM) | ||
622 | extra += end_pfn - start_pfn; | ||
623 | } | ||
624 | |||
625 | return extra; | ||
626 | } | ||
627 | |||
628 | bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size) | ||
629 | { | ||
630 | struct e820entry *entry; | ||
631 | unsigned mapcnt; | ||
632 | phys_addr_t end; | ||
633 | |||
634 | if (!size) | ||
635 | return false; | ||
636 | |||
637 | end = start + size; | ||
638 | entry = xen_e820_map; | ||
639 | |||
640 | for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++) { | ||
641 | if (entry->type == E820_RAM && entry->addr <= start && | ||
642 | (entry->addr + entry->size) >= end) | ||
643 | return false; | ||
644 | |||
645 | entry++; | ||
646 | } | ||
647 | |||
648 | return true; | ||
649 | } | ||
650 | |||
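Note the conservative semantics: xen_is_e820_reserved() only reports a range as usable (returns false) when it lies entirely inside a single E820_RAM entry; anything that crosses an entry boundary or touches a hole counts as a conflict. With an assumed map containing RAM at [0, 0x9fc00) and again from 0x100000 upwards:

	xen_is_e820_reserved(0x90000,  0x20000);  /* true:  crosses the hole below 1 MiB   */
	xen_is_e820_reserved(0x200000, 0x10000);  /* false: fully inside the second entry  */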
651 | /* | ||
652 | * Find a free area in physical memory not yet reserved and compliant with | ||
653 | * E820 map. | ||
654 | * Used to relocate pre-allocated areas like initrd or p2m list which are in | ||
655 | * conflict with the to be used E820 map. | ||
656 | * In case no area is found, return 0. Otherwise return the physical address | ||
657 | * of the area which is already reserved for convenience. | ||
658 | */ | ||
659 | phys_addr_t __init xen_find_free_area(phys_addr_t size) | ||
660 | { | ||
661 | unsigned mapcnt; | ||
662 | phys_addr_t addr, start; | ||
663 | struct e820entry *entry = xen_e820_map; | ||
664 | |||
665 | for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++, entry++) { | ||
666 | if (entry->type != E820_RAM || entry->size < size) | ||
667 | continue; | ||
668 | start = entry->addr; | ||
669 | for (addr = start; addr < start + size; addr += PAGE_SIZE) { | ||
670 | if (!memblock_is_reserved(addr)) | ||
671 | continue; | ||
672 | start = addr + PAGE_SIZE; | ||
673 | if (start + size > entry->addr + entry->size) | ||
674 | break; | ||
675 | } | ||
676 | if (addr >= start + size) { | ||
677 | memblock_reserve(start, size); | ||
678 | return start; | ||
679 | } | ||
680 | } | ||
681 | |||
682 | return 0; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Like memcpy, but with physical addresses for dest and src. | ||
687 | */ | ||
688 | static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src, | ||
689 | phys_addr_t n) | ||
690 | { | ||
691 | phys_addr_t dest_off, src_off, dest_len, src_len, len; | ||
692 | void *from, *to; | ||
693 | |||
694 | while (n) { | ||
695 | dest_off = dest & ~PAGE_MASK; | ||
696 | src_off = src & ~PAGE_MASK; | ||
697 | dest_len = n; | ||
698 | if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off) | ||
699 | dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off; | ||
700 | src_len = n; | ||
701 | if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off) | ||
702 | src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off; | ||
703 | len = min(dest_len, src_len); | ||
704 | to = early_memremap(dest - dest_off, dest_len + dest_off); | ||
705 | from = early_memremap(src - src_off, src_len + src_off); | ||
706 | memcpy(to, from, len); | ||
707 | early_memunmap(to, dest_len + dest_off); | ||
708 | early_memunmap(from, src_len + src_off); | ||
709 | n -= len; | ||
710 | dest += len; | ||
711 | src += len; | ||
712 | } | ||
713 | } | ||
714 | |||
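xen_find_free_area() and xen_phys_memcpy() are the building blocks used later in this series to move hypervisor-placed data (initrd, p2m list) out of regions that conflict with the target E820 map; the area returned by xen_find_free_area() is already memblock-reserved. A sketch of the expected relocation pattern (the concrete callers live in hunks outside this excerpt, so names and error handling here are assumed):

	/* Relocate a conflicting physical range [old, old + size). */
	phys_addr_t new_area = xen_find_free_area(size);

	if (!new_area)
		BUG();				/* nowhere to relocate to */

	xen_phys_memcpy(new_area, old, size);
	memblock_free(old, size);		/* old range may now be reused */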
715 | /* | ||
716 | * Reserve Xen mfn_list. | ||
717 | */ | ||
718 | static void __init xen_reserve_xen_mfnlist(void) | ||
719 | { | ||
720 | phys_addr_t start, size; | ||
721 | |||
722 | if (xen_start_info->mfn_list >= __START_KERNEL_map) { | ||
723 | start = __pa(xen_start_info->mfn_list); | ||
724 | size = PFN_ALIGN(xen_start_info->nr_pages * | ||
725 | sizeof(unsigned long)); | ||
726 | } else { | ||
727 | start = PFN_PHYS(xen_start_info->first_p2m_pfn); | ||
728 | size = PFN_PHYS(xen_start_info->nr_p2m_frames); | ||
729 | } | ||
730 | |||
731 | if (!xen_is_e820_reserved(start, size)) { | ||
732 | memblock_reserve(start, size); | ||
733 | return; | ||
734 | } | ||
735 | |||
736 | #ifdef CONFIG_X86_32 | ||
737 | /* | ||
738 | * Relocating the p2m on 32 bit system to an arbitrary virtual address | ||
739 | * is not supported, so just give up. | ||
740 | */ | ||
741 | xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n"); | ||
742 | BUG(); | ||
743 | #else | ||
744 | xen_relocate_p2m(); | ||
745 | #endif | ||
746 | } | ||
747 | |||
563 | /** | 748 | /** |
564 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 749 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
565 | **/ | 750 | **/ |
566 | char * __init xen_memory_setup(void) | 751 | char * __init xen_memory_setup(void) |
567 | { | 752 | { |
568 | static struct e820entry map[E820MAX] __initdata; | 753 | unsigned long max_pfn, pfn_s, n_pfns; |
569 | 754 | phys_addr_t mem_end, addr, size, chunk_size; | |
570 | unsigned long max_pfn = xen_start_info->nr_pages; | 755 | u32 type; |
571 | phys_addr_t mem_end; | ||
572 | int rc; | 756 | int rc; |
573 | struct xen_memory_map memmap; | 757 | struct xen_memory_map memmap; |
574 | unsigned long max_pages; | 758 | unsigned long max_pages; |
575 | unsigned long extra_pages = 0; | 759 | unsigned long extra_pages = 0; |
576 | unsigned long remapped_pages; | ||
577 | int i; | 760 | int i; |
578 | int op; | 761 | int op; |
579 | 762 | ||
580 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); | 763 | xen_parse_512gb(); |
764 | max_pfn = xen_get_pages_limit(); | ||
765 | max_pfn = min(max_pfn, xen_start_info->nr_pages); | ||
581 | mem_end = PFN_PHYS(max_pfn); | 766 | mem_end = PFN_PHYS(max_pfn); |
582 | 767 | ||
583 | memmap.nr_entries = E820MAX; | 768 | memmap.nr_entries = E820MAX; |
584 | set_xen_guest_handle(memmap.buffer, map); | 769 | set_xen_guest_handle(memmap.buffer, xen_e820_map); |
585 | 770 | ||
586 | op = xen_initial_domain() ? | 771 | op = xen_initial_domain() ? |
587 | XENMEM_machine_memory_map : | 772 | XENMEM_machine_memory_map : |
@@ -590,15 +775,16 @@ char * __init xen_memory_setup(void) | |||
590 | if (rc == -ENOSYS) { | 775 | if (rc == -ENOSYS) { |
591 | BUG_ON(xen_initial_domain()); | 776 | BUG_ON(xen_initial_domain()); |
592 | memmap.nr_entries = 1; | 777 | memmap.nr_entries = 1; |
593 | map[0].addr = 0ULL; | 778 | xen_e820_map[0].addr = 0ULL; |
594 | map[0].size = mem_end; | 779 | xen_e820_map[0].size = mem_end; |
595 | /* 8MB slack (to balance backend allocations). */ | 780 | /* 8MB slack (to balance backend allocations). */ |
596 | map[0].size += 8ULL << 20; | 781 | xen_e820_map[0].size += 8ULL << 20; |
597 | map[0].type = E820_RAM; | 782 | xen_e820_map[0].type = E820_RAM; |
598 | rc = 0; | 783 | rc = 0; |
599 | } | 784 | } |
600 | BUG_ON(rc); | 785 | BUG_ON(rc); |
601 | BUG_ON(memmap.nr_entries == 0); | 786 | BUG_ON(memmap.nr_entries == 0); |
787 | xen_e820_map_entries = memmap.nr_entries; | ||
602 | 788 | ||
603 | /* | 789 | /* |
604 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE | 790 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE |
@@ -609,24 +795,19 @@ char * __init xen_memory_setup(void) | |||
609 | * a patch in the future. | 795 | * a patch in the future. |
610 | */ | 796 | */ |
611 | if (xen_initial_domain()) | 797 | if (xen_initial_domain()) |
612 | xen_ignore_unusable(map, memmap.nr_entries); | 798 | xen_ignore_unusable(); |
613 | 799 | ||
614 | /* Make sure the Xen-supplied memory map is well-ordered. */ | 800 | /* Make sure the Xen-supplied memory map is well-ordered. */ |
615 | sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); | 801 | sanitize_e820_map(xen_e820_map, xen_e820_map_entries, |
802 | &xen_e820_map_entries); | ||
616 | 803 | ||
617 | max_pages = xen_get_max_pages(); | 804 | max_pages = xen_get_max_pages(); |
618 | if (max_pages > max_pfn) | ||
619 | extra_pages += max_pages - max_pfn; | ||
620 | 805 | ||
621 | /* | 806 | /* How many extra pages do we need due to remapping? */ |
622 | * Set identity map on non-RAM pages and prepare remapping the | 807 | max_pages += xen_count_remap_pages(max_pfn); |
623 | * underlying RAM. | ||
624 | */ | ||
625 | xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, | ||
626 | &xen_released_pages, &remapped_pages); | ||
627 | 808 | ||
628 | extra_pages += xen_released_pages; | 809 | if (max_pages > max_pfn) |
629 | extra_pages += remapped_pages; | 810 | extra_pages += max_pages - max_pfn; |
630 | 811 | ||
631 | /* | 812 | /* |
632 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO | 813 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO |
@@ -635,46 +816,54 @@ char * __init xen_memory_setup(void) | |||
635 | * is limited to the max size of lowmem, so that it doesn't | 816 | * is limited to the max size of lowmem, so that it doesn't |
636 | * get completely filled. | 817 | * get completely filled. |
637 | * | 818 | * |
819 | * Make sure we have no memory above max_pages, as this area | ||
820 | * isn't handled by the p2m management. | ||
821 | * | ||
638 | * In principle there could be a problem in lowmem systems if | 822 | * In principle there could be a problem in lowmem systems if |
639 | * the initial memory is also very large with respect to | 823 | * the initial memory is also very large with respect to |
640 | * lowmem, but we won't try to deal with that here. | 824 | * lowmem, but we won't try to deal with that here. |
641 | */ | 825 | */ |
642 | extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), | 826 | extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), |
643 | extra_pages); | 827 | extra_pages, max_pages - max_pfn); |
644 | i = 0; | 828 | i = 0; |
645 | while (i < memmap.nr_entries) { | 829 | addr = xen_e820_map[0].addr; |
646 | phys_addr_t addr = map[i].addr; | 830 | size = xen_e820_map[0].size; |
647 | phys_addr_t size = map[i].size; | 831 | while (i < xen_e820_map_entries) { |
648 | u32 type = map[i].type; | 832 | chunk_size = size; |
833 | type = xen_e820_map[i].type; | ||
649 | 834 | ||
650 | if (type == E820_RAM) { | 835 | if (type == E820_RAM) { |
651 | if (addr < mem_end) { | 836 | if (addr < mem_end) { |
652 | size = min(size, mem_end - addr); | 837 | chunk_size = min(size, mem_end - addr); |
653 | } else if (extra_pages) { | 838 | } else if (extra_pages) { |
654 | size = min(size, PFN_PHYS(extra_pages)); | 839 | chunk_size = min(size, PFN_PHYS(extra_pages)); |
655 | extra_pages -= PFN_DOWN(size); | 840 | pfn_s = PFN_UP(addr); |
656 | xen_add_extra_mem(addr, size); | 841 | n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s; |
657 | xen_max_p2m_pfn = PFN_DOWN(addr + size); | 842 | extra_pages -= n_pfns; |
843 | xen_add_extra_mem(pfn_s, n_pfns); | ||
844 | xen_max_p2m_pfn = pfn_s + n_pfns; | ||
658 | } else | 845 | } else |
659 | type = E820_UNUSABLE; | 846 | type = E820_UNUSABLE; |
660 | } | 847 | } |
661 | 848 | ||
662 | xen_align_and_add_e820_region(addr, size, type); | 849 | xen_align_and_add_e820_region(addr, chunk_size, type); |
663 | 850 | ||
664 | map[i].addr += size; | 851 | addr += chunk_size; |
665 | map[i].size -= size; | 852 | size -= chunk_size; |
666 | if (map[i].size == 0) | 853 | if (size == 0) { |
667 | i++; | 854 | i++; |
855 | if (i < xen_e820_map_entries) { | ||
856 | addr = xen_e820_map[i].addr; | ||
857 | size = xen_e820_map[i].size; | ||
858 | } | ||
859 | } | ||
668 | } | 860 | } |
669 | 861 | ||
670 | /* | 862 | /* |
671 | * Set the rest as identity mapped, in case PCI BARs are | 863 | * Set the rest as identity mapped, in case PCI BARs are |
672 | * located here. | 864 | * located here. |
673 | * | ||
674 | * PFNs above MAX_P2M_PFN are considered identity mapped as | ||
675 | * well. | ||
676 | */ | 865 | */ |
677 | set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul); | 866 | set_phys_range_identity(addr / PAGE_SIZE, ~0ul); |
678 | 867 | ||
679 | /* | 868 | /* |
680 | * In domU, the ISA region is normal, usable memory, but we | 869 | * In domU, the ISA region is normal, usable memory, but we |
@@ -684,34 +873,53 @@ char * __init xen_memory_setup(void) | |||
684 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, | 873 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, |
685 | E820_RESERVED); | 874 | E820_RESERVED); |
686 | 875 | ||
876 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
877 | |||
687 | /* | 878 | /* |
688 | * Reserve Xen bits: | 879 | * Check whether the kernel itself conflicts with the target E820 map. |
689 | * - mfn_list | 880 | * Failing now is better than running into weird problems later due |
690 | * - xen_start_info | 881 | * to relocating (and even reusing) pages with kernel text or data. |
691 | * See comment above "struct start_info" in <xen/interface/xen.h> | ||
692 | * We tried to make the memblock_reserve more selective so | ||
693 | * that it would be clear what region is reserved. Sadly we ran | ||
694 | * into the problem wherein on a 64-bit hypervisor with a 32-bit | ||
695 | * initial domain, the pt_base has the cr3 value which is not | ||
696 | * necessarily where the pagetable starts! As Jan put it: " | ||
697 | * Actually, the adjustment turns out to be correct: The page | ||
698 | * tables for a 32-on-64 dom0 get allocated in the order "first L1", | ||
699 | * "first L2", "first L3", so the offset to the page table base is | ||
700 | * indeed 2. When reading xen/include/public/xen.h's comment | ||
701 | * very strictly, this is not a violation (since there nothing is said | ||
702 | * that the first thing in the page table space is pointed to by | ||
703 | * pt_base; I admit that this seems to be implied though, namely | ||
704 | * do I think that it is implied that the page table space is the | ||
705 | * range [pt_base, pt_base + nt_pt_frames), whereas that | ||
706 | * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), | ||
707 | * which - without a priori knowledge - the kernel would have | ||
708 | * difficulty to figure out)." - so let's just fall back to the | ||
709 | * easy way and reserve the whole region. | ||
710 | */ | 882 | */ |
711 | memblock_reserve(__pa(xen_start_info->mfn_list), | 883 | if (xen_is_e820_reserved(__pa_symbol(_text), |
712 | xen_start_info->pt_base - xen_start_info->mfn_list); | 884 | __pa_symbol(__bss_stop) - __pa_symbol(_text))) { |
885 | xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n"); | ||
886 | BUG(); | ||
887 | } | ||
713 | 888 | ||
714 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 889 | /* |
890 | * Check for a conflict of the hypervisor supplied page tables with | ||
891 | * the target E820 map. | ||
892 | */ | ||
893 | xen_pt_check_e820(); | ||
894 | |||
895 | xen_reserve_xen_mfnlist(); | ||
896 | |||
897 | /* Check for a conflict of the initrd with the target E820 map. */ | ||
898 | if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image, | ||
899 | boot_params.hdr.ramdisk_size)) { | ||
900 | phys_addr_t new_area, start, size; | ||
901 | |||
902 | new_area = xen_find_free_area(boot_params.hdr.ramdisk_size); | ||
903 | if (!new_area) { | ||
904 | xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n"); | ||
905 | BUG(); | ||
906 | } | ||
907 | |||
908 | start = boot_params.hdr.ramdisk_image; | ||
909 | size = boot_params.hdr.ramdisk_size; | ||
910 | xen_phys_memcpy(new_area, start, size); | ||
911 | pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n", | ||
912 | start, start + size, new_area, new_area + size); | ||
913 | memblock_free(start, size); | ||
914 | boot_params.hdr.ramdisk_image = new_area; | ||
915 | boot_params.ext_ramdisk_image = new_area >> 32; | ||
916 | } | ||
917 | |||
918 | /* | ||
919 | * Set identity map on non-RAM pages and prepare remapping the | ||
920 | * underlying RAM. | ||
921 | */ | ||
922 | xen_set_identity_and_remap(max_pfn); | ||
715 | 923 | ||
716 | return "Xen"; | 924 | return "Xen"; |
717 | } | 925 | } |
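All of the new conflict checks in this hunk (kernel image, pre-built page tables, initrd) reduce to the same question: does a physical range fall, at least partly, outside usable RAM in the E820 map that is about to be handed to the kernel? The snippet below is a simplified, hypothetical illustration of such a test over a sorted array of address/size/type entries; the real xen_is_e820_reserved() in setup.c walks xen_e820_map and its exact semantics may differ.

/* Hypothetical, simplified "does this range conflict with the map?" check
 * over a sorted E820-like table.  Not the kernel implementation. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define E820_TYPE_RAM 1

struct e820_entry {
        uint64_t addr;
        uint64_t size;
        uint32_t type;
};

/* Return true if any byte of [start, start + size) is not covered by an
 * E820_TYPE_RAM entry, i.e. the range conflicts with the target map. */
static bool range_conflicts(const struct e820_entry *map, int nr,
                            uint64_t start, uint64_t size)
{
        uint64_t end = start + size;

        for (int i = 0; i < nr && start < end; i++) {
                if (map[i].type != E820_TYPE_RAM)
                        continue;
                if (map[i].addr <= start && map[i].addr + map[i].size >= end)
                        return false;                       /* fully covered */
                if (map[i].addr <= start && map[i].addr + map[i].size > start)
                        start = map[i].addr + map[i].size;  /* partially covered */
        }
        return start < end;
}

int main(void)
{
        struct e820_entry map[] = {
                { 0x00000000, 0x0009f000, E820_TYPE_RAM },
                { 0x00100000, 0x3ff00000, E820_TYPE_RAM },
        };

        /* Example: a 32 MiB range starting at 16 MiB sits inside RAM. */
        printf("conflicts: %d\n",
               range_conflicts(map, 2, 0x1000000, 0x2000000));
        return 0;
}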
@@ -721,26 +929,30 @@ char * __init xen_memory_setup(void) | |||
721 | */ | 929 | */ |
722 | char * __init xen_auto_xlated_memory_setup(void) | 930 | char * __init xen_auto_xlated_memory_setup(void) |
723 | { | 931 | { |
724 | static struct e820entry map[E820MAX] __initdata; | ||
725 | |||
726 | struct xen_memory_map memmap; | 932 | struct xen_memory_map memmap; |
727 | int i; | 933 | int i; |
728 | int rc; | 934 | int rc; |
729 | 935 | ||
730 | memmap.nr_entries = E820MAX; | 936 | memmap.nr_entries = E820MAX; |
731 | set_xen_guest_handle(memmap.buffer, map); | 937 | set_xen_guest_handle(memmap.buffer, xen_e820_map); |
732 | 938 | ||
733 | rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | 939 | rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); |
734 | if (rc < 0) | 940 | if (rc < 0) |
735 | panic("No memory map (%d)\n", rc); | 941 | panic("No memory map (%d)\n", rc); |
736 | 942 | ||
737 | sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries); | 943 | xen_e820_map_entries = memmap.nr_entries; |
944 | |||
945 | sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map), | ||
946 | &xen_e820_map_entries); | ||
738 | 947 | ||
739 | for (i = 0; i < memmap.nr_entries; i++) | 948 | for (i = 0; i < xen_e820_map_entries; i++) |
740 | e820_add_region(map[i].addr, map[i].size, map[i].type); | 949 | e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size, |
950 | xen_e820_map[i].type); | ||
741 | 951 | ||
742 | memblock_reserve(__pa(xen_start_info->mfn_list), | 952 | /* Remove p2m info, it is not needed. */ |
743 | xen_start_info->pt_base - xen_start_info->mfn_list); | 953 | xen_start_info->mfn_list = 0; |
954 | xen_start_info->first_p2m_pfn = 0; | ||
955 | xen_start_info->nr_p2m_frames = 0; | ||
744 | 956 | ||
745 | return "Xen"; | 957 | return "Xen"; |
746 | } | 958 | } |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 86484384492e..2a9ff7342791 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <xen/interface/xen.h> | 27 | #include <xen/interface/xen.h> |
28 | #include <xen/interface/vcpu.h> | 28 | #include <xen/interface/vcpu.h> |
29 | #include <xen/interface/xenpmu.h> | ||
29 | 30 | ||
30 | #include <asm/xen/interface.h> | 31 | #include <asm/xen/interface.h> |
31 | #include <asm/xen/hypercall.h> | 32 | #include <asm/xen/hypercall.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include "xen-ops.h" | 39 | #include "xen-ops.h" |
39 | #include "mmu.h" | 40 | #include "mmu.h" |
40 | #include "smp.h" | 41 | #include "smp.h" |
42 | #include "pmu.h" | ||
41 | 43 | ||
42 | cpumask_var_t xen_cpu_initialized_map; | 44 | cpumask_var_t xen_cpu_initialized_map; |
43 | 45 | ||
@@ -50,6 +52,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 }; | |||
50 | static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 }; | 52 | static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 }; |
51 | static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; | 53 | static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; |
52 | static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 }; | 54 | static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 }; |
55 | static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; | ||
53 | 56 | ||
54 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | 57 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); |
55 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | 58 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
@@ -148,11 +151,18 @@ static void xen_smp_intr_free(unsigned int cpu) | |||
148 | kfree(per_cpu(xen_irq_work, cpu).name); | 151 | kfree(per_cpu(xen_irq_work, cpu).name); |
149 | per_cpu(xen_irq_work, cpu).name = NULL; | 152 | per_cpu(xen_irq_work, cpu).name = NULL; |
150 | } | 153 | } |
154 | |||
155 | if (per_cpu(xen_pmu_irq, cpu).irq >= 0) { | ||
156 | unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL); | ||
157 | per_cpu(xen_pmu_irq, cpu).irq = -1; | ||
158 | kfree(per_cpu(xen_pmu_irq, cpu).name); | ||
159 | per_cpu(xen_pmu_irq, cpu).name = NULL; | ||
160 | } | ||
151 | }; | 161 | }; |
152 | static int xen_smp_intr_init(unsigned int cpu) | 162 | static int xen_smp_intr_init(unsigned int cpu) |
153 | { | 163 | { |
154 | int rc; | 164 | int rc; |
155 | char *resched_name, *callfunc_name, *debug_name; | 165 | char *resched_name, *callfunc_name, *debug_name, *pmu_name; |
156 | 166 | ||
157 | resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); | 167 | resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); |
158 | rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, | 168 | rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, |
@@ -218,6 +228,18 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
218 | per_cpu(xen_irq_work, cpu).irq = rc; | 228 | per_cpu(xen_irq_work, cpu).irq = rc; |
219 | per_cpu(xen_irq_work, cpu).name = callfunc_name; | 229 | per_cpu(xen_irq_work, cpu).name = callfunc_name; |
220 | 230 | ||
231 | if (is_xen_pmu(cpu)) { | ||
232 | pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); | ||
233 | rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, | ||
234 | xen_pmu_irq_handler, | ||
235 | IRQF_PERCPU|IRQF_NOBALANCING, | ||
236 | pmu_name, NULL); | ||
237 | if (rc < 0) | ||
238 | goto fail; | ||
239 | per_cpu(xen_pmu_irq, cpu).irq = rc; | ||
240 | per_cpu(xen_pmu_irq, cpu).name = pmu_name; | ||
241 | } | ||
242 | |||
221 | return 0; | 243 | return 0; |
222 | 244 | ||
223 | fail: | 245 | fail: |
@@ -335,6 +357,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
335 | } | 357 | } |
336 | set_cpu_sibling_map(0); | 358 | set_cpu_sibling_map(0); |
337 | 359 | ||
360 | xen_pmu_init(0); | ||
361 | |||
338 | if (xen_smp_intr_init(0)) | 362 | if (xen_smp_intr_init(0)) |
339 | BUG(); | 363 | BUG(); |
340 | 364 | ||
@@ -462,6 +486,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
462 | if (rc) | 486 | if (rc) |
463 | return rc; | 487 | return rc; |
464 | 488 | ||
489 | xen_pmu_init(cpu); | ||
490 | |||
465 | rc = xen_smp_intr_init(cpu); | 491 | rc = xen_smp_intr_init(cpu); |
466 | if (rc) | 492 | if (rc) |
467 | return rc; | 493 | return rc; |
@@ -503,6 +529,7 @@ static void xen_cpu_die(unsigned int cpu) | |||
503 | xen_smp_intr_free(cpu); | 529 | xen_smp_intr_free(cpu); |
504 | xen_uninit_lock_cpu(cpu); | 530 | xen_uninit_lock_cpu(cpu); |
505 | xen_teardown_timer(cpu); | 531 | xen_teardown_timer(cpu); |
532 | xen_pmu_finish(cpu); | ||
506 | } | 533 | } |
507 | } | 534 | } |
508 | 535 | ||
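For reference, the PMU interrupt plumbing added to smp.c follows the same per-CPU pattern as the existing resched/callfunc/debug IRQs: bind VIRQ_XENPMU to a handler on each CPU that owns a PMU, stash the irq number and name in a per-CPU slot, and release both on teardown. The sketch below condenses that pairing using identifiers from the diff (struct xen_common_irq and xen_pmu_irq_handler come from smp.c and pmu.h); the helper names xen_pmu_intr_init/free are introduced here only for illustration, since in the diff the logic is inlined into xen_smp_intr_init() and xen_smp_intr_free().

/* Condensed bind/unbind pairing for the new PMU VIRQ (reading aid only). */
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <xen/events.h>
#include <xen/interface/xen.h>

struct xen_common_irq {                 /* as defined locally in smp.c */
        int irq;
        char *name;
};
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };

extern irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);  /* pmu.h */

static int xen_pmu_intr_init(unsigned int cpu)
{
        char *pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
        int rc;

        rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler,
                                     IRQF_PERCPU | IRQF_NOBALANCING,
                                     pmu_name, NULL);
        if (rc < 0) {
                kfree(pmu_name);
                return rc;
        }
        per_cpu(xen_pmu_irq, cpu).irq = rc;
        per_cpu(xen_pmu_irq, cpu).name = pmu_name;
        return 0;
}

static void xen_pmu_intr_free(unsigned int cpu)
{
        if (per_cpu(xen_pmu_irq, cpu).irq < 0)
                return;
        unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
        per_cpu(xen_pmu_irq, cpu).irq = -1;
        kfree(per_cpu(xen_pmu_irq, cpu).name);
        per_cpu(xen_pmu_irq, cpu).name = NULL;
}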
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 53b4c0811f4f..feddabdab448 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include "xen-ops.h" | 12 | #include "xen-ops.h" |
13 | #include "mmu.h" | 13 | #include "mmu.h" |
14 | #include "pmu.h" | ||
14 | 15 | ||
15 | static void xen_pv_pre_suspend(void) | 16 | static void xen_pv_pre_suspend(void) |
16 | { | 17 | { |
@@ -67,16 +68,26 @@ static void xen_pv_post_suspend(int suspend_cancelled) | |||
67 | 68 | ||
68 | void xen_arch_pre_suspend(void) | 69 | void xen_arch_pre_suspend(void) |
69 | { | 70 | { |
70 | if (xen_pv_domain()) | 71 | int cpu; |
71 | xen_pv_pre_suspend(); | 72 | |
73 | for_each_online_cpu(cpu) | ||
74 | xen_pmu_finish(cpu); | ||
75 | |||
76 | if (xen_pv_domain()) | ||
77 | xen_pv_pre_suspend(); | ||
72 | } | 78 | } |
73 | 79 | ||
74 | void xen_arch_post_suspend(int cancelled) | 80 | void xen_arch_post_suspend(int cancelled) |
75 | { | 81 | { |
76 | if (xen_pv_domain()) | 82 | int cpu; |
77 | xen_pv_post_suspend(cancelled); | 83 | |
78 | else | 84 | if (xen_pv_domain()) |
79 | xen_hvm_post_suspend(cancelled); | 85 | xen_pv_post_suspend(cancelled); |
86 | else | ||
87 | xen_hvm_post_suspend(cancelled); | ||
88 | |||
89 | for_each_online_cpu(cpu) | ||
90 | xen_pmu_init(cpu); | ||
80 | } | 91 | } |
81 | 92 | ||
82 | static void xen_vcpu_notify_restore(void *data) | 93 | static void xen_vcpu_notify_restore(void *data) |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 8afdfccf6086..b65f59a358a2 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -104,6 +104,8 @@ ENTRY(hypercall_page) | |||
104 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) | 104 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) |
105 | #else | 105 | #else |
106 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) | 106 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) |
107 | /* Map the p2m table to a 512GB-aligned user address. */ | ||
108 | ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE) | ||
107 | #endif | 109 | #endif |
108 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | 110 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) |
109 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) | 111 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) |
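The new XEN_ELFNOTE_INIT_P2M note tells the domain builder where it may place the initial p2m list: PGDIR_SIZE is the span covered by a single top-level (PGD) page-table entry, and with 4-level paging on x86-64 PGDIR_SHIFT is 39, so the requested alignment matches the comment's 512 GiB. A one-liner to check the arithmetic (assumes a 64-bit unsigned long):

/* 1UL << PGDIR_SHIFT (39) == 512 GiB on 4-level x86-64. */
#include <stdio.h>

int main(void)
{
        unsigned long pgdir_size = 1UL << 39;
        printf("PGDIR_SIZE = %lu GiB\n", pgdir_size >> 30);   /* prints 512 */
        return 0;
}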
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2292721b1d10..1399423f3418 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -35,13 +35,20 @@ void xen_build_mfn_list_list(void); | |||
35 | void xen_setup_machphys_mapping(void); | 35 | void xen_setup_machphys_mapping(void); |
36 | void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | 36 | void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
37 | void xen_reserve_top(void); | 37 | void xen_reserve_top(void); |
38 | void __init xen_reserve_special_pages(void); | ||
39 | void __init xen_pt_check_e820(void); | ||
38 | 40 | ||
39 | void xen_mm_pin_all(void); | 41 | void xen_mm_pin_all(void); |
40 | void xen_mm_unpin_all(void); | 42 | void xen_mm_unpin_all(void); |
43 | #ifdef CONFIG_X86_64 | ||
44 | void __init xen_relocate_p2m(void); | ||
45 | #endif | ||
41 | 46 | ||
47 | bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size); | ||
42 | unsigned long __ref xen_chk_extra_mem(unsigned long pfn); | 48 | unsigned long __ref xen_chk_extra_mem(unsigned long pfn); |
43 | void __init xen_inv_extra_mem(void); | 49 | void __init xen_inv_extra_mem(void); |
44 | void __init xen_remap_memory(void); | 50 | void __init xen_remap_memory(void); |
51 | phys_addr_t __init xen_find_free_area(phys_addr_t size); | ||
45 | char * __init xen_memory_setup(void); | 52 | char * __init xen_memory_setup(void); |
46 | char * xen_auto_xlated_memory_setup(void); | 53 | char * xen_auto_xlated_memory_setup(void); |
47 | void __init xen_arch_setup(void); | 54 | void __init xen_arch_setup(void); |