diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 14:46:48 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 14:46:48 -0400 |
commit | 752240e74d650faa24425adc523f1308973ea51c (patch) | |
tree | 47657b7d468352424f844156883302653252f70e /arch/x86/include | |
parent | b8cb642af98216fe6eeca1525345b8a5c9d7c9a4 (diff) | |
parent | 626d7508664c4bc8e67f496da4387ecd0c410b8c (diff) |
Merge tag 'for-linus-4.3-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from David Vrabel:
"Xen features and fixes for 4.3:
- Convert xen-blkfront to the multiqueue API
- [arm] Support binding event channels to different VCPUs.
- [x86] Support > 512 GiB in PV guests (off by default as such a
guest cannot be migrated with the current toolstack).
- [x86] PMU support for PV dom0 (limited support for using perf with
Xen and other guests)"
* tag 'for-linus-4.3-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (33 commits)
xen: switch extra memory accounting to use pfns
xen: limit memory to architectural maximum
xen: avoid another early crash of memory limited dom0
xen: avoid early crash of memory limited dom0
arm/xen: Remove helpers which are PV specific
xen/x86: Don't try to set PCE bit in CR4
xen/PMU: PMU emulation code
xen/PMU: Intercept PMU-related MSR and APIC accesses
xen/PMU: Describe vendor-specific PMU registers
xen/PMU: Initialization code for Xen PMU
xen/PMU: Sysfs interface for setting Xen PMU mode
xen: xensyms support
xen: remove no longer needed p2m.h
xen: allow more than 512 GB of RAM for 64 bit pv-domains
xen: move p2m list if conflicting with e820 map
xen: add explicit memblock_reserve() calls for special pages
mm: provide early_memremap_ro to establish read-only mapping
xen: check for initrd conflicting with e820 map
xen: check pre-allocated page tables for conflict with memory map
xen: check for kernel memory conflicting with memory layout
...
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/xen/events.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/hypercall.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/interface.h | 219 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/page.h | 8 |
4 files changed, 230 insertions, 14 deletions
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 608a79d5a466..e6911caf5bbf 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h | |||
@@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) | |||
20 | /* No need for a barrier -- XCHG is a barrier on x86. */ | 20 | /* No need for a barrier -- XCHG is a barrier on x86. */ |
21 | #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) | 21 | #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) |
22 | 22 | ||
23 | extern int xen_have_vector_callback; | ||
24 | |||
25 | /* | ||
26 | * Events delivered via platform PCI interrupts are always | ||
27 | * routed to vcpu 0 and hence cannot be rebound. | ||
28 | */ | ||
29 | static inline bool xen_support_evtchn_rebind(void) | ||
30 | { | ||
31 | return (!xen_hvm_domain() || xen_have_vector_callback); | ||
32 | } | ||
33 | |||
23 | #endif /* _ASM_X86_XEN_EVENTS_H */ | 34 | #endif /* _ASM_X86_XEN_EVENTS_H */ |
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ca08a27b90b3..83aea8055119 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -465,6 +465,12 @@ HYPERVISOR_tmem_op( | |||
465 | return _hypercall1(int, tmem_op, op); | 465 | return _hypercall1(int, tmem_op, op); |
466 | } | 466 | } |
467 | 467 | ||
468 | static inline int | ||
469 | HYPERVISOR_xenpmu_op(unsigned int op, void *arg) | ||
470 | { | ||
471 | return _hypercall2(int, xenpmu_op, op, arg); | ||
472 | } | ||
473 | |||
468 | static inline void | 474 | static inline void |
469 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) | 475 | MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) |
470 | { | 476 | { |
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 3400dbaec3c3..62ca03ef5c65 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h | |||
@@ -3,12 +3,38 @@ | |||
3 | * | 3 | * |
4 | * Guest OS interface to x86 Xen. | 4 | * Guest OS interface to x86 Xen. |
5 | * | 5 | * |
6 | * Copyright (c) 2004, K A Fraser | 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to | ||
8 | * deal in the Software without restriction, including without limitation the | ||
9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
10 | * sell copies of the Software, and to permit persons to whom the Software is | ||
11 | * furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | * | ||
24 | * Copyright (c) 2004-2006, K A Fraser | ||
7 | */ | 25 | */ |
8 | 26 | ||
9 | #ifndef _ASM_X86_XEN_INTERFACE_H | 27 | #ifndef _ASM_X86_XEN_INTERFACE_H |
10 | #define _ASM_X86_XEN_INTERFACE_H | 28 | #define _ASM_X86_XEN_INTERFACE_H |
11 | 29 | ||
30 | /* | ||
31 | * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field | ||
32 | * in a struct in memory. | ||
33 | * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a | |
34 | * hypercall argument. | ||
35 | * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but | ||
36 | * they might not be on other architectures. | ||
37 | */ | ||
12 | #ifdef __XEN__ | 38 | #ifdef __XEN__ |
13 | #define __DEFINE_GUEST_HANDLE(name, type) \ | 39 | #define __DEFINE_GUEST_HANDLE(name, type) \ |
14 | typedef struct { type *p; } __guest_handle_ ## name | 40 | typedef struct { type *p; } __guest_handle_ ## name |
@@ -88,13 +114,16 @@ DEFINE_GUEST_HANDLE(xen_ulong_t); | |||
88 | * start of the GDT because some stupid OSes export hard-coded selector values | 114 | * start of the GDT because some stupid OSes export hard-coded selector values |
89 | * in their ABI. These hard-coded values are always near the start of the GDT, | 115 | * in their ABI. These hard-coded values are always near the start of the GDT, |
90 | * so Xen places itself out of the way, at the far end of the GDT. | 116 | * so Xen places itself out of the way, at the far end of the GDT. |
117 | * | ||
118 | * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op | ||
91 | */ | 119 | */ |
92 | #define FIRST_RESERVED_GDT_PAGE 14 | 120 | #define FIRST_RESERVED_GDT_PAGE 14 |
93 | #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) | 121 | #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) |
94 | #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) | 122 | #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) |
95 | 123 | ||
96 | /* | 124 | /* |
97 | * Send an array of these to HYPERVISOR_set_trap_table() | 125 | * Send an array of these to HYPERVISOR_set_trap_table(). |
126 | * Terminate the array with a sentinel entry, with traps[].address==0. | ||
98 | * The privilege level specifies which modes may enter a trap via a software | 127 | * The privilege level specifies which modes may enter a trap via a software |
99 | * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate | 128 | * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate |
100 | * privilege levels as follows: | 129 | * privilege levels as follows: |
@@ -118,10 +147,41 @@ struct trap_info { | |||
118 | DEFINE_GUEST_HANDLE_STRUCT(trap_info); | 147 | DEFINE_GUEST_HANDLE_STRUCT(trap_info); |
119 | 148 | ||
120 | struct arch_shared_info { | 149 | struct arch_shared_info { |
121 | unsigned long max_pfn; /* max pfn that appears in table */ | 150 | /* |
122 | /* Frame containing list of mfns containing list of mfns containing p2m. */ | 151 | * Number of valid entries in the p2m table(s) anchored at |
123 | unsigned long pfn_to_mfn_frame_list_list; | 152 | * pfn_to_mfn_frame_list_list and/or p2m_vaddr. |
124 | unsigned long nmi_reason; | 153 | */ |
154 | unsigned long max_pfn; | ||
155 | /* | ||
156 | * Frame containing list of mfns containing list of mfns containing p2m. | ||
157 | * A value of 0 indicates it has not yet been set up, ~0 indicates it | ||
158 | * has been set to invalid e.g. due to the p2m being too large for the | ||
159 | * 3-level p2m tree. In this case the linear mapped p2m list anchored | |
160 | * at p2m_vaddr is to be used. | ||
161 | */ | ||
162 | xen_pfn_t pfn_to_mfn_frame_list_list; | ||
163 | unsigned long nmi_reason; | ||
164 | /* | ||
165 | * Following three fields are valid if p2m_cr3 contains a value | ||
166 | * different from 0. | ||
167 | * p2m_cr3 is the root of the address space where p2m_vaddr is valid. | ||
168 | * p2m_cr3 is in the same format as a cr3 value in the vcpu register | ||
169 | * state and holds the folded machine frame number (via xen_pfn_to_cr3) | ||
170 | * of a L3 or L4 page table. | ||
171 | * p2m_vaddr holds the virtual address of the linear p2m list. All | ||
172 | * entries in the range [0...max_pfn[ are accessible via this pointer. | ||
173 | * p2m_generation will be incremented by the guest before and after each | ||
174 | * change of the mappings of the p2m list. p2m_generation starts at 0 | ||
175 | * and a value with the least significant bit set indicates that a | ||
176 | * mapping update is in progress. This allows guest external software | ||
177 | * (e.g. in Dom0) to verify that read mappings are consistent and | ||
178 | * whether they have changed since the last check. | ||
179 | * Modifying a p2m element in the linear p2m list is allowed via an | ||
180 | * atomic write only. | ||
181 | */ | ||
182 | unsigned long p2m_cr3; /* cr3 value of the p2m address space */ | ||
183 | unsigned long p2m_vaddr; /* virtual address of the p2m list */ | ||
184 | unsigned long p2m_generation; /* generation count of p2m mapping */ | ||
125 | }; | 185 | }; |
126 | #endif /* !__ASSEMBLY__ */ | 186 | #endif /* !__ASSEMBLY__ */ |
127 | 187 | ||
@@ -137,13 +197,31 @@ struct arch_shared_info { | |||
137 | /* | 197 | /* |
138 | * The following is all CPU context. Note that the fpu_ctxt block is filled | 198 | * The following is all CPU context. Note that the fpu_ctxt block is filled |
139 | * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. | 199 | * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. |
200 | * | ||
201 | * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise | ||
202 | * for HVM and PVH guests, not all information in this structure is updated: | ||
203 | * | ||
204 | * - For HVM guests, the structures read include: fpu_ctxt (if | ||
205 | * VGCF_I387_VALID is set), flags, user_regs, debugreg[*] | |
206 | * | ||
207 | * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to | ||
208 | * set cr3. All other fields not used should be set to 0. | ||
140 | */ | 209 | */ |
141 | struct vcpu_guest_context { | 210 | struct vcpu_guest_context { |
142 | /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ | 211 | /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ |
143 | struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ | 212 | struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ |
144 | #define VGCF_I387_VALID (1<<0) | 213 | #define VGCF_I387_VALID (1<<0) |
145 | #define VGCF_HVM_GUEST (1<<1) | 214 | #define VGCF_IN_KERNEL (1<<2) |
146 | #define VGCF_IN_KERNEL (1<<2) | 215 | #define _VGCF_i387_valid 0 |
216 | #define VGCF_i387_valid (1<<_VGCF_i387_valid) | ||
217 | #define _VGCF_in_kernel 2 | ||
218 | #define VGCF_in_kernel (1<<_VGCF_in_kernel) | ||
219 | #define _VGCF_failsafe_disables_events 3 | ||
220 | #define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) | ||
221 | #define _VGCF_syscall_disables_events 4 | ||
222 | #define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) | ||
223 | #define _VGCF_online 5 | ||
224 | #define VGCF_online (1<<_VGCF_online) | ||
147 | unsigned long flags; /* VGCF_* flags */ | 225 | unsigned long flags; /* VGCF_* flags */ |
148 | struct cpu_user_regs user_regs; /* User-level CPU registers */ | 226 | struct cpu_user_regs user_regs; /* User-level CPU registers */ |
149 | struct trap_info trap_ctxt[256]; /* Virtual IDT */ | 227 | struct trap_info trap_ctxt[256]; /* Virtual IDT */ |
@@ -172,6 +250,129 @@ struct vcpu_guest_context { | |||
172 | #endif | 250 | #endif |
173 | }; | 251 | }; |
174 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); | 252 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); |
253 | |||
254 | /* AMD PMU registers and structures */ | ||
255 | struct xen_pmu_amd_ctxt { | ||
256 | /* | ||
257 | * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd). | ||
258 | * For PV(H) guests these fields are RO. | ||
259 | */ | ||
260 | uint32_t counters; | ||
261 | uint32_t ctrls; | ||
262 | |||
263 | /* Counter MSRs */ | ||
264 | #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L | ||
265 | uint64_t regs[]; | ||
266 | #elif defined(__GNUC__) | ||
267 | uint64_t regs[0]; | ||
268 | #endif | ||
269 | }; | ||
270 | |||
271 | /* Intel PMU registers and structures */ | ||
272 | struct xen_pmu_cntr_pair { | ||
273 | uint64_t counter; | ||
274 | uint64_t control; | ||
275 | }; | ||
276 | |||
277 | struct xen_pmu_intel_ctxt { | ||
278 | /* | ||
279 | * Offsets to fixed and architectural counter MSRs (relative to | ||
280 | * xen_pmu_arch.c.intel). | ||
281 | * For PV(H) guests these fields are RO. | ||
282 | */ | ||
283 | uint32_t fixed_counters; | ||
284 | uint32_t arch_counters; | ||
285 | |||
286 | /* PMU registers */ | ||
287 | uint64_t global_ctrl; | ||
288 | uint64_t global_ovf_ctrl; | ||
289 | uint64_t global_status; | ||
290 | uint64_t fixed_ctrl; | ||
291 | uint64_t ds_area; | ||
292 | uint64_t pebs_enable; | ||
293 | uint64_t debugctl; | ||
294 | |||
295 | /* Fixed and architectural counter MSRs */ | ||
296 | #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L | ||
297 | uint64_t regs[]; | ||
298 | #elif defined(__GNUC__) | ||
299 | uint64_t regs[0]; | ||
300 | #endif | ||
301 | }; | ||
302 | |||
303 | /* Sampled domain's registers */ | ||
304 | struct xen_pmu_regs { | ||
305 | uint64_t ip; | ||
306 | uint64_t sp; | ||
307 | uint64_t flags; | ||
308 | uint16_t cs; | ||
309 | uint16_t ss; | ||
310 | uint8_t cpl; | ||
311 | uint8_t pad[3]; | ||
312 | }; | ||
313 | |||
314 | /* PMU flags */ | ||
315 | #define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */ | ||
316 | #define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */ | ||
317 | #define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */ | ||
318 | #define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */ | ||
319 | |||
320 | /* | ||
321 | * Architecture-specific information describing state of the processor at | ||
322 | * the time of PMU interrupt. | ||
323 | * Fields of this structure marked as RW for guest should only be written by | ||
324 | * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the | ||
325 | * hypervisor during PMU interrupt). Hypervisor will read updated data in | ||
326 | * XENPMU_flush hypercall and clear PMU_CACHED bit. | ||
327 | */ | ||
328 | struct xen_pmu_arch { | ||
329 | union { | ||
330 | /* | ||
331 | * Processor's registers at the time of interrupt. | ||
332 | * WO for hypervisor, RO for guests. | ||
333 | */ | ||
334 | struct xen_pmu_regs regs; | ||
335 | /* | ||
336 | * Padding for adding new registers to xen_pmu_regs in | ||
337 | * the future | ||
338 | */ | ||
339 | #define XENPMU_REGS_PAD_SZ 64 | ||
340 | uint8_t pad[XENPMU_REGS_PAD_SZ]; | ||
341 | } r; | ||
342 | |||
343 | /* WO for hypervisor, RO for guest */ | ||
344 | uint64_t pmu_flags; | ||
345 | |||
346 | /* | ||
347 | * APIC LVTPC register. | ||
348 | * RW for both hypervisor and guest. | ||
349 | * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware | ||
350 | * during XENPMU_flush or XENPMU_lvtpc_set. | ||
351 | */ | ||
352 | union { | ||
353 | uint32_t lapic_lvtpc; | ||
354 | uint64_t pad; | ||
355 | } l; | ||
356 | |||
357 | /* | ||
358 | * Vendor-specific PMU registers. | ||
359 | * RW for both hypervisor and guest (see exceptions above). | ||
360 | * Guest's updates to this field are verified and then loaded by the | ||
361 | * hypervisor into hardware during XENPMU_flush | ||
362 | */ | ||
363 | union { | ||
364 | struct xen_pmu_amd_ctxt amd; | ||
365 | struct xen_pmu_intel_ctxt intel; | ||
366 | |||
367 | /* | ||
368 | * Padding for contexts (fixed parts only, does not include | ||
369 | * MSR banks that are specified by offsets) | ||
370 | */ | ||
371 | #define XENPMU_CTXT_PAD_SZ 128 | ||
372 | uint8_t pad[XENPMU_CTXT_PAD_SZ]; | ||
373 | } c; | ||
374 | }; | ||
375 | |||
175 | #endif /* !__ASSEMBLY__ */ | 376 | #endif /* !__ASSEMBLY__ */ |
176 | 377 | ||
177 | /* | 378 | /* |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c44a5d53e464..a3804fbe1f36 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -35,9 +35,7 @@ typedef struct xpaddr { | |||
35 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) | 35 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) |
36 | #define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) | 36 | #define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) |
37 | 37 | ||
38 | /* Maximum amount of memory we can handle in a domain in pages */ | 38 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) |
39 | #define MAX_DOMAIN_PAGES \ | ||
40 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) | ||
41 | 39 | ||
42 | extern unsigned long *machine_to_phys_mapping; | 40 | extern unsigned long *machine_to_phys_mapping; |
43 | extern unsigned long machine_to_phys_nr; | 41 | extern unsigned long machine_to_phys_nr; |
@@ -48,8 +46,8 @@ extern unsigned long xen_max_p2m_pfn; | |||
48 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 46 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
49 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 47 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
50 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 48 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
51 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | 49 | extern unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
52 | unsigned long pfn_e); | 50 | unsigned long pfn_e); |
53 | 51 | ||
54 | extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | 52 | extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
55 | struct gnttab_map_grant_ref *kmap_ops, | 53 | struct gnttab_map_grant_ref *kmap_ops, |