aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-02-13 22:26:32 -0500
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-03-10 12:44:44 -0500
commit73c154c60be106b47f15d1111fc2d75cc7a436f2 (patch)
treeaf32a6e0fd49bfafa1499cd1a8cdf15caae27054
parentcc7335b2f6acc0f24c7fac80ce536301f7d52214 (diff)
xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
For the hypervisor to take advantage of the MWAIT support it needs to extract from the ACPI _CST the register address. But the hypervisor does not have the support to parse DSDT so it relies on the initial domain (dom0) to parse the ACPI Power Management information and push it up to the hypervisor. The pushing of the data is done by the processor_harveset_xen module which parses the information that the ACPI parser has graciously exposed in 'struct acpi_processor'. For the ACPI parser to also expose the Cx states for MWAIT, we need to expose the MWAIT capability (leaf 1). Furthermore we also need to expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly function. The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX operations, but it can't do it since it needs to be backwards compatible. Instead we choose to use the native CPUID to figure out if the MWAIT capability exists and use the XEN_SET_PDC query hypercall to figure out if the hypervisor wants us to expose the MWAIT_LEAF capability or not. Note: The XEN_SET_PDC query was implemented in c/s 23783: "ACPI: add _PDC input override mechanism". With this in place, instead of C3 ACPI IOPORT 415 we get now C3:ACPI FFH INTEL MWAIT 0x20 Note: The cpu_idle which would be calling the mwait variants for idling never gets set b/c we set the default pm_idle to be the hypercall variant. Acked-by: Jan Beulich <JBeulich@suse.com> [v2: Fix missing header file include and #ifdef] Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-rw-r--r--arch/ia64/include/asm/xen/interface.h1
-rw-r--r--arch/x86/include/asm/xen/interface.h1
-rw-r--r--arch/x86/xen/enlighten.c93
-rw-r--r--include/xen/interface/platform.h3
4 files changed, 96 insertions, 2 deletions
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index fbb519828aa1..09d5f7fd9db1 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -77,6 +77,7 @@ DEFINE_GUEST_HANDLE(int);
77DEFINE_GUEST_HANDLE(long); 77DEFINE_GUEST_HANDLE(long);
78DEFINE_GUEST_HANDLE(void); 78DEFINE_GUEST_HANDLE(void);
79DEFINE_GUEST_HANDLE(uint64_t); 79DEFINE_GUEST_HANDLE(uint64_t);
80DEFINE_GUEST_HANDLE(uint32_t);
80 81
81typedef unsigned long xen_pfn_t; 82typedef unsigned long xen_pfn_t;
82DEFINE_GUEST_HANDLE(xen_pfn_t); 83DEFINE_GUEST_HANDLE(xen_pfn_t);
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index a1f2db5f1170..cbf0c9d50b92 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int);
56DEFINE_GUEST_HANDLE(long); 56DEFINE_GUEST_HANDLE(long);
57DEFINE_GUEST_HANDLE(void); 57DEFINE_GUEST_HANDLE(void);
58DEFINE_GUEST_HANDLE(uint64_t); 58DEFINE_GUEST_HANDLE(uint64_t);
59DEFINE_GUEST_HANDLE(uint32_t);
59#endif 60#endif
60 61
61#ifndef HYPERVISOR_VIRT_START 62#ifndef HYPERVISOR_VIRT_START
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 312c9e3cb635..fe06bf4ef0e3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -62,6 +62,15 @@
62#include <asm/reboot.h> 62#include <asm/reboot.h>
63#include <asm/stackprotector.h> 63#include <asm/stackprotector.h>
64#include <asm/hypervisor.h> 64#include <asm/hypervisor.h>
65#include <asm/mwait.h>
66
67#ifdef CONFIG_ACPI
68#include <linux/acpi.h>
69#include <asm/acpi.h>
70#include <acpi/pdc_intel.h>
71#include <acpi/processor.h>
72#include <xen/interface/platform.h>
73#endif
65 74
66#include "xen-ops.h" 75#include "xen-ops.h"
67#include "mmu.h" 76#include "mmu.h"
@@ -200,13 +209,17 @@ static void __init xen_banner(void)
200static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; 209static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
201static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; 210static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
202 211
212static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
213static __read_mostly unsigned int cpuid_leaf5_ecx_val;
214static __read_mostly unsigned int cpuid_leaf5_edx_val;
215
203static void xen_cpuid(unsigned int *ax, unsigned int *bx, 216static void xen_cpuid(unsigned int *ax, unsigned int *bx,
204 unsigned int *cx, unsigned int *dx) 217 unsigned int *cx, unsigned int *dx)
205{ 218{
206 unsigned maskebx = ~0; 219 unsigned maskebx = ~0;
207 unsigned maskecx = ~0; 220 unsigned maskecx = ~0;
208 unsigned maskedx = ~0; 221 unsigned maskedx = ~0;
209 222 unsigned setecx = 0;
210 /* 223 /*
211 * Mask out inconvenient features, to try and disable as many 224 * Mask out inconvenient features, to try and disable as many
212 * unsupported kernel subsystems as possible. 225 * unsupported kernel subsystems as possible.
@@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
214 switch (*ax) { 227 switch (*ax) {
215 case 1: 228 case 1:
216 maskecx = cpuid_leaf1_ecx_mask; 229 maskecx = cpuid_leaf1_ecx_mask;
230 setecx = cpuid_leaf1_ecx_set_mask;
217 maskedx = cpuid_leaf1_edx_mask; 231 maskedx = cpuid_leaf1_edx_mask;
218 break; 232 break;
219 233
234 case CPUID_MWAIT_LEAF:
235 /* Synthesize the values.. */
236 *ax = 0;
237 *bx = 0;
238 *cx = cpuid_leaf5_ecx_val;
239 *dx = cpuid_leaf5_edx_val;
240 return;
241
220 case 0xb: 242 case 0xb:
221 /* Suppress extended topology stuff */ 243 /* Suppress extended topology stuff */
222 maskebx = 0; 244 maskebx = 0;
@@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
232 254
233 *bx &= maskebx; 255 *bx &= maskebx;
234 *cx &= maskecx; 256 *cx &= maskecx;
257 *cx |= setecx;
235 *dx &= maskedx; 258 *dx &= maskedx;
259
236} 260}
237 261
262static bool __init xen_check_mwait(void)
263{
264#ifdef CONFIG_ACPI
265 struct xen_platform_op op = {
266 .cmd = XENPF_set_processor_pminfo,
267 .u.set_pminfo.id = -1,
268 .u.set_pminfo.type = XEN_PM_PDC,
269 };
270 uint32_t buf[3];
271 unsigned int ax, bx, cx, dx;
272 unsigned int mwait_mask;
273
274 /* We need to determine whether it is OK to expose the MWAIT
275 * capability to the kernel to harvest deeper than C3 states from ACPI
276 * _CST using the processor_harvest_xen.c module. For this to work, we
277 * need to gather the MWAIT_LEAF values (which the cstate.c code
278 * checks against). The hypervisor won't expose the MWAIT flag because
279 * it would break backwards compatibility; so we will find out directly
280 * from the hardware and hypercall.
281 */
282 if (!xen_initial_domain())
283 return false;
284
285 ax = 1;
286 cx = 0;
287
288 native_cpuid(&ax, &bx, &cx, &dx);
289
290 mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
291 (1 << (X86_FEATURE_MWAIT % 32));
292
293 if ((cx & mwait_mask) != mwait_mask)
294 return false;
295
296 /* We need to emulate the MWAIT_LEAF and for that we need both
297 * ecx and edx. The hypercall provides only partial information.
298 */
299
300 ax = CPUID_MWAIT_LEAF;
301 bx = 0;
302 cx = 0;
303 dx = 0;
304
305 native_cpuid(&ax, &bx, &cx, &dx);
306
307 /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
308 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
309 */
310 buf[0] = ACPI_PDC_REVISION_ID;
311 buf[1] = 1;
312 buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
313
314 set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
315
316 if ((HYPERVISOR_dom0_op(&op) == 0) &&
317 (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
318 cpuid_leaf5_ecx_val = cx;
319 cpuid_leaf5_edx_val = dx;
320 }
321 return true;
322#else
323 return false;
324#endif
325}
238static void __init xen_init_cpuid_mask(void) 326static void __init xen_init_cpuid_mask(void)
239{ 327{
240 unsigned int ax, bx, cx, dx; 328 unsigned int ax, bx, cx, dx;
@@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void)
261 /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ 349 /* Xen will set CR4.OSXSAVE if supported and not disabled by force */
262 if ((cx & xsave_mask) != xsave_mask) 350 if ((cx & xsave_mask) != xsave_mask)
263 cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ 351 cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
352
353 if (xen_check_mwait())
354 cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
264} 355}
265 356
266static void xen_set_debugreg(int reg, unsigned long val) 357static void xen_set_debugreg(int reg, unsigned long val)
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index c1684680431b..861b359b44aa 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
200#define XEN_PM_CX 0 200#define XEN_PM_CX 0
201#define XEN_PM_PX 1 201#define XEN_PM_PX 1
202#define XEN_PM_TX 2 202#define XEN_PM_TX 2
203 203#define XEN_PM_PDC 3
204/* Px sub info type */ 204/* Px sub info type */
205#define XEN_PX_PCT 1 205#define XEN_PX_PCT 1
206#define XEN_PX_PSS 2 206#define XEN_PX_PSS 2
@@ -293,6 +293,7 @@ struct xenpf_set_processor_pminfo {
293 union { 293 union {
294 struct xen_processor_power power;/* Cx: _CST/_CSD */ 294 struct xen_processor_power power;/* Cx: _CST/_CSD */
295 struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ 295 struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
296 GUEST_HANDLE(uint32_t) pdc;
296 }; 297 };
297}; 298};
298DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); 299DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);