 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 50 ++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 8241b64d34c4..8233b29bdd35 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -16,18 +16,32 @@
 static unsigned int cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
-struct intel_cqm_state {
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:		The cached Resource Monitoring ID
+ * @closid:		The cached Class Of Service ID
+ * @rmid_usecnt:	The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
 	u32			rmid;
-	int			cnt;
+	u32			closid;
+	int			rmid_usecnt;
 };
 
 /*
- * The cached intel_cqm_state is strictly per CPU and can never be
+ * The cached intel_pqr_state is strictly per CPU and can never be
  * updated from a remote CPU. Both functions which modify the state
  * (intel_cqm_event_start and intel_cqm_event_stop) are called with
  * interrupts disabled, which is sufficient for the protection.
  */
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
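For context: the kernel's wrmsr(msr, lo, hi) writes hi:lo as a single 64-bit value, which is why the cached closid has to accompany every rmid update to MSR_IA32_PQR_ASSOC. A minimal user-space sketch of that packing (the rmid/closid values are made up, not from this patch):

#include <stdint.h>
#include <stdio.h>

/* Illustration only: how closid and rmid share MSR_IA32_PQR_ASSOC. */
static uint64_t pqr_assoc_value(uint32_t rmid, uint32_t closid)
{
	/* closid lands in the upper 32 bits, rmid in the low bits */
	return ((uint64_t)closid << 32) | rmid;
}

int main(void)
{
	/* hypothetical rmid=5, closid=1, purely for demonstration */
	printf("PQR_ASSOC = 0x%016llx\n",
	       (unsigned long long)pqr_assoc_value(5, 1));
	return 0;
}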
@@ -966,7 +980,7 @@ out:
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 	u32 rmid = event->hw.cqm_rmid;
 
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
@@ -974,7 +988,7 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	if (state->cnt++) {
+	if (state->rmid_usecnt++) {
 		if (!WARN_ON_ONCE(state->rmid != rmid))
 			return;
 	} else {
@@ -982,17 +996,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 	}
 
 	state->rmid = rmid;
-	/*
-	 * This is actually wrong, as the upper 32 bit MSR contain the
-	 * closid which is used for configuring the Cache Allocation
-	 * Technology component.
-	 */
-	wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0);
+	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
 		return;
@@ -1001,15 +1010,9 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 	intel_cqm_event_read(event);
 
-	if (!--state->cnt) {
+	if (!--state->rmid_usecnt) {
 		state->rmid = 0;
-		/*
-		 * This is actually wrong, as the upper 32 bit of the
-		 * MSR contain the closid which is used for
-		 * configuring the Cache Allocation Technology
-		 * component.
-		 */
-		wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
+		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
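The refcounting above means only the first start and the last stop on a CPU actually touch the MSR, and the closid half is preserved across both writes. A simplified, stand-alone model of that pattern (fake_wrmsr and all values are invented for illustration; the WARN_ON_ONCE checks are omitted):

#include <stdint.h>
#include <stdio.h>

/* Simplified model of the per-CPU state used above; not kernel code. */
struct pqr_state_model {
	uint32_t rmid;
	uint32_t closid;
	int      rmid_usecnt;
};

/* Stand-in for the MSR write; only reached on 0->1 and 1->0 transitions. */
static void fake_wrmsr(uint32_t rmid, uint32_t closid)
{
	printf("PQR_ASSOC <- rmid=%u closid=%u\n", rmid, closid);
}

static void model_start(struct pqr_state_model *s, uint32_t rmid)
{
	if (s->rmid_usecnt++)		/* already active: just take a reference */
		return;
	s->rmid = rmid;
	fake_wrmsr(rmid, s->closid);	/* closid rides along, never zeroed */
}

static void model_stop(struct pqr_state_model *s)
{
	if (!--s->rmid_usecnt) {	/* last user: drop the rmid */
		s->rmid = 0;
		fake_wrmsr(0, s->closid);
	}
}

int main(void)
{
	struct pqr_state_model s = { 0, 3, 0 };	/* hypothetical closid 3 */

	model_start(&s, 7);	/* writes the MSR */
	model_start(&s, 7);	/* refcount only, no MSR write */
	model_stop(&s);		/* refcount only */
	model_stop(&s);		/* writes rmid=0, closid kept */
	return 0;
}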
@@ -1247,11 +1250,12 @@ static inline void cqm_pick_event_reader(int cpu)
 
 static void intel_cqm_cpu_prepare(unsigned int cpu)
 {
-	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	state->rmid = 0;
-	state->cnt  = 0;
+	state->closid = 0;
+	state->rmid_usecnt = 0;
 
 	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
 	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
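The WARN_ONs above only assert that each CPU reports the same limits as the boot CPU; cqm_l3_scale itself is the CPUID-reported factor that converts raw occupancy counts into bytes. A hypothetical helper (not part of this patch) showing that conversion:

#include <stdint.h>

/* Hypothetical helper: occupancy in bytes = raw count * upscale factor. */
static inline uint64_t cqm_count_to_bytes(uint64_t count, unsigned int scale)
{
	return count * scale;
}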