diff options
author | Tony Luck <tony.luck@intel.com> | 2016-03-11 14:26:11 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-03-21 04:08:20 -0400 |
commit | 87f01cc2a2914b61ade5ec834377fa7819484173 (patch) | |
tree | 499025ae8140354fcfbd928088e30cfc5c60170a | |
parent | 33c3cc7acfd95968d74247f1a4e1b0727a07ed43 (diff) |
perf/x86/mbm: Add memory bandwidth monitoring event management
Includes all the core infrastructure to measure the total_bytes and
bandwidth.
We have per socket counters for both total system wide L3 external
bytes and local socket memory-controller bytes. The OS does MSR writes
to MSR_IA32_QM_EVTSEL and MSR_IA32_QM_CTR to read the counters and
uses the IA32_PQR_ASSOC_MSR to associate the RMID with the task. The
tasks have a common RMID for CQM (cache quality of service monitoring)
and MBM. Hence most of the scheduling code is reused from CQM.
Signed-off-by: Tony Luck <tony.luck@intel.com>
[ Restructured rmid_read to not have an obvious hole, removed MBM_CNTR_MAX as it's unused. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: fenghua.yu@intel.com
Cc: h.peter.anvin@intel.com
Cc: ravi.v.shankar@intel.com
Cc: vikas.shivappa@intel.com
Link: http://lkml.kernel.org/r/abd7aac9a18d93b95b985b931cf258df0164746d.1457723885.git.tony.luck@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/events/intel/cqm.c | 122 |
1 files changed, 116 insertions, 6 deletions
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 515df11e65bb..610bd8ab37e4 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #define MSR_IA32_QM_CTR 0x0c8e | 13 | #define MSR_IA32_QM_CTR 0x0c8e |
14 | #define MSR_IA32_QM_EVTSEL 0x0c8d | 14 | #define MSR_IA32_QM_EVTSEL 0x0c8d |
15 | 15 | ||
16 | #define MBM_CNTR_WIDTH 24 | ||
17 | |||
16 | static u32 cqm_max_rmid = -1; | 18 | static u32 cqm_max_rmid = -1; |
17 | static unsigned int cqm_l3_scale; /* supposedly cacheline size */ | 19 | static unsigned int cqm_l3_scale; /* supposedly cacheline size */ |
18 | static bool cqm_enabled, mbm_enabled; | 20 | static bool cqm_enabled, mbm_enabled; |
@@ -62,6 +64,16 @@ static struct sample *mbm_total; | |||
62 | */ | 64 | */ |
63 | static struct sample *mbm_local; | 65 | static struct sample *mbm_local; |
64 | 66 | ||
67 | #define pkg_id topology_physical_package_id(smp_processor_id()) | ||
68 | /* | ||
69 | * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array. | ||
70 | * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of | ||
71 | * rmids per socket, an example is given below | ||
72 | * RMID1 of Socket0: vrmid = 1 | ||
73 | * RMID1 of Socket1: vrmid = 1 * (cqm_max_rmid + 1) + 1 | ||
74 | * RMID1 of Socket2: vrmid = 2 * (cqm_max_rmid + 1) + 1 | ||
75 | */ | ||
76 | #define rmid_2_index(rmid) ((pkg_id * (cqm_max_rmid + 1)) + rmid) | ||
65 | /* | 77 | /* |
66 | * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. | 78 | * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. |
67 | * Also protects event->hw.cqm_rmid | 79 | * Also protects event->hw.cqm_rmid |
@@ -84,9 +96,13 @@ static cpumask_t cqm_cpumask; | |||
84 | #define RMID_VAL_ERROR (1ULL << 63) | 96 | #define RMID_VAL_ERROR (1ULL << 63) |
85 | #define RMID_VAL_UNAVAIL (1ULL << 62) | 97 | #define RMID_VAL_UNAVAIL (1ULL << 62) |
86 | 98 | ||
87 | #define QOS_L3_OCCUP_EVENT_ID (1 << 0) | 99 | /* |
88 | 100 | * Event IDs are used to program IA32_QM_EVTSEL before reading event | |
89 | #define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID | 101 | * counter from IA32_QM_CTR |
102 | */ | ||
103 | #define QOS_L3_OCCUP_EVENT_ID 0x01 | ||
104 | #define QOS_MBM_TOTAL_EVENT_ID 0x02 | ||
105 | #define QOS_MBM_LOCAL_EVENT_ID 0x03 | ||
90 | 106 | ||
91 | /* | 107 | /* |
92 | * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). | 108 | * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). |
@@ -428,10 +444,17 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b) | |||
428 | 444 | ||
429 | struct rmid_read { | 445 | struct rmid_read { |
430 | u32 rmid; | 446 | u32 rmid; |
447 | u32 evt_type; | ||
431 | atomic64_t value; | 448 | atomic64_t value; |
432 | }; | 449 | }; |
433 | 450 | ||
434 | static void __intel_cqm_event_count(void *info); | 451 | static void __intel_cqm_event_count(void *info); |
452 | static void init_mbm_sample(u32 rmid, u32 evt_type); | ||
453 | |||
454 | static bool is_mbm_event(int e) | ||
455 | { | ||
456 | return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID); | ||
457 | } | ||
435 | 458 | ||
436 | /* | 459 | /* |
437 | * Exchange the RMID of a group of events. | 460 | * Exchange the RMID of a group of events. |
@@ -873,6 +896,68 @@ static void intel_cqm_rmid_rotate(struct work_struct *work) | |||
873 | schedule_delayed_work(&intel_cqm_rmid_work, delay); | 896 | schedule_delayed_work(&intel_cqm_rmid_work, delay); |
874 | } | 897 | } |
875 | 898 | ||
899 | static u64 update_sample(unsigned int rmid, u32 evt_type, int first) | ||
900 | { | ||
901 | struct sample *mbm_current; | ||
902 | u32 vrmid = rmid_2_index(rmid); | ||
903 | u64 val, bytes, shift; | ||
904 | u32 eventid; | ||
905 | |||
906 | if (evt_type == QOS_MBM_LOCAL_EVENT_ID) { | ||
907 | mbm_current = &mbm_local[vrmid]; | ||
908 | eventid = QOS_MBM_LOCAL_EVENT_ID; | ||
909 | } else { | ||
910 | mbm_current = &mbm_total[vrmid]; | ||
911 | eventid = QOS_MBM_TOTAL_EVENT_ID; | ||
912 | } | ||
913 | |||
914 | wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); | ||
915 | rdmsrl(MSR_IA32_QM_CTR, val); | ||
916 | if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) | ||
917 | return mbm_current->total_bytes; | ||
918 | |||
919 | if (first) { | ||
920 | mbm_current->prev_msr = val; | ||
921 | mbm_current->total_bytes = 0; | ||
922 | return mbm_current->total_bytes; | ||
923 | } | ||
924 | |||
925 | shift = 64 - MBM_CNTR_WIDTH; | ||
926 | bytes = (val << shift) - (mbm_current->prev_msr << shift); | ||
927 | bytes >>= shift; | ||
928 | |||
929 | bytes *= cqm_l3_scale; | ||
930 | |||
931 | mbm_current->total_bytes += bytes; | ||
932 | mbm_current->prev_msr = val; | ||
933 | |||
934 | return mbm_current->total_bytes; | ||
935 | } | ||
936 | |||
937 | static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type) | ||
938 | { | ||
939 | return update_sample(rmid, evt_type, 0); | ||
940 | } | ||
941 | |||
942 | static void __intel_mbm_event_init(void *info) | ||
943 | { | ||
944 | struct rmid_read *rr = info; | ||
945 | |||
946 | update_sample(rr->rmid, rr->evt_type, 1); | ||
947 | } | ||
948 | |||
949 | static void init_mbm_sample(u32 rmid, u32 evt_type) | ||
950 | { | ||
951 | struct rmid_read rr = { | ||
952 | .rmid = rmid, | ||
953 | .evt_type = evt_type, | ||
954 | .value = ATOMIC64_INIT(0), | ||
955 | }; | ||
956 | |||
957 | /* on each socket, init sample */ | ||
958 | on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1); | ||
959 | } | ||
960 | |||
876 | /* | 961 | /* |
877 | * Find a group and setup RMID. | 962 | * Find a group and setup RMID. |
878 | * | 963 | * |
@@ -893,6 +978,8 @@ static void intel_cqm_setup_event(struct perf_event *event, | |||
893 | /* All tasks in a group share an RMID */ | 978 | /* All tasks in a group share an RMID */ |
894 | event->hw.cqm_rmid = rmid; | 979 | event->hw.cqm_rmid = rmid; |
895 | *group = iter; | 980 | *group = iter; |
981 | if (is_mbm_event(event->attr.config)) | ||
982 | init_mbm_sample(rmid, event->attr.config); | ||
896 | return; | 983 | return; |
897 | } | 984 | } |
898 | 985 | ||
@@ -909,6 +996,9 @@ static void intel_cqm_setup_event(struct perf_event *event, | |||
909 | else | 996 | else |
910 | rmid = __get_rmid(); | 997 | rmid = __get_rmid(); |
911 | 998 | ||
999 | if (is_mbm_event(event->attr.config)) | ||
1000 | init_mbm_sample(rmid, event->attr.config); | ||
1001 | |||
912 | event->hw.cqm_rmid = rmid; | 1002 | event->hw.cqm_rmid = rmid; |
913 | } | 1003 | } |
914 | 1004 | ||
@@ -930,7 +1020,10 @@ static void intel_cqm_event_read(struct perf_event *event) | |||
930 | if (!__rmid_valid(rmid)) | 1020 | if (!__rmid_valid(rmid)) |
931 | goto out; | 1021 | goto out; |
932 | 1022 | ||
933 | val = __rmid_read(rmid); | 1023 | if (is_mbm_event(event->attr.config)) |
1024 | val = rmid_read_mbm(rmid, event->attr.config); | ||
1025 | else | ||
1026 | val = __rmid_read(rmid); | ||
934 | 1027 | ||
935 | /* | 1028 | /* |
936 | * Ignore this reading on error states and do not update the value. | 1029 | * Ignore this reading on error states and do not update the value. |
@@ -961,6 +1054,17 @@ static inline bool cqm_group_leader(struct perf_event *event) | |||
961 | return !list_empty(&event->hw.cqm_groups_entry); | 1054 | return !list_empty(&event->hw.cqm_groups_entry); |
962 | } | 1055 | } |
963 | 1056 | ||
1057 | static void __intel_mbm_event_count(void *info) | ||
1058 | { | ||
1059 | struct rmid_read *rr = info; | ||
1060 | u64 val; | ||
1061 | |||
1062 | val = rmid_read_mbm(rr->rmid, rr->evt_type); | ||
1063 | if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) | ||
1064 | return; | ||
1065 | atomic64_add(val, &rr->value); | ||
1066 | } | ||
1067 | |||
964 | static u64 intel_cqm_event_count(struct perf_event *event) | 1068 | static u64 intel_cqm_event_count(struct perf_event *event) |
965 | { | 1069 | { |
966 | unsigned long flags; | 1070 | unsigned long flags; |
@@ -1014,7 +1118,12 @@ static u64 intel_cqm_event_count(struct perf_event *event) | |||
1014 | if (!__rmid_valid(rr.rmid)) | 1118 | if (!__rmid_valid(rr.rmid)) |
1015 | goto out; | 1119 | goto out; |
1016 | 1120 | ||
1017 | on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); | 1121 | if (is_mbm_event(event->attr.config)) { |
1122 | rr.evt_type = event->attr.config; | ||
1123 | on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, &rr, 1); | ||
1124 | } else { | ||
1125 | on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); | ||
1126 | } | ||
1018 | 1127 | ||
1019 | raw_spin_lock_irqsave(&cache_lock, flags); | 1128 | raw_spin_lock_irqsave(&cache_lock, flags); |
1020 | if (event->hw.cqm_rmid == rr.rmid) | 1129 | if (event->hw.cqm_rmid == rr.rmid) |
@@ -1129,7 +1238,8 @@ static int intel_cqm_event_init(struct perf_event *event) | |||
1129 | if (event->attr.type != intel_cqm_pmu.type) | 1238 | if (event->attr.type != intel_cqm_pmu.type) |
1130 | return -ENOENT; | 1239 | return -ENOENT; |
1131 | 1240 | ||
1132 | if (event->attr.config & ~QOS_EVENT_MASK) | 1241 | if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) || |
1242 | (event->attr.config > QOS_MBM_LOCAL_EVENT_ID)) | ||
1133 | return -EINVAL; | 1243 | return -EINVAL; |
1134 | 1244 | ||
1135 | /* unsupported modes and filters */ | 1245 | /* unsupported modes and filters */ |