aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> 2017-06-29 17:55:38 -0400
committer Michael Ellerman <mpe@ellerman.id.au> 2017-07-02 06:40:33 -0400
commit bfaa7834b60e01135af4e8e06a9477bef2368f44 (patch)
tree a3f1101b2637ef2773289a59a9b27369d31331c1
parent 2e6553aae3e6bd13cf176855d67233dce8817381 (diff)
powerpc/perf/hv-24x7: Aggregate result elements on POWER9 SMT8
On POWER9 SMT8 the 24x7 API returns two result elements for physical core and virtual CPU events, and we need to add their counts to get the final result.

Reviewed-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- arch/powerpc/perf/hv-24x7.c 53
1 files changed, 42 insertions, 11 deletions
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index a9a4df6e6e22..9c88b82f6229 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -31,6 +31,9 @@
31/* Version of the 24x7 hypervisor API that we should use in this machine. */ 31/* Version of the 24x7 hypervisor API that we should use in this machine. */
32static int interface_version; 32static int interface_version;
33 33
34/* Whether we have to aggregate result data for some domains. */
35static bool aggregate_result_elements;
36
34static bool domain_is_valid(unsigned domain) 37static bool domain_is_valid(unsigned domain)
35{ 38{
36 switch (domain) { 39 switch (domain) {
@@ -58,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
58 } 61 }
59} 62}
60 63
64/* Domains for which more than one result element is returned for each event. */
65static bool domain_needs_aggregation(unsigned int domain)
66{
67 return aggregate_result_elements &&
68 (domain == HV_PERF_DOMAIN_PHYS_CORE ||
69 (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
70 domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
71}
72
61static const char *domain_name(unsigned domain) 73static const char *domain_name(unsigned domain)
62{ 74{
63 if (!domain_is_valid(domain)) 75 if (!domain_is_valid(domain))
@@ -1145,17 +1157,23 @@ static int add_event_to_24x7_request(struct perf_event *event,
1145 req->starting_ix = cpu_to_be16(idx); 1157 req->starting_ix = cpu_to_be16(idx);
1146 req->max_ix = cpu_to_be16(1); 1158 req->max_ix = cpu_to_be16(1);
1147 1159
1148 if (request_buffer->interface_version > 1 && 1160 if (request_buffer->interface_version > 1) {
1149 req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) { 1161 if (domain_needs_aggregation(req->performance_domain))
1150 req->starting_thread_group_ix = idx % 2; 1162 req->max_num_thread_groups = -1;
1151 req->max_num_thread_groups = 1; 1163 else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
1164 req->starting_thread_group_ix = idx % 2;
1165 req->max_num_thread_groups = 1;
1166 }
1152 } 1167 }
1153 1168
1154 return 0; 1169 return 0;
1155} 1170}
1156 1171
1157/** 1172/**
1158 * get_count_from_result - get event count from the given result 1173 * get_count_from_result - get event count from all result elements in result
1174 *
1175 * If the event corresponding to this result needs aggregation of the result
1176 * element values, then this function does that.
1159 * 1177 *
1160 * @event: Event associated with @res. 1178 * @event: Event associated with @res.
1161 * @resb: Result buffer containing @res. 1179 * @resb: Result buffer containing @res.
@@ -1172,6 +1190,8 @@ static int get_count_from_result(struct perf_event *event,
1172 u16 data_size = be16_to_cpu(res->result_element_data_size); 1190 u16 data_size = be16_to_cpu(res->result_element_data_size);
1173 unsigned int data_offset; 1191 unsigned int data_offset;
1174 void *element_data; 1192 void *element_data;
1193 int i;
1194 u64 count;
1175 1195
1176 /* 1196 /*
1177 * We can bail out early if the result is empty. 1197 * We can bail out early if the result is empty.
@@ -1189,8 +1209,10 @@ static int get_count_from_result(struct perf_event *event,
1189 /* 1209 /*
1190 * Since we always specify 1 as the maximum for the smallest resource 1210 * Since we always specify 1 as the maximum for the smallest resource
1191 * we're requesting, there should be only one element per result. 1211 * we're requesting, there should be only one element per result.
1212 * Except when an event needs aggregation, in which case there are more.
1192 */ 1213 */
1193 if (num_elements != 1) { 1214 if (num_elements != 1 &&
1215 !domain_needs_aggregation(event_get_domain(event))) {
1194 pr_err("Error: result of request %hhu has %hu elements\n", 1216 pr_err("Error: result of request %hhu has %hu elements\n",
1195 res->result_ix, num_elements); 1217 res->result_ix, num_elements);
1196 1218
@@ -1211,13 +1233,17 @@ static int get_count_from_result(struct perf_event *event,
1211 data_offset = offsetof(struct hv_24x7_result_element_v2, 1233 data_offset = offsetof(struct hv_24x7_result_element_v2,
1212 element_data); 1234 element_data);
1213 1235
1214 element_data = res->elements + data_offset; 1236 /* Go through the result elements in the result. */
1237 for (i = count = 0, element_data = res->elements + data_offset;
1238 i < num_elements;
1239 i++, element_data += data_size + data_offset)
1240 count += be64_to_cpu(*((u64 *) element_data));
1215 1241
1216 *countp = be64_to_cpu(*((u64 *) element_data)); 1242 *countp = count;
1217 1243
1218 /* The next result is after the result element. */ 1244 /* The next result is after the last result element. */
1219 if (next) 1245 if (next)
1220 *next = element_data + data_size; 1246 *next = element_data - data_offset;
1221 1247
1222 return 0; 1248 return 0;
1223} 1249}
@@ -1568,9 +1594,14 @@ static int hv_24x7_init(void)
1568 /* POWER8 only supports v1, while POWER9 only supports v2. */ 1594 /* POWER8 only supports v1, while POWER9 only supports v2. */
1569 if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) 1595 if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
1570 interface_version = 1; 1596 interface_version = 1;
1571 else 1597 else {
1572 interface_version = 2; 1598 interface_version = 2;
1573 1599
1600 /* SMT8 in POWER9 needs to aggregate result elements. */
1601 if (threads_per_core == 8)
1602 aggregate_result_elements = true;
1603 }
1604
1574 hret = hv_perf_caps_get(&caps); 1605 hret = hv_perf_caps_get(&caps);
1575 if (hret) { 1606 if (hret) {
1576 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", 1607 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",