diff options
| author | Satheesh Rajendran <sathnaga@linux.vnet.ibm.com> | 2017-11-22 11:43:53 -0500 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2017-11-28 12:28:10 -0500 |
| commit | 321a7c35c90cc834851ceda18a8ee18f1d032b92 (patch) | |
| tree | 2f795f391026d285d2da784827489b2189367656 | |
| parent | bdaab8c4b3db820b0946ed4516bc5ec811e0cc82 (diff) | |
perf bench numa: Fixup discontiguous/sparse numa nodes
Certain systems are designed to have sparse/discontiguous nodes. On
such systems, 'perf bench numa' hangs, shows the wrong number of nodes, and
shows values for non-existent nodes. Handle this by only considering nodes
that the kernel exposes to userspace.
Signed-off-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1edbcd353c009e109e93d78f2f46381930c340fe.1511368645.git.sathnaga@linux.vnet.ibm.com
Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
| -rw-r--r-- | tools/perf/bench/numa.c | 56 |
1 files changed, 51 insertions, 5 deletions
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index d95fdcc26f4b..944070e98a2c 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c | |||
| @@ -216,6 +216,47 @@ static const char * const numa_usage[] = { | |||
| 216 | NULL | 216 | NULL |
| 217 | }; | 217 | }; |
| 218 | 218 | ||
| 219 | /* | ||
| 220 | * To get number of numa nodes present. | ||
| 221 | */ | ||
| 222 | static int nr_numa_nodes(void) | ||
| 223 | { | ||
| 224 | int i, nr_nodes = 0; | ||
| 225 | |||
| 226 | for (i = 0; i < g->p.nr_nodes; i++) { | ||
| 227 | if (numa_bitmask_isbitset(numa_nodes_ptr, i)) | ||
| 228 | nr_nodes++; | ||
| 229 | } | ||
| 230 | |||
| 231 | return nr_nodes; | ||
| 232 | } | ||
| 233 | |||
| 234 | /* | ||
| 235 | * To check if given numa node is present. | ||
| 236 | */ | ||
| 237 | static int is_node_present(int node) | ||
| 238 | { | ||
| 239 | return numa_bitmask_isbitset(numa_nodes_ptr, node); | ||
| 240 | } | ||
| 241 | |||
| 242 | /* | ||
| 243 | * To check given numa node has cpus. | ||
| 244 | */ | ||
| 245 | static bool node_has_cpus(int node) | ||
| 246 | { | ||
| 247 | struct bitmask *cpu = numa_allocate_cpumask(); | ||
| 248 | unsigned int i; | ||
| 249 | |||
| 250 | if (cpu && !numa_node_to_cpus(node, cpu)) { | ||
| 251 | for (i = 0; i < cpu->size; i++) { | ||
| 252 | if (numa_bitmask_isbitset(cpu, i)) | ||
| 253 | return true; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | return false; /* lets fall back to nocpus safely */ | ||
| 258 | } | ||
| 259 | |||
| 219 | static cpu_set_t bind_to_cpu(int target_cpu) | 260 | static cpu_set_t bind_to_cpu(int target_cpu) |
| 220 | { | 261 | { |
| 221 | cpu_set_t orig_mask, mask; | 262 | cpu_set_t orig_mask, mask; |
| @@ -244,12 +285,12 @@ static cpu_set_t bind_to_cpu(int target_cpu) | |||
| 244 | 285 | ||
| 245 | static cpu_set_t bind_to_node(int target_node) | 286 | static cpu_set_t bind_to_node(int target_node) |
| 246 | { | 287 | { |
| 247 | int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes; | 288 | int cpus_per_node = g->p.nr_cpus / nr_numa_nodes(); |
| 248 | cpu_set_t orig_mask, mask; | 289 | cpu_set_t orig_mask, mask; |
| 249 | int cpu; | 290 | int cpu; |
| 250 | int ret; | 291 | int ret; |
| 251 | 292 | ||
| 252 | BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus); | 293 | BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus); |
| 253 | BUG_ON(!cpus_per_node); | 294 | BUG_ON(!cpus_per_node); |
| 254 | 295 | ||
| 255 | ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); | 296 | ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); |
| @@ -649,7 +690,7 @@ static int parse_setup_node_list(void) | |||
| 649 | int i; | 690 | int i; |
| 650 | 691 | ||
| 651 | for (i = 0; i < mul; i++) { | 692 | for (i = 0; i < mul; i++) { |
| 652 | if (t >= g->p.nr_tasks) { | 693 | if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) { |
| 653 | printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); | 694 | printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); |
| 654 | goto out; | 695 | goto out; |
| 655 | } | 696 | } |
| @@ -964,6 +1005,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) | |||
| 964 | sum = 0; | 1005 | sum = 0; |
| 965 | 1006 | ||
| 966 | for (node = 0; node < g->p.nr_nodes; node++) { | 1007 | for (node = 0; node < g->p.nr_nodes; node++) { |
| 1008 | if (!is_node_present(node)) | ||
| 1009 | continue; | ||
| 967 | nr = nodes[node]; | 1010 | nr = nodes[node]; |
| 968 | nr_min = min(nr, nr_min); | 1011 | nr_min = min(nr, nr_min); |
| 969 | nr_max = max(nr, nr_max); | 1012 | nr_max = max(nr, nr_max); |
| @@ -984,8 +1027,11 @@ static void calc_convergence(double runtime_ns_max, double *convergence) | |||
| 984 | process_groups = 0; | 1027 | process_groups = 0; |
| 985 | 1028 | ||
| 986 | for (node = 0; node < g->p.nr_nodes; node++) { | 1029 | for (node = 0; node < g->p.nr_nodes; node++) { |
| 987 | int processes = count_node_processes(node); | 1030 | int processes; |
| 988 | 1031 | ||
| 1032 | if (!is_node_present(node)) | ||
| 1033 | continue; | ||
| 1034 | processes = count_node_processes(node); | ||
| 989 | nr = nodes[node]; | 1035 | nr = nodes[node]; |
| 990 | tprintf(" %2d/%-2d", nr, processes); | 1036 | tprintf(" %2d/%-2d", nr, processes); |
| 991 | 1037 | ||
| @@ -1291,7 +1337,7 @@ static void print_summary(void) | |||
| 1291 | 1337 | ||
| 1292 | printf("\n ###\n"); | 1338 | printf("\n ###\n"); |
| 1293 | printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", | 1339 | printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", |
| 1294 | g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus); | 1340 | g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus); |
| 1295 | printf(" # %5dx %5ldMB global shared mem operations\n", | 1341 | printf(" # %5dx %5ldMB global shared mem operations\n", |
| 1296 | g->p.nr_loops, g->p.bytes_global/1024/1024); | 1342 | g->p.nr_loops, g->p.bytes_global/1024/1024); |
| 1297 | printf(" # %5dx %5ldMB process shared mem operations\n", | 1343 | printf(" # %5dx %5ldMB process shared mem operations\n", |
