diff options
-rw-r--r-- | tools/perf/builtin-kmem.c | 81 |
1 files changed, 79 insertions, 2 deletions
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index dc86f1e64b66..1ecf3f4415ce 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -36,6 +36,9 @@ static char default_sort_order[] = "frag,hit,bytes"; | |||
36 | static char *cwd; | 36 | static char *cwd; |
37 | static int cwdlen; | 37 | static int cwdlen; |
38 | 38 | ||
39 | static int *cpunode_map; | ||
40 | static int max_cpu_num; | ||
41 | |||
39 | struct alloc_stat { | 42 | struct alloc_stat { |
40 | union { | 43 | union { |
41 | u64 call_site; | 44 | u64 call_site; |
@@ -54,12 +57,74 @@ static struct rb_root root_caller_stat; | |||
54 | static struct rb_root root_caller_sorted; | 57 | static struct rb_root root_caller_sorted; |
55 | 58 | ||
56 | static unsigned long total_requested, total_allocated; | 59 | static unsigned long total_requested, total_allocated; |
60 | static unsigned long nr_allocs, nr_cross_allocs; | ||
57 | 61 | ||
58 | struct raw_event_sample { | 62 | struct raw_event_sample { |
59 | u32 size; | 63 | u32 size; |
60 | char data[0]; | 64 | char data[0]; |
61 | }; | 65 | }; |
62 | 66 | ||
67 | #define PATH_SYS_NODE "/sys/devices/system/node" | ||
68 | |||
69 | static void init_cpunode_map(void) | ||
70 | { | ||
71 | FILE *fp; | ||
72 | int i; | ||
73 | |||
74 | fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); | ||
75 | if (!fp) { | ||
76 | max_cpu_num = 4096; | ||
77 | return; | ||
78 | } | ||
79 | |||
80 | if (fscanf(fp, "%d", &max_cpu_num) < 1) | ||
81 | die("Failed to read 'kernel_max' from sysfs"); | ||
82 | max_cpu_num++; | ||
83 | |||
84 | cpunode_map = calloc(max_cpu_num, sizeof(int)); | ||
85 | if (!cpunode_map) | ||
86 | die("calloc"); | ||
87 | for (i = 0; i < max_cpu_num; i++) | ||
88 | cpunode_map[i] = -1; | ||
89 | fclose(fp); | ||
90 | } | ||
91 | |||
92 | static void setup_cpunode_map(void) | ||
93 | { | ||
94 | struct dirent *dent1, *dent2; | ||
95 | DIR *dir1, *dir2; | ||
96 | unsigned int cpu, mem; | ||
97 | char buf[PATH_MAX]; | ||
98 | |||
99 | init_cpunode_map(); | ||
100 | |||
101 | dir1 = opendir(PATH_SYS_NODE); | ||
102 | if (!dir1) | ||
103 | return; | ||
104 | |||
105 | while (true) { | ||
106 | dent1 = readdir(dir1); | ||
107 | if (!dent1) | ||
108 | break; | ||
109 | |||
110 | if (sscanf(dent1->d_name, "node%u", &mem) < 1) | ||
111 | continue; | ||
112 | |||
113 | snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); | ||
114 | dir2 = opendir(buf); | ||
115 | if (!dir2) | ||
116 | continue; | ||
117 | while (true) { | ||
118 | dent2 = readdir(dir2); | ||
119 | if (!dent2) | ||
120 | break; | ||
121 | if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) | ||
122 | continue; | ||
123 | cpunode_map[cpu] = mem; | ||
124 | } | ||
125 | } | ||
126 | } | ||
127 | |||
63 | static int | 128 | static int |
64 | process_comm_event(event_t *event, unsigned long offset, unsigned long head) | 129 | process_comm_event(event_t *event, unsigned long offset, unsigned long head) |
65 | { | 130 | { |
@@ -157,15 +222,16 @@ static void insert_caller_stat(unsigned long call_site, | |||
157 | 222 | ||
158 | static void process_alloc_event(struct raw_event_sample *raw, | 223 | static void process_alloc_event(struct raw_event_sample *raw, |
159 | struct event *event, | 224 | struct event *event, |
160 | int cpu __used, | 225 | int cpu, |
161 | u64 timestamp __used, | 226 | u64 timestamp __used, |
162 | struct thread *thread __used, | 227 | struct thread *thread __used, |
163 | int node __used) | 228 | int node) |
164 | { | 229 | { |
165 | unsigned long call_site; | 230 | unsigned long call_site; |
166 | unsigned long ptr; | 231 | unsigned long ptr; |
167 | int bytes_req; | 232 | int bytes_req; |
168 | int bytes_alloc; | 233 | int bytes_alloc; |
234 | int node1, node2; | ||
169 | 235 | ||
170 | ptr = raw_field_value(event, "ptr", raw->data); | 236 | ptr = raw_field_value(event, "ptr", raw->data); |
171 | call_site = raw_field_value(event, "call_site", raw->data); | 237 | call_site = raw_field_value(event, "call_site", raw->data); |
@@ -177,6 +243,14 @@ static void process_alloc_event(struct raw_event_sample *raw, | |||
177 | 243 | ||
178 | total_requested += bytes_req; | 244 | total_requested += bytes_req; |
179 | total_allocated += bytes_alloc; | 245 | total_allocated += bytes_alloc; |
246 | |||
247 | if (node) { | ||
248 | node1 = cpunode_map[cpu]; | ||
249 | node2 = raw_field_value(event, "node", raw->data); | ||
250 | if (node1 != node2) | ||
251 | nr_cross_allocs++; | ||
252 | } | ||
253 | nr_allocs++; | ||
180 | } | 254 | } |
181 | 255 | ||
182 | static void process_free_event(struct raw_event_sample *raw __used, | 256 | static void process_free_event(struct raw_event_sample *raw __used, |
@@ -359,6 +433,7 @@ static void print_summary(void) | |||
359 | total_allocated - total_requested); | 433 | total_allocated - total_requested); |
360 | printf("Internal fragmentation: %f%%\n", | 434 | printf("Internal fragmentation: %f%%\n", |
361 | fragmentation(total_requested, total_allocated)); | 435 | fragmentation(total_requested, total_allocated)); |
436 | printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); | ||
362 | } | 437 | } |
363 | 438 | ||
364 | static void print_result(void) | 439 | static void print_result(void) |
@@ -685,6 +760,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) | |||
685 | if (list_empty(&alloc_sort)) | 760 | if (list_empty(&alloc_sort)) |
686 | setup_sorting(&alloc_sort, default_sort_order); | 761 | setup_sorting(&alloc_sort, default_sort_order); |
687 | 762 | ||
763 | setup_cpunode_map(); | ||
764 | |||
688 | return __cmd_kmem(); | 765 | return __cmd_kmem(); |
689 | } | 766 | } |
690 | 767 | ||