aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tools/perf/builtin-kmem.c81
1 files changed, 79 insertions, 2 deletions
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index dc86f1e64b66..1ecf3f4415ce 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -36,6 +36,9 @@ static char default_sort_order[] = "frag,hit,bytes";
36static char *cwd; 36static char *cwd;
37static int cwdlen; 37static int cwdlen;
38 38
39static int *cpunode_map;
40static int max_cpu_num;
41
39struct alloc_stat { 42struct alloc_stat {
40 union { 43 union {
41 u64 call_site; 44 u64 call_site;
@@ -54,12 +57,74 @@ static struct rb_root root_caller_stat;
54static struct rb_root root_caller_sorted; 57static struct rb_root root_caller_sorted;
55 58
56static unsigned long total_requested, total_allocated; 59static unsigned long total_requested, total_allocated;
60static unsigned long nr_allocs, nr_cross_allocs;
57 61
58struct raw_event_sample { 62struct raw_event_sample {
59 u32 size; 63 u32 size;
60 char data[0]; 64 char data[0];
61}; 65};
62 66
67#define PATH_SYS_NODE "/sys/devices/system/node"
68
69static void init_cpunode_map(void)
70{
71 FILE *fp;
72 int i;
73
74 fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
75 if (!fp) {
76 max_cpu_num = 4096;
77 return;
78 }
79
80 if (fscanf(fp, "%d", &max_cpu_num) < 1)
81 die("Failed to read 'kernel_max' from sysfs");
82 max_cpu_num++;
83
84 cpunode_map = calloc(max_cpu_num, sizeof(int));
85 if (!cpunode_map)
86 die("calloc");
87 for (i = 0; i < max_cpu_num; i++)
88 cpunode_map[i] = -1;
89 fclose(fp);
90}
91
92static void setup_cpunode_map(void)
93{
94 struct dirent *dent1, *dent2;
95 DIR *dir1, *dir2;
96 unsigned int cpu, mem;
97 char buf[PATH_MAX];
98
99 init_cpunode_map();
100
101 dir1 = opendir(PATH_SYS_NODE);
102 if (!dir1)
103 return;
104
105 while (true) {
106 dent1 = readdir(dir1);
107 if (!dent1)
108 break;
109
110 if (sscanf(dent1->d_name, "node%u", &mem) < 1)
111 continue;
112
113 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
114 dir2 = opendir(buf);
115 if (!dir2)
116 continue;
117 while (true) {
118 dent2 = readdir(dir2);
119 if (!dent2)
120 break;
121 if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
122 continue;
123 cpunode_map[cpu] = mem;
124 }
125 }
126}
127
63static int 128static int
64process_comm_event(event_t *event, unsigned long offset, unsigned long head) 129process_comm_event(event_t *event, unsigned long offset, unsigned long head)
65{ 130{
@@ -157,15 +222,16 @@ static void insert_caller_stat(unsigned long call_site,
157 222
158static void process_alloc_event(struct raw_event_sample *raw, 223static void process_alloc_event(struct raw_event_sample *raw,
159 struct event *event, 224 struct event *event,
160 int cpu __used, 225 int cpu,
161 u64 timestamp __used, 226 u64 timestamp __used,
162 struct thread *thread __used, 227 struct thread *thread __used,
163 int node __used) 228 int node)
164{ 229{
165 unsigned long call_site; 230 unsigned long call_site;
166 unsigned long ptr; 231 unsigned long ptr;
167 int bytes_req; 232 int bytes_req;
168 int bytes_alloc; 233 int bytes_alloc;
234 int node1, node2;
169 235
170 ptr = raw_field_value(event, "ptr", raw->data); 236 ptr = raw_field_value(event, "ptr", raw->data);
171 call_site = raw_field_value(event, "call_site", raw->data); 237 call_site = raw_field_value(event, "call_site", raw->data);
@@ -177,6 +243,14 @@ static void process_alloc_event(struct raw_event_sample *raw,
177 243
178 total_requested += bytes_req; 244 total_requested += bytes_req;
179 total_allocated += bytes_alloc; 245 total_allocated += bytes_alloc;
246
247 if (node) {
248 node1 = cpunode_map[cpu];
249 node2 = raw_field_value(event, "node", raw->data);
250 if (node1 != node2)
251 nr_cross_allocs++;
252 }
253 nr_allocs++;
180} 254}
181 255
182static void process_free_event(struct raw_event_sample *raw __used, 256static void process_free_event(struct raw_event_sample *raw __used,
@@ -359,6 +433,7 @@ static void print_summary(void)
359 total_allocated - total_requested); 433 total_allocated - total_requested);
360 printf("Internal fragmentation: %f%%\n", 434 printf("Internal fragmentation: %f%%\n",
361 fragmentation(total_requested, total_allocated)); 435 fragmentation(total_requested, total_allocated));
436 printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
362} 437}
363 438
364static void print_result(void) 439static void print_result(void)
@@ -685,6 +760,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
685 if (list_empty(&alloc_sort)) 760 if (list_empty(&alloc_sort))
686 setup_sorting(&alloc_sort, default_sort_order); 761 setup_sorting(&alloc_sort, default_sort_order);
687 762
763 setup_cpunode_map();
764
688 return __cmd_kmem(); 765 return __cmd_kmem();
689} 766}
690 767