Diffstat (limited to 'include/os/linux/debug_kmem.c')
-rw-r--r--   include/os/linux/debug_kmem.c   312
1 file changed, 312 insertions, 0 deletions
diff --git a/include/os/linux/debug_kmem.c b/include/os/linux/debug_kmem.c
new file mode 100644
index 0000000..a0c7d47
--- /dev/null
+++ b/include/os/linux/debug_kmem.c
@@ -0,0 +1,312 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "os_linux.h"
#include "debug_kmem.h"
#include "kmem_priv.h"

/**
 * __to_human_readable_bytes - Determine the suffix for a passed size.
 *
 * @bytes - Number of bytes to generate a suffix for.
 * @hr_bytes [out] - The human readable number of bytes.
 * @hr_suffix [out] - The suffix for the HR number of bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The
 * scaled number of bytes is passed back in @hr_bytes. This returns the
 * following ranges:
 *
 * 0 - 1023 B
 * 1 - 1023 KB
 * 1 - 1023 MB
 * 1 - 1023 GB
 * 1 - 1023 TB
 * 1 - ... PB
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
                                      const char **hr_suffix)
{
        static const char *suffixes[] =
                { "B", "KB", "MB", "GB", "TB", "PB" };

        u64 suffix_ind = 0;

        while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
                bytes >>= 10;
                suffix_ind++;
        }

        /*
         * Handle the case where bytes > 1023PB.
         */
        suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
                suffix_ind : ARRAY_SIZE(suffixes) - 1;

        *hr_bytes = bytes;
        *hr_suffix = suffixes[suffix_ind];
}
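
/*
 * Worked example (illustrative): 1536 bytes yields *hr_bytes == 1 and
 * *hr_suffix == "KB" (1536 >> 10 == 1); 3 * 1024 * 1024 bytes yields
 * *hr_bytes == 3 and *hr_suffix == "MB".
 */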

/**
 * print_hr_bytes - Print human readable bytes
 *
 * @s - A seq_file to print to. May be NULL.
 * @msg - A message to print before the bytes.
 * @bytes - Number of bytes.
 *
 * Print @msg followed by the human readable decomposition of the passed number
 * of bytes.
 *
 * If @s is NULL then the prints are made to the kernel log.
 */
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
        u64 hr_bytes;
        const char *hr_suffix;

        __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
        __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}
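
/*
 * Illustrative call: print_hr_bytes(s, " Largest alloc ", 2097152) emits
 * " Largest alloc 2 MB", either via seq_printf() when @s is non-NULL or to
 * the kernel log otherwise.
 */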

/**
 * print_histogram - Build a histogram of the memory usage.
 *
 * @tracker - The tracker to pull data from.
 * @s - A seq_file to dump info into.
 */
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
                            struct seq_file *s)
{
        int i;
        u64 pot_min, pot_max;
        u64 nr_buckets;
        unsigned int *buckets;
        unsigned int total_allocs;
        struct nvgpu_rbtree_node *node;
        static const char histogram_line[] =
                "++++++++++++++++++++++++++++++++++++++++";

        /*
         * pot_min is essentially a round down to the nearest power of 2. This
         * is the start of the histogram. pot_max is just a round up to the
         * nearest power of two. Each histogram bucket spans one power of two,
         * so the buckets are exponential.
         */
        pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
        pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);

        nr_buckets = __ffs(pot_max) - __ffs(pot_min);
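        /*
         * Illustrative numbers: with min_alloc == 200 and max_alloc == 1000000,
         * pot_min == 128, pot_max == 1048576, so nr_buckets == 20 - 7 == 13.
         */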

        buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
        if (!buckets) {
                __pstat(s, "OOM: could not allocate bucket storage!?\n");
                return;
        }

        /*
         * Iterate across all of the allocs and determine what bucket they
         * should go in. Round the size down to the nearest power of two to
         * find the right bucket.
         */
        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
        while (node) {
                int b;
                u64 bucket_min;
                struct nvgpu_mem_alloc *alloc =
                        nvgpu_mem_alloc_from_rbtree_node(node);

                bucket_min = (u64)rounddown_pow_of_two(alloc->size);
                if (bucket_min < tracker->min_alloc)
                        bucket_min = tracker->min_alloc;

                b = __ffs(bucket_min) - __ffs(pot_min);
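                /*
                 * Illustrative: an alloc of 300 bytes rounds down to
                 * bucket_min == 256, so with pot_min == 128 it lands in
                 * bucket b == 8 - 7 == 1.
                 */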

                /*
                 * Handle the one case where there's an alloc exactly as big
                 * as the maximum bucket size of the largest bucket. Most of
                 * the buckets have an inclusive minimum and exclusive
                 * maximum. But the largest bucket needs to have an
                 * _inclusive_ maximum as well.
                 */
                if (b == (int)nr_buckets)
                        b--;
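                /*
                 * Illustrative: an alloc of exactly pot_max (say 1048576 with
                 * the numbers above) computes b == nr_buckets and is folded
                 * into the last bucket here.
                 */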

                buckets[b]++;

                nvgpu_rbtree_enum_next(&node, node);
        }

        total_allocs = 0;
        for (i = 0; i < (int)nr_buckets; i++)
                total_allocs += buckets[i];

        __pstat(s, "Alloc histogram:\n");

        /*
         * Actually compute the histogram lines.
         */
        for (i = 0; i < (int)nr_buckets; i++) {
                char this_line[sizeof(histogram_line) + 1];
                u64 line_length;
                u64 hr_bytes;
                const char *hr_suffix;

                memset(this_line, 0, sizeof(this_line));

                /*
                 * Compute the normalized line length. Can't use floating
                 * point, so we just multiply everything by 1000 and use
                 * fixed point.
                 */
                line_length = (1000 * buckets[i]) / total_allocs;
                line_length *= sizeof(histogram_line);
                line_length /= 1000;
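                /*
                 * Illustrative: 25 allocs out of 100 gives
                 * (1000 * 25) / 100 == 250, then 250 * 41 / 1000 == 10, i.e.
                 * ten '+' characters (sizeof(histogram_line) is 41, counting
                 * the terminating NUL).
                 */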

                memset(this_line, '+', line_length);

                __to_human_readable_bytes(1ULL << (__ffs(pot_min) + i),
                                          &hr_bytes, &hr_suffix);
                __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
                        hr_bytes, hr_bytes << 1,
                        hr_suffix, buckets[i], this_line);
        }
}
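
/*
 * Example histogram line (illustrative): a bucket covering [4 KB, 8 KB) that
 * holds 12 of 48 total allocs prints roughly:
 *
 *  [4    8   ] KB    12 | ++++++++++
 */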

/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker - The tracker to pull data from.
 * @s - A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-NULL then seq_printf() will be
 * used with @s. Otherwise the stats are printed with pr_info().
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
                            struct seq_file *s)
{
        nvgpu_lock_tracker(tracker);

        __pstat(s, "Mem tracker: %s\n\n", tracker->name);

        __pstat(s, "Basic Stats:\n");
        __pstat(s, " Number of allocs %lld\n",
                tracker->nr_allocs);
        __pstat(s, " Number of frees %lld\n",
                tracker->nr_frees);
        print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
        print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
        print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
        print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
        print_hr_bytes(s, " Bytes allocated (real) ",
                       tracker->bytes_alloced_real);
        print_hr_bytes(s, " Bytes freed (real) ",
                       tracker->bytes_freed_real);
        __pstat(s, "\n");

        print_histogram(tracker, s);

        nvgpu_unlock_tracker(tracker);
}
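
/*
 * Sample output (tracker name and values are illustrative):
 *
 *   Mem tracker: kmalloc_tracker
 *
 *   Basic Stats:
 *    Number of allocs 1024
 *    Number of frees 1000
 *    Smallest alloc 32 B
 *    Largest alloc 2 MB
 *    ...
 */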

static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
        struct nvgpu_mem_alloc_tracker *tracker = s->private;

        nvgpu_kmem_print_stats(tracker, s);

        return 0;
}

static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
        return single_open(file, __kmem_tracking_show, inode->i_private);
}

static const struct file_operations __kmem_tracking_fops = {
        .open = __kmem_tracking_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static int __kmem_traces_dump_tracker(struct gk20a *g,
                                      struct nvgpu_mem_alloc_tracker *tracker,
                                      struct seq_file *s)
{
        struct nvgpu_rbtree_node *node;

        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
        while (node) {
                struct nvgpu_mem_alloc *alloc =
                        nvgpu_mem_alloc_from_rbtree_node(node);

                kmem_print_mem_alloc(g, alloc, s);

                nvgpu_rbtree_enum_next(&node, node);
        }

        return 0;
}

static int __kmem_traces_show(struct seq_file *s, void *unused)
{
        struct gk20a *g = s->private;

        nvgpu_lock_tracker(g->vmallocs);
        seq_puts(s, "Outstanding vmallocs:\n");
        __kmem_traces_dump_tracker(g, g->vmallocs, s);
        seq_puts(s, "\n");
        nvgpu_unlock_tracker(g->vmallocs);

        nvgpu_lock_tracker(g->kmallocs);
        seq_puts(s, "Outstanding kmallocs:\n");
        __kmem_traces_dump_tracker(g, g->kmallocs, s);
        nvgpu_unlock_tracker(g->kmallocs);

        return 0;
}

static int __kmem_traces_open(struct inode *inode, struct file *file)
{
        return single_open(file, __kmem_traces_show, inode->i_private);
}

static const struct file_operations __kmem_traces_fops = {
        .open = __kmem_traces_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
        struct dentry *node;

        l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
        if (IS_ERR_OR_NULL(l->debugfs_kmem))
                return;

        node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
                                   l->debugfs_kmem,
                                   g->vmallocs, &__kmem_tracking_fops);
        node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
                                   l->debugfs_kmem,
                                   g->kmallocs, &__kmem_tracking_fops);
        node = debugfs_create_file("traces", S_IRUGO,
                                   l->debugfs_kmem,
                                   g, &__kmem_traces_fops);
}