diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/debug_kmem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/debug_kmem.c | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c new file mode 100644 index 00000000..2ee542a8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c | |||
@@ -0,0 +1,315 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#include "debug_kmem.h"
#include "kmem_priv.h"
#include "gk20a/platform_gk20a.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
21 | |||
22 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
/**
 * __to_human_readable_bytes - Scale a byte count and pick its unit suffix.
 *
 * @bytes           - Number of bytes to generate a suffix for.
 * @hr_bytes [out]  - The human readable (scaled) number of bytes.
 * @hr_suffix [out] - The suffix matching the value stored in @hr_bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * value is divided by 1024 (rounding down) until it is below 1024 or the
 * largest known suffix has been reached. The scaled value is passed back in
 * @hr_bytes and the matching suffix in @hr_suffix. This returns the following
 * ranges:
 *
 *   0 - 1023 B
 *   1 - 1023 KB
 *   1 - 1023 MB
 *   1 - 1023 GB
 *   1 - 1023 TB
 *   1 - ...  PB
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
				      const char **hr_suffix)
{
	static const char * const suffixes[] =
		{ "B", "KB", "MB", "GB", "TB", "PB" };
	size_t suffix_ind = 0;

	/*
	 * Never step past the last suffix. Values of 1024 PB and above must
	 * stay expressed in PB; scaling them once more and then clamping the
	 * index would report them 1024 times too small.
	 */
	while (suffix_ind < ARRAY_SIZE(suffixes) - 1 && bytes >= 1024) {
		bytes >>= 10;
		suffix_ind++;
	}

	*hr_bytes = bytes;
	*hr_suffix = suffixes[suffix_ind];
}
64 | |||
65 | /** | ||
66 | * print_hr_bytes - Print human readable bytes | ||
67 | * | ||
68 | * @s - A seq_file to print to. May be NULL. | ||
69 | * @msg - A message to print before the bytes. | ||
70 | * @bytes - Number of bytes. | ||
71 | * | ||
72 | * Print @msg followed by the human readable decomposition of the passed number | ||
73 | * of bytes. | ||
74 | * | ||
75 | * If @s is NULL then this prints will be made to the kernel log. | ||
76 | */ | ||
77 | static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) | ||
78 | { | ||
79 | u64 hr_bytes; | ||
80 | const char *hr_suffix; | ||
81 | |||
82 | __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); | ||
83 | __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * print_histogram - Build a histogram of the memory usage. | ||
88 | * | ||
89 | * @tracker The tracking to pull data from. | ||
90 | * @s A seq_file to dump info into. | ||
91 | */ | ||
92 | static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, | ||
93 | struct seq_file *s) | ||
94 | { | ||
95 | int i; | ||
96 | u64 pot_min, pot_max; | ||
97 | u64 nr_buckets; | ||
98 | unsigned int *buckets; | ||
99 | unsigned int total_allocs; | ||
100 | struct nvgpu_rbtree_node *node; | ||
101 | static const char histogram_line[] = | ||
102 | "++++++++++++++++++++++++++++++++++++++++"; | ||
103 | |||
104 | /* | ||
105 | * pot_min is essentially a round down to the nearest power of 2. This | ||
106 | * is the start of the histogram. pot_max is just a round up to the | ||
107 | * nearest power of two. Each histogram bucket is one power of two so | ||
108 | * the histogram buckets are exponential. | ||
109 | */ | ||
110 | pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); | ||
111 | pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); | ||
112 | |||
113 | nr_buckets = __ffs(pot_max) - __ffs(pot_min); | ||
114 | |||
115 | buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); | ||
116 | if (!buckets) { | ||
117 | __pstat(s, "OOM: could not allocate bucket storage!?\n"); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Iterate across all of the allocs and determine what bucket they | ||
123 | * should go in. Round the size down to the nearest power of two to | ||
124 | * find the right bucket. | ||
125 | */ | ||
126 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
127 | while (node) { | ||
128 | int b; | ||
129 | u64 bucket_min; | ||
130 | struct nvgpu_mem_alloc *alloc = | ||
131 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
132 | |||
133 | bucket_min = (u64)rounddown_pow_of_two(alloc->size); | ||
134 | if (bucket_min < tracker->min_alloc) | ||
135 | bucket_min = tracker->min_alloc; | ||
136 | |||
137 | b = __ffs(bucket_min) - __ffs(pot_min); | ||
138 | |||
139 | /* | ||
140 | * Handle the one case were there's an alloc exactly as big as | ||
141 | * the maximum bucket size of the largest bucket. Most of the | ||
142 | * buckets have an inclusive minimum and exclusive maximum. But | ||
143 | * the largest bucket needs to have an _inclusive_ maximum as | ||
144 | * well. | ||
145 | */ | ||
146 | if (b == (int)nr_buckets) | ||
147 | b--; | ||
148 | |||
149 | buckets[b]++; | ||
150 | |||
151 | nvgpu_rbtree_enum_next(&node, node); | ||
152 | } | ||
153 | |||
154 | total_allocs = 0; | ||
155 | for (i = 0; i < (int)nr_buckets; i++) | ||
156 | total_allocs += buckets[i]; | ||
157 | |||
158 | __pstat(s, "Alloc histogram:\n"); | ||
159 | |||
160 | /* | ||
161 | * Actually compute the histogram lines. | ||
162 | */ | ||
163 | for (i = 0; i < (int)nr_buckets; i++) { | ||
164 | char this_line[sizeof(histogram_line) + 1]; | ||
165 | u64 line_length; | ||
166 | u64 hr_bytes; | ||
167 | const char *hr_suffix; | ||
168 | |||
169 | memset(this_line, 0, sizeof(this_line)); | ||
170 | |||
171 | /* | ||
172 | * Compute the normalized line length. Cant use floating point | ||
173 | * so we will just multiply everything by 1000 and use fixed | ||
174 | * point. | ||
175 | */ | ||
176 | line_length = (1000 * buckets[i]) / total_allocs; | ||
177 | line_length *= sizeof(histogram_line); | ||
178 | line_length /= 1000; | ||
179 | |||
180 | memset(this_line, '+', line_length); | ||
181 | |||
182 | __to_human_readable_bytes(1 << (__ffs(pot_min) + i), | ||
183 | &hr_bytes, &hr_suffix); | ||
184 | __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", | ||
185 | hr_bytes, hr_bytes << 1, | ||
186 | hr_suffix, buckets[i], this_line); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | /** | ||
191 | * nvgpu_kmem_print_stats - Print kmem tracking stats. | ||
192 | * | ||
193 | * @tracker The tracking to pull data from. | ||
194 | * @s A seq_file to dump info into. | ||
195 | * | ||
196 | * Print stats from a tracker. If @s is non-null then seq_printf() will be | ||
197 | * used with @s. Otherwise the stats are pr_info()ed. | ||
198 | */ | ||
199 | void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, | ||
200 | struct seq_file *s) | ||
201 | { | ||
202 | nvgpu_lock_tracker(tracker); | ||
203 | |||
204 | __pstat(s, "Mem tracker: %s\n\n", tracker->name); | ||
205 | |||
206 | __pstat(s, "Basic Stats:\n"); | ||
207 | __pstat(s, " Number of allocs %lld\n", | ||
208 | tracker->nr_allocs); | ||
209 | __pstat(s, " Number of frees %lld\n", | ||
210 | tracker->nr_frees); | ||
211 | print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); | ||
212 | print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); | ||
213 | print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); | ||
214 | print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); | ||
215 | print_hr_bytes(s, " Bytes allocated (real) ", | ||
216 | tracker->bytes_alloced_real); | ||
217 | print_hr_bytes(s, " Bytes freed (real) ", | ||
218 | tracker->bytes_freed_real); | ||
219 | __pstat(s, "\n"); | ||
220 | |||
221 | print_histogram(tracker, s); | ||
222 | |||
223 | nvgpu_unlock_tracker(tracker); | ||
224 | } | ||
225 | |||
226 | static int __kmem_tracking_show(struct seq_file *s, void *unused) | ||
227 | { | ||
228 | struct nvgpu_mem_alloc_tracker *tracker = s->private; | ||
229 | |||
230 | nvgpu_kmem_print_stats(tracker, s); | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | static int __kmem_tracking_open(struct inode *inode, struct file *file) | ||
236 | { | ||
237 | return single_open(file, __kmem_tracking_show, inode->i_private); | ||
238 | } | ||
239 | |||
240 | static const struct file_operations __kmem_tracking_fops = { | ||
241 | .open = __kmem_tracking_open, | ||
242 | .read = seq_read, | ||
243 | .llseek = seq_lseek, | ||
244 | .release = single_release, | ||
245 | }; | ||
246 | |||
247 | static int __kmem_traces_dump_tracker(struct gk20a *g, | ||
248 | struct nvgpu_mem_alloc_tracker *tracker, | ||
249 | struct seq_file *s) | ||
250 | { | ||
251 | struct nvgpu_rbtree_node *node; | ||
252 | |||
253 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
254 | while (node) { | ||
255 | struct nvgpu_mem_alloc *alloc = | ||
256 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
257 | |||
258 | kmem_print_mem_alloc(g, alloc, s); | ||
259 | |||
260 | nvgpu_rbtree_enum_next(&node, node); | ||
261 | } | ||
262 | |||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static int __kmem_traces_show(struct seq_file *s, void *unused) | ||
267 | { | ||
268 | struct gk20a *g = s->private; | ||
269 | |||
270 | nvgpu_lock_tracker(g->vmallocs); | ||
271 | seq_puts(s, "Oustanding vmallocs:\n"); | ||
272 | __kmem_traces_dump_tracker(g, g->vmallocs, s); | ||
273 | seq_puts(s, "\n"); | ||
274 | nvgpu_unlock_tracker(g->vmallocs); | ||
275 | |||
276 | nvgpu_lock_tracker(g->kmallocs); | ||
277 | seq_puts(s, "Oustanding kmallocs:\n"); | ||
278 | __kmem_traces_dump_tracker(g, g->kmallocs, s); | ||
279 | nvgpu_unlock_tracker(g->kmallocs); | ||
280 | |||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static int __kmem_traces_open(struct inode *inode, struct file *file) | ||
285 | { | ||
286 | return single_open(file, __kmem_traces_show, inode->i_private); | ||
287 | } | ||
288 | |||
289 | static const struct file_operations __kmem_traces_fops = { | ||
290 | .open = __kmem_traces_open, | ||
291 | .read = seq_read, | ||
292 | .llseek = seq_lseek, | ||
293 | .release = single_release, | ||
294 | }; | ||
295 | |||
296 | void nvgpu_kmem_debugfs_init(struct gk20a *g) | ||
297 | { | ||
298 | struct gk20a_platform *platform = dev_get_drvdata(g->dev); | ||
299 | struct dentry *node; | ||
300 | |||
301 | g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs); | ||
302 | if (IS_ERR_OR_NULL(g->debugfs_kmem)) | ||
303 | return; | ||
304 | |||
305 | node = debugfs_create_file(g->vmallocs->name, S_IRUGO, | ||
306 | g->debugfs_kmem, | ||
307 | g->vmallocs, &__kmem_tracking_fops); | ||
308 | node = debugfs_create_file(g->kmallocs->name, S_IRUGO, | ||
309 | g->debugfs_kmem, | ||
310 | g->kmallocs, &__kmem_tracking_fops); | ||
311 | node = debugfs_create_file("traces", S_IRUGO, | ||
312 | g->debugfs_kmem, | ||
313 | g, &__kmem_traces_fops); | ||
314 | } | ||
315 | #endif | ||