diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/kmem.c | 806 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/kmem_priv.h | 90 |
2 files changed, 883 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c index 24e0ca5d..60e79348 100644 --- a/drivers/gpu/nvgpu/common/linux/kmem.c +++ b/drivers/gpu/nvgpu/common/linux/kmem.c | |||
@@ -15,11 +15,22 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/mutex.h> | ||
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
21 | #include <linux/rbtree.h> | ||
22 | #include <linux/debugfs.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | #include <linux/seq_file.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/stacktrace.h> | ||
20 | 27 | ||
21 | #include <nvgpu/kmem.h> | 28 | #include <nvgpu/kmem.h> |
22 | 29 | ||
30 | #include "gk20a/gk20a.h" | ||
31 | |||
32 | #include "kmem_priv.h" | ||
33 | |||
23 | /* | 34 | /* |
24 | * Statically declared because this needs to be shared across all nvgpu driver | 35 | * Statically declared because this needs to be shared across all nvgpu driver |
25 | * instances. This makes sure that all kmem caches are _definitely_ uniquely | 36 | * instances. This makes sure that all kmem caches are _definitely_ uniquely |
@@ -27,26 +38,793 @@ | |||
27 | */ | 38 | */ |
28 | static atomic_t kmem_cache_id; | 39 | static atomic_t kmem_cache_id; |
29 | 40 | ||
30 | /* | 41 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE |
31 | * Linux specific version of the nvgpu_kmem_cache struct. This type is | 42 | |
32 | * completely opaque to the rest of the driver. | 43 | static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) |
44 | { | ||
45 | mutex_lock(&tracker->lock); | ||
46 | } | ||
47 | |||
48 | static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | ||
49 | { | ||
50 | mutex_unlock(&tracker->lock); | ||
51 | } | ||
52 | |||
53 | static void kmem_print_mem_alloc(struct gk20a *g, | ||
54 | struct nvgpu_mem_alloc *alloc, | ||
55 | struct seq_file *s) | ||
56 | { | ||
57 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
58 | int i; | ||
59 | |||
60 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", | ||
61 | alloc->addr, alloc->size); | ||
62 | for (i = 0; i < alloc->stack_length; i++) | ||
63 | __pstat(s, " %3d [<%p>] %pS\n", i, | ||
64 | (void *)alloc->stack[i], | ||
65 | (void *)alloc->stack[i]); | ||
66 | __pstat(s, "\n"); | ||
67 | #else | ||
68 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", | ||
69 | alloc->addr, alloc->size, alloc->ip); | ||
70 | #endif | ||
71 | } | ||
72 | |||
73 | static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
74 | struct nvgpu_mem_alloc *alloc) | ||
75 | { | ||
76 | struct rb_node **new = &tracker->allocs.rb_node; | ||
77 | struct rb_node *parent = NULL; | ||
78 | |||
79 | while (*new) { | ||
80 | struct nvgpu_mem_alloc *tmp = rb_entry(*new, | ||
81 | struct nvgpu_mem_alloc, | ||
82 | allocs_entry); | ||
83 | |||
84 | parent = *new; | ||
85 | |||
86 | if (alloc->addr < tmp->addr) | ||
87 | new = &(*new)->rb_left; | ||
88 | else if (alloc->addr > tmp->addr) | ||
89 | new = &(*new)->rb_right; | ||
90 | else | ||
91 | return -EINVAL; | ||
92 | } | ||
93 | |||
94 | /* Put the new node there */ | ||
95 | rb_link_node(&alloc->allocs_entry, parent, new); | ||
96 | rb_insert_color(&alloc->allocs_entry, &tracker->allocs); | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static struct nvgpu_mem_alloc *nvgpu_rem_alloc( | ||
102 | struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) | ||
103 | { | ||
104 | struct rb_node *node = tracker->allocs.rb_node; | ||
105 | struct nvgpu_mem_alloc *alloc; | ||
106 | |||
107 | while (node) { | ||
108 | alloc = container_of(node, | ||
109 | struct nvgpu_mem_alloc, allocs_entry); | ||
110 | |||
111 | if (alloc_addr < alloc->addr) | ||
112 | node = node->rb_left; | ||
113 | else if (alloc_addr > alloc->addr) | ||
114 | node = node->rb_right; | ||
115 | else | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | if (!node) | ||
120 | return NULL; | ||
121 | |||
122 | rb_erase(node, &tracker->allocs); | ||
123 | |||
124 | return alloc; | ||
125 | } | ||
126 | |||
127 | static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
128 | unsigned long size, unsigned long real_size, | ||
129 | u64 addr, unsigned long ip) | ||
130 | { | ||
131 | int ret; | ||
132 | struct nvgpu_mem_alloc *alloc; | ||
133 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
134 | struct stack_trace stack_trace; | ||
135 | #endif | ||
136 | |||
137 | alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); | ||
138 | if (!alloc) | ||
139 | return -ENOMEM; | ||
140 | |||
141 | alloc->owner = tracker; | ||
142 | alloc->size = size; | ||
143 | alloc->real_size = real_size; | ||
144 | alloc->addr = addr; | ||
145 | alloc->ip = (void *)(uintptr_t)ip; | ||
146 | |||
147 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
148 | stack_trace.max_entries = MAX_STACK_TRACE; | ||
149 | stack_trace.nr_entries = 0; | ||
150 | stack_trace.entries = alloc->stack; | ||
151 | /* | ||
152 | * This 4 here skips the 2 function calls that happen for all traced | ||
153 | * allocs due to nvgpu: | ||
154 | * | ||
155 | * __nvgpu_save_kmem_alloc+0x7c/0x128 | ||
156 | * __nvgpu_track_kzalloc+0xcc/0xf8 | ||
157 | * | ||
158 | * And the function calls that get made by the stack trace code itself. | ||
159 | * If the trace savings code changes this will likely have to change | ||
160 | * as well. | ||
161 | */ | ||
162 | stack_trace.skip = 4; | ||
163 | save_stack_trace(&stack_trace); | ||
164 | alloc->stack_length = stack_trace.nr_entries; | ||
165 | #endif | ||
166 | |||
167 | lock_tracker(tracker); | ||
168 | tracker->bytes_alloced += size; | ||
169 | tracker->bytes_alloced_real += real_size; | ||
170 | tracker->nr_allocs++; | ||
171 | |||
172 | /* Keep track of this for building a histogram later on. */ | ||
173 | if (tracker->max_alloc < size) | ||
174 | tracker->max_alloc = size; | ||
175 | if (tracker->min_alloc > size) | ||
176 | tracker->min_alloc = size; | ||
177 | |||
178 | ret = nvgpu_add_alloc(tracker, alloc); | ||
179 | if (ret) { | ||
180 | WARN(1, "Duplicate alloc??? 0x%llx\n", addr); | ||
181 | kfree(alloc); | ||
182 | unlock_tracker(tracker); | ||
183 | return ret; | ||
184 | } | ||
185 | unlock_tracker(tracker); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
191 | u64 addr) | ||
192 | { | ||
193 | struct nvgpu_mem_alloc *alloc; | ||
194 | |||
195 | lock_tracker(tracker); | ||
196 | alloc = nvgpu_rem_alloc(tracker, addr); | ||
197 | if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { | ||
198 | unlock_tracker(tracker); | ||
199 | return -EINVAL; | ||
200 | } | ||
201 | |||
202 | tracker->nr_frees++; | ||
203 | tracker->bytes_freed += alloc->size; | ||
204 | tracker->bytes_freed_real += alloc->real_size; | ||
205 | unlock_tracker(tracker); | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static void __nvgpu_check_valloc_size(unsigned long size) | ||
211 | { | ||
212 | WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); | ||
213 | } | ||
214 | |||
215 | static void __nvgpu_check_kalloc_size(size_t size) | ||
216 | { | ||
217 | WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); | ||
218 | } | ||
219 | |||
220 | void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, | ||
221 | unsigned long ip) | ||
222 | { | ||
223 | void *alloc = vmalloc(size); | ||
224 | |||
225 | if (!alloc) | ||
226 | return NULL; | ||
227 | |||
228 | kmem_dbg("vmalloc: size=%-6ld addr=0x%p", size, alloc); | ||
229 | __nvgpu_check_valloc_size(size); | ||
230 | |||
231 | /* | ||
232 | * Ignore the return message. If this fails let's not cause any issues | ||
233 | * for the rest of the driver. | ||
234 | */ | ||
235 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
236 | (u64)(uintptr_t)alloc, ip); | ||
237 | |||
238 | return alloc; | ||
239 | } | ||
240 | |||
241 | void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, | ||
242 | unsigned long ip) | ||
243 | { | ||
244 | void *alloc = vzalloc(size); | ||
245 | |||
246 | if (!alloc) | ||
247 | return NULL; | ||
248 | |||
249 | kmem_dbg("vzalloc: size=%-6ld addr=0x%p", size, alloc); | ||
250 | __nvgpu_check_valloc_size(size); | ||
251 | |||
252 | /* | ||
253 | * Ignore the return message. If this fails let's not cause any issues | ||
254 | * for the rest of the driver. | ||
255 | */ | ||
256 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
257 | (u64)(uintptr_t)alloc, ip); | ||
258 | |||
259 | return alloc; | ||
260 | } | ||
261 | |||
262 | void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) | ||
263 | { | ||
264 | void *alloc = kmalloc(size, GFP_KERNEL); | ||
265 | |||
266 | if (!alloc) | ||
267 | return NULL; | ||
268 | |||
269 | kmem_dbg("kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
270 | size, alloc, GFP_KERNEL); | ||
271 | __nvgpu_check_kalloc_size(size); | ||
272 | |||
273 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
274 | (u64)(uintptr_t)alloc, ip); | ||
275 | |||
276 | return alloc; | ||
277 | } | ||
278 | |||
279 | void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) | ||
280 | { | ||
281 | void *alloc = kzalloc(size, GFP_KERNEL); | ||
282 | |||
283 | if (!alloc) | ||
284 | return NULL; | ||
285 | |||
286 | kmem_dbg("kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
287 | size, alloc, GFP_KERNEL); | ||
288 | __nvgpu_check_kalloc_size(size); | ||
289 | |||
290 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
291 | (u64)(uintptr_t)alloc, ip); | ||
292 | |||
293 | return alloc; | ||
294 | } | ||
295 | |||
296 | void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, | ||
297 | unsigned long ip) | ||
298 | { | ||
299 | void *alloc = kcalloc(n, size, GFP_KERNEL); | ||
300 | |||
301 | if (!alloc) | ||
302 | return NULL; | ||
303 | |||
304 | kmem_dbg("kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
305 | n * size, alloc, GFP_KERNEL); | ||
306 | __nvgpu_check_kalloc_size(n * size); | ||
307 | |||
308 | __nvgpu_save_kmem_alloc(g->kmallocs, n * size, | ||
309 | roundup_pow_of_two(n * size), | ||
310 | (u64)(uintptr_t)alloc, ip); | ||
311 | |||
312 | return alloc; | ||
313 | } | ||
314 | |||
315 | void __nvgpu_track_vfree(struct gk20a *g, void *addr) | ||
316 | { | ||
317 | /* | ||
318 | * Often it is accepted practice to pass NULL pointers into free | ||
319 | * functions to save code. | ||
320 | */ | ||
321 | if (!addr) | ||
322 | return; | ||
323 | |||
324 | vfree(addr); | ||
325 | |||
326 | kmem_dbg("vfree: addr=0x%p", addr); | ||
327 | |||
328 | __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); | ||
329 | } | ||
330 | |||
331 | void __nvgpu_track_kfree(struct gk20a *g, void *addr) | ||
332 | { | ||
333 | if (!addr) | ||
334 | return; | ||
335 | |||
336 | kfree(addr); | ||
337 | |||
338 | kmem_dbg("kfree: addr=0x%p", addr); | ||
339 | |||
340 | __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); | ||
341 | } | ||
342 | |||
343 | /** | ||
344 | * to_human_readable_bytes - Determine suffix for passed size. | ||
345 | * | ||
346 | * @bytes - Number of bytes to generate a suffix for. | ||
347 | * @hr_bytes [out] - The human readable number of bytes. | ||
348 | * @hr_suffix [out] - The suffix for the HR number of bytes. | ||
349 | * | ||
350 | * Computes a human readable decomposition of the passed number of bytes. The | ||
351 | * suffix for the bytes is passed back through the @hr_suffix pointer. The right | ||
352 | * number of bytes is then passed back in @hr_bytes. This returns the following | ||
353 | * ranges: | ||
354 | * | ||
355 | * 0 - 1023 B | ||
356 | * 1 - 1023 KB | ||
357 | * 1 - 1023 MB | ||
358 | * 1 - 1023 GB | ||
359 | * 1 - 1023 TB | ||
360 | * 1 - ... PB | ||
361 | */ | ||
362 | static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, | ||
363 | const char **hr_suffix) | ||
364 | { | ||
365 | static const char *suffixes[] = | ||
366 | { "B", "KB", "MB", "GB", "TB", "PB" }; | ||
367 | |||
368 | u64 suffix_ind = 0; | ||
369 | |||
370 | while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { | ||
371 | bytes >>= 10; | ||
372 | suffix_ind++; | ||
373 | } | ||
374 | |||
375 | /* | ||
376 | * Handle case where bytes > 1023PB. | ||
377 | */ | ||
378 | suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? | ||
379 | suffix_ind : ARRAY_SIZE(suffixes) - 1; | ||
380 | |||
381 | *hr_bytes = bytes; | ||
382 | *hr_suffix = suffixes[suffix_ind]; | ||
383 | } | ||
384 | |||
385 | /** | ||
386 | * print_hr_bytes - Print human readable bytes | ||
387 | * | ||
388 | * @s - A seq_file to print to. May be NULL. | ||
389 | * @msg - A message to print before the bytes. | ||
390 | * @bytes - Number of bytes. | ||
391 | * | ||
392 | * Print @msg followed by the human readable decomposition of the passed number | ||
393 | * of bytes. | ||
394 | * | ||
395 | * If @s is NULL then this prints will be made to the kernel log. | ||
396 | */ | ||
397 | static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) | ||
398 | { | ||
399 | u64 hr_bytes; | ||
400 | const char *hr_suffix; | ||
401 | |||
402 | __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); | ||
403 | __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); | ||
404 | } | ||
405 | |||
406 | /** | ||
407 | * print_histogram - Build a histogram of the memory usage. | ||
408 | * | ||
409 | * @tracker The tracking to pull data from. | ||
410 | * @s A seq_file to dump info into. | ||
33 | */ | 411 | */ |
34 | struct nvgpu_kmem_cache { | 412 | static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, |
35 | struct gk20a *g; | 413 | struct seq_file *s) |
36 | struct kmem_cache *cache; | 414 | { |
415 | int i; | ||
416 | u64 pot_min, pot_max; | ||
417 | u64 nr_buckets; | ||
418 | unsigned int *buckets; | ||
419 | unsigned int total_allocs; | ||
420 | struct rb_node *node; | ||
421 | static const char histogram_line[] = | ||
422 | "++++++++++++++++++++++++++++++++++++++++"; | ||
423 | |||
424 | /* | ||
425 | * pot_min is essentially a round down to the nearest power of 2. This | ||
426 | * is the start of the histogram. pot_max is just a round up to the | ||
427 | * nearest power of two. Each histogram bucket is one power of two so | ||
428 | * the histogram buckets are exponential. | ||
429 | */ | ||
430 | pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); | ||
431 | pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); | ||
432 | |||
433 | nr_buckets = __ffs(pot_max) - __ffs(pot_min); | ||
434 | |||
435 | buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); | ||
436 | if (!buckets) { | ||
437 | __pstat(s, "OOM: could not allocate bucket storage!?\n"); | ||
438 | return; | ||
439 | } | ||
37 | 440 | ||
38 | /* | 441 | /* |
39 | * Memory to hold the kmem_cache unique name. Only necessary on our | 442 | * Iterate across all of the allocs and determine what bucket they |
40 | * k3.10 kernel when not using the SLUB allocator but it's easier to | 443 | * should go in. Round the size down to the nearest power of two to |
41 | * just carry this on to newer kernels. | 444 | * find the right bucket. |
42 | */ | 445 | */ |
43 | char name[128]; | 446 | for (node = rb_first(&tracker->allocs); |
447 | node != NULL; | ||
448 | node = rb_next(node)) { | ||
449 | int b; | ||
450 | u64 bucket_min; | ||
451 | struct nvgpu_mem_alloc *alloc; | ||
452 | |||
453 | alloc = container_of(node, struct nvgpu_mem_alloc, | ||
454 | allocs_entry); | ||
455 | bucket_min = (u64)rounddown_pow_of_two(alloc->size); | ||
456 | if (bucket_min < tracker->min_alloc) | ||
457 | bucket_min = tracker->min_alloc; | ||
458 | |||
459 | b = __ffs(bucket_min) - __ffs(pot_min); | ||
460 | |||
461 | /* | ||
462 | * Handle the one case were there's an alloc exactly as big as | ||
463 | * the maximum bucket size of the largest bucket. Most of the | ||
464 | * buckets have an inclusive minimum and exclusive maximum. But | ||
465 | * the largest bucket needs to have an _inclusive_ maximum as | ||
466 | * well. | ||
467 | */ | ||
468 | if (b == (int)nr_buckets) | ||
469 | b--; | ||
470 | |||
471 | buckets[b]++; | ||
472 | } | ||
473 | |||
474 | total_allocs = 0; | ||
475 | for (i = 0; i < (int)nr_buckets; i++) | ||
476 | total_allocs += buckets[i]; | ||
477 | |||
478 | __pstat(s, "Alloc histogram:\n"); | ||
479 | |||
480 | /* | ||
481 | * Actually compute the histogram lines. | ||
482 | */ | ||
483 | for (i = 0; i < (int)nr_buckets; i++) { | ||
484 | char this_line[sizeof(histogram_line) + 1]; | ||
485 | u64 line_length; | ||
486 | u64 hr_bytes; | ||
487 | const char *hr_suffix; | ||
488 | |||
489 | memset(this_line, 0, sizeof(this_line)); | ||
490 | |||
491 | /* | ||
492 | * Compute the normalized line length. Cant use floating point | ||
493 | * so we will just multiply everything by 1000 and use fixed | ||
494 | * point. | ||
495 | */ | ||
496 | line_length = (1000 * buckets[i]) / total_allocs; | ||
497 | line_length *= sizeof(histogram_line); | ||
498 | line_length /= 1000; | ||
499 | |||
500 | memset(this_line, '+', line_length); | ||
501 | |||
502 | __to_human_readable_bytes(1 << (__ffs(pot_min) + i), | ||
503 | &hr_bytes, &hr_suffix); | ||
504 | __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", | ||
505 | hr_bytes, hr_bytes << 1, | ||
506 | hr_suffix, buckets[i], this_line); | ||
507 | } | ||
508 | } | ||
509 | |||
510 | /** | ||
511 | * nvgpu_kmem_print_stats - Print kmem tracking stats. | ||
512 | * | ||
513 | * @tracker The tracking to pull data from. | ||
514 | * @s A seq_file to dump info into. | ||
515 | * | ||
516 | * Print stats from a tracker. If @s is non-null then seq_printf() will be | ||
517 | * used with @s. Otherwise the stats are pr_info()ed. | ||
518 | */ | ||
519 | void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, | ||
520 | struct seq_file *s) | ||
521 | { | ||
522 | lock_tracker(tracker); | ||
523 | |||
524 | __pstat(s, "Mem tracker: %s\n\n", tracker->name); | ||
525 | |||
526 | __pstat(s, "Basic Stats:\n"); | ||
527 | __pstat(s, " Number of allocs %lld\n", | ||
528 | tracker->nr_allocs); | ||
529 | __pstat(s, " Number of frees %lld\n", | ||
530 | tracker->nr_frees); | ||
531 | print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); | ||
532 | print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); | ||
533 | print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); | ||
534 | print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); | ||
535 | print_hr_bytes(s, " Bytes allocated (real) ", | ||
536 | tracker->bytes_alloced_real); | ||
537 | print_hr_bytes(s, " Bytes freed (real) ", | ||
538 | tracker->bytes_freed_real); | ||
539 | __pstat(s, "\n"); | ||
540 | |||
541 | print_histogram(tracker, s); | ||
542 | |||
543 | unlock_tracker(tracker); | ||
544 | } | ||
545 | |||
546 | #if defined(CONFIG_DEBUG_FS) | ||
547 | static int __kmem_tracking_show(struct seq_file *s, void *unused) | ||
548 | { | ||
549 | struct nvgpu_mem_alloc_tracker *tracker = s->private; | ||
550 | |||
551 | nvgpu_kmem_print_stats(tracker, s); | ||
552 | |||
553 | return 0; | ||
554 | } | ||
555 | |||
556 | static int __kmem_tracking_open(struct inode *inode, struct file *file) | ||
557 | { | ||
558 | return single_open(file, __kmem_tracking_show, inode->i_private); | ||
559 | } | ||
560 | |||
561 | static const struct file_operations __kmem_tracking_fops = { | ||
562 | .open = __kmem_tracking_open, | ||
563 | .read = seq_read, | ||
564 | .llseek = seq_lseek, | ||
565 | .release = single_release, | ||
566 | }; | ||
567 | |||
568 | static int __kmem_traces_dump_tracker(struct gk20a *g, | ||
569 | struct nvgpu_mem_alloc_tracker *tracker, | ||
570 | struct seq_file *s) | ||
571 | { | ||
572 | struct rb_node *node; | ||
573 | |||
574 | for (node = rb_first(&tracker->allocs); | ||
575 | node != NULL; | ||
576 | node = rb_next(node)) { | ||
577 | struct nvgpu_mem_alloc *alloc; | ||
578 | |||
579 | alloc = container_of(node, struct nvgpu_mem_alloc, | ||
580 | allocs_entry); | ||
581 | |||
582 | kmem_print_mem_alloc(g, alloc, s); | ||
583 | } | ||
584 | |||
585 | return 0; | ||
586 | } | ||
587 | |||
588 | static int __kmem_traces_show(struct seq_file *s, void *unused) | ||
589 | { | ||
590 | struct gk20a *g = s->private; | ||
591 | |||
592 | lock_tracker(g->vmallocs); | ||
593 | seq_puts(s, "Oustanding vmallocs:\n"); | ||
594 | __kmem_traces_dump_tracker(g, g->vmallocs, s); | ||
595 | seq_puts(s, "\n"); | ||
596 | unlock_tracker(g->vmallocs); | ||
597 | |||
598 | lock_tracker(g->kmallocs); | ||
599 | seq_puts(s, "Oustanding kmallocs:\n"); | ||
600 | __kmem_traces_dump_tracker(g, g->kmallocs, s); | ||
601 | unlock_tracker(g->kmallocs); | ||
602 | |||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | static int __kmem_traces_open(struct inode *inode, struct file *file) | ||
607 | { | ||
608 | return single_open(file, __kmem_traces_show, inode->i_private); | ||
609 | } | ||
610 | |||
611 | static const struct file_operations __kmem_traces_fops = { | ||
612 | .open = __kmem_traces_open, | ||
613 | .read = seq_read, | ||
614 | .llseek = seq_lseek, | ||
615 | .release = single_release, | ||
44 | }; | 616 | }; |
45 | 617 | ||
618 | void nvgpu_kmem_debugfs_init(struct device *dev) | ||
619 | { | ||
620 | struct gk20a_platform *plat = dev_get_drvdata(dev); | ||
621 | struct gk20a *g = get_gk20a(dev); | ||
622 | struct dentry *gpu_root = plat->debugfs; | ||
623 | struct dentry *node; | ||
624 | |||
625 | g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root); | ||
626 | if (IS_ERR_OR_NULL(g->debugfs_kmem)) | ||
627 | return; | ||
628 | |||
629 | node = debugfs_create_file(g->vmallocs->name, S_IRUGO, | ||
630 | g->debugfs_kmem, | ||
631 | g->vmallocs, &__kmem_tracking_fops); | ||
632 | node = debugfs_create_file(g->kmallocs->name, S_IRUGO, | ||
633 | g->debugfs_kmem, | ||
634 | g->kmallocs, &__kmem_tracking_fops); | ||
635 | node = debugfs_create_file("traces", S_IRUGO, | ||
636 | g->debugfs_kmem, | ||
637 | g, &__kmem_traces_fops); | ||
638 | } | ||
639 | #else | ||
640 | void nvgpu_kmem_debugfs_init(struct device *dev) | ||
641 | { | ||
642 | } | ||
643 | #endif | ||
644 | |||
645 | static int __do_check_for_outstanding_allocs( | ||
646 | struct gk20a *g, | ||
647 | struct nvgpu_mem_alloc_tracker *tracker, | ||
648 | const char *type, bool silent) | ||
649 | { | ||
650 | struct rb_node *node; | ||
651 | int count = 0; | ||
652 | |||
653 | for (node = rb_first(&tracker->allocs); | ||
654 | node != NULL; | ||
655 | node = rb_next(node)) { | ||
656 | struct nvgpu_mem_alloc *alloc; | ||
657 | |||
658 | alloc = container_of(node, struct nvgpu_mem_alloc, | ||
659 | allocs_entry); | ||
660 | |||
661 | if (!silent) | ||
662 | kmem_print_mem_alloc(g, alloc, NULL); | ||
663 | |||
664 | count++; | ||
665 | } | ||
666 | |||
667 | return count; | ||
668 | } | ||
669 | |||
670 | /** | ||
671 | * check_for_outstanding_allocs - Count and display outstanding allocs | ||
672 | * | ||
673 | * @g - The GPU. | ||
674 | * @silent - If set don't print anything about the allocs. | ||
675 | * | ||
676 | * Dump (or just count) the number of allocations left outstanding. | ||
677 | */ | ||
678 | static int check_for_outstanding_allocs(struct gk20a *g, bool silent) | ||
679 | { | ||
680 | int count = 0; | ||
681 | |||
682 | count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", | ||
683 | silent); | ||
684 | count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", | ||
685 | silent); | ||
686 | |||
687 | return count; | ||
688 | } | ||
689 | |||
690 | static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, | ||
691 | void (*force_free_func)(const void *)) | ||
692 | { | ||
693 | struct rb_node *node; | ||
694 | |||
695 | while ((node = rb_first(&tracker->allocs)) != NULL) { | ||
696 | struct nvgpu_mem_alloc *alloc; | ||
697 | |||
698 | alloc = container_of(node, struct nvgpu_mem_alloc, | ||
699 | allocs_entry); | ||
700 | if (force_free_func) | ||
701 | force_free_func((void *)alloc->addr); | ||
702 | |||
703 | kfree(alloc); | ||
704 | } | ||
705 | } | ||
706 | |||
707 | /** | ||
708 | * nvgpu_kmem_cleanup - Cleanup the kmem tracking | ||
709 | * | ||
710 | * @g - The GPU. | ||
711 | * @force_free - If set will also free leaked objects if possible. | ||
712 | * | ||
713 | * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free | ||
714 | * is non-zero then the allocation made by nvgpu is also freed. This is risky, | ||
715 | * though, as it is possible that the memory is still in use by other parts of | ||
716 | * the GPU driver not aware that this has happened. | ||
717 | * | ||
718 | * In theory it should be fine if the GPU driver has been deinitialized and | ||
719 | * there are no bugs in that code. However, if there are any bugs in that code | ||
720 | * then they could likely manifest as odd crashes indeterminate amounts of time | ||
721 | * in the future. So use @force_free at your own risk. | ||
722 | */ | ||
723 | static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) | ||
724 | { | ||
725 | do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); | ||
726 | do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); | ||
727 | } | ||
728 | |||
729 | void nvgpu_kmem_fini(struct gk20a *g, int flags) | ||
730 | { | ||
731 | int count; | ||
732 | bool silent, force_free; | ||
733 | |||
734 | if (!flags) | ||
735 | return; | ||
736 | |||
737 | silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); | ||
738 | force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); | ||
739 | |||
740 | count = check_for_outstanding_allocs(g, silent); | ||
741 | nvgpu_kmem_cleanup(g, force_free); | ||
742 | |||
743 | /* | ||
744 | * If we leak objects we can either BUG() out or just WARN(). In general | ||
745 | * it doesn't make sense to BUG() on here since leaking a few objects | ||
746 | * won't crash the kernel but it can be helpful for development. | ||
747 | * | ||
748 | * If neither flag is set then we just silently do nothing. | ||
749 | */ | ||
750 | if (count > 0) { | ||
751 | if (flags & NVGPU_KMEM_FINI_WARN) { | ||
752 | WARN(1, "Letting %d allocs leak!!\n", count); | ||
753 | } else if (flags & NVGPU_KMEM_FINI_BUG) { | ||
754 | gk20a_err(g->dev, "Letting %d allocs leak!!\n", count); | ||
755 | BUG(); | ||
756 | } | ||
757 | } | ||
758 | } | ||
759 | |||
760 | int nvgpu_kmem_init(struct gk20a *g) | ||
761 | { | ||
762 | int err; | ||
763 | |||
764 | g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); | ||
765 | g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); | ||
766 | |||
767 | if (!g->vmallocs || !g->kmallocs) { | ||
768 | err = -ENOMEM; | ||
769 | goto fail; | ||
770 | } | ||
771 | |||
772 | g->vmallocs->name = "vmalloc"; | ||
773 | g->kmallocs->name = "kmalloc"; | ||
774 | |||
775 | g->vmallocs->allocs = RB_ROOT; | ||
776 | g->kmallocs->allocs = RB_ROOT; | ||
777 | |||
778 | mutex_init(&g->vmallocs->lock); | ||
779 | mutex_init(&g->kmallocs->lock); | ||
780 | |||
781 | g->vmallocs->min_alloc = PAGE_SIZE; | ||
782 | g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; | ||
783 | |||
784 | /* | ||
785 | * This needs to go after all the other initialization since they use | ||
786 | * the nvgpu_kzalloc() API. | ||
787 | */ | ||
788 | g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
789 | sizeof(struct nvgpu_mem_alloc)); | ||
790 | g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
791 | sizeof(struct nvgpu_mem_alloc)); | ||
792 | |||
793 | if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { | ||
794 | err = -ENOMEM; | ||
795 | if (g->vmallocs->allocs_cache) | ||
796 | nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); | ||
797 | if (g->kmallocs->allocs_cache) | ||
798 | nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); | ||
799 | goto fail; | ||
800 | } | ||
801 | |||
802 | return 0; | ||
803 | |||
804 | fail: | ||
805 | if (g->vmallocs) | ||
806 | kfree(g->vmallocs); | ||
807 | if (g->kmallocs) | ||
808 | kfree(g->kmallocs); | ||
809 | return err; | ||
810 | } | ||
811 | |||
812 | #else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
813 | |||
/* Stub: tracking disabled (CONFIG_NVGPU_TRACK_MEM_USAGE unset). */
int nvgpu_kmem_init(struct gk20a *g)
{
	return 0;
}
818 | |||
/* Stub: tracking disabled (CONFIG_NVGPU_TRACK_MEM_USAGE unset). */
void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
822 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
823 | |||
46 | struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) | 824 | struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) |
47 | { | 825 | { |
48 | struct nvgpu_kmem_cache *cache = | 826 | struct nvgpu_kmem_cache *cache = |
49 | kzalloc(sizeof(struct nvgpu_kmem_cache), GFP_KERNEL); | 827 | nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); |
50 | 828 | ||
51 | if (!cache) | 829 | if (!cache) |
52 | return NULL; | 830 | return NULL; |
@@ -59,7 +837,7 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) | |||
59 | cache->cache = kmem_cache_create(cache->name, | 837 | cache->cache = kmem_cache_create(cache->name, |
60 | size, size, 0, NULL); | 838 | size, size, 0, NULL); |
61 | if (!cache->cache) { | 839 | if (!cache->cache) { |
62 | kfree(cache); | 840 | nvgpu_kfree(g, cache); |
63 | return NULL; | 841 | return NULL; |
64 | } | 842 | } |
65 | 843 | ||
@@ -68,8 +846,10 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) | |||
68 | 846 | ||
69 | void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) | 847 | void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) |
70 | { | 848 | { |
849 | struct gk20a *g = cache->g; | ||
850 | |||
71 | kmem_cache_destroy(cache->cache); | 851 | kmem_cache_destroy(cache->cache); |
72 | kfree(cache); | 852 | nvgpu_kfree(g, cache); |
73 | } | 853 | } |
74 | 854 | ||
75 | void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) | 855 | void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) |
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h new file mode 100644 index 00000000..5e38ad5d --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __KMEM_PRIV_H__ | ||
18 | #define __KMEM_PRIV_H__ | ||
19 | |||
20 | #include <linux/rbtree.h> | ||
21 | |||
/*
 * Print to the seq_file @s when one is given, otherwise to the kernel
 * log. Lets the stats/trace dumpers serve both debugfs and printk.
 */
#define __pstat(s, fmt, msg...)				\
	do {						\
		if (s)					\
			seq_printf(s, fmt, ##msg);	\
		else					\
			pr_info(fmt, ##msg);		\
	} while (0)
29 | |||
30 | #define MAX_STACK_TRACE 20 | ||
31 | |||
/*
 * Linux specific version of the nvgpu_kmem_cache struct. This type is
 * completely opaque to the rest of the driver.
 */
struct nvgpu_kmem_cache {
	struct gk20a *g;
	struct kmem_cache *cache;

	/*
	 * Memory to hold the kmem_cache unique name. Only necessary on our
	 * k3.10 kernel when not using the SLUB allocator but it's easier to
	 * just carry this on to newer kernels.
	 */
	char name[128];
};
47 | |||
48 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
49 | |||
50 | struct nvgpu_mem_alloc { | ||
51 | struct nvgpu_mem_alloc_tracker *owner; | ||
52 | |||
53 | void *ip; | ||
54 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
55 | unsigned long stack[MAX_STACK_TRACE]; | ||
56 | int stack_length; | ||
57 | #endif | ||
58 | |||
59 | u64 addr; | ||
60 | |||
61 | unsigned long size; | ||
62 | unsigned long real_size; | ||
63 | |||
64 | /* Ugh - linux specific. Will need to be abstracted. */ | ||
65 | struct rb_node allocs_entry; | ||
66 | }; | ||
67 | |||
68 | /* | ||
69 | * Linux specific tracking of vmalloc, kmalloc, etc. | ||
70 | */ | ||
71 | struct nvgpu_mem_alloc_tracker { | ||
72 | const char *name; | ||
73 | struct nvgpu_kmem_cache *allocs_cache; | ||
74 | struct rb_root allocs; | ||
75 | struct mutex lock; | ||
76 | |||
77 | u64 bytes_alloced; | ||
78 | u64 bytes_freed; | ||
79 | u64 bytes_alloced_real; | ||
80 | u64 bytes_freed_real; | ||
81 | u64 nr_allocs; | ||
82 | u64 nr_frees; | ||
83 | |||
84 | unsigned long min_alloc; | ||
85 | unsigned long max_alloc; | ||
86 | }; | ||
87 | |||
88 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
89 | |||
90 | #endif /* __KMEM_PRIV_H__ */ | ||