summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2017-01-11 18:00:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-03-03 13:34:48 -0500
commit707ea45e0f1d7a07885597777496b186dd5fb6f0 (patch)
tree9b48640703ccdf0108d731e66574370179a44b23 /drivers
parent3966efc2e58f1802411f44fd00967dde448f278d (diff)
gpu: nvgpu: kmem abstraction and tracking
Implement kmem abstraction and tracking in nvgpu. The abstraction helps move nvgpu's core code away from being Linux dependent and allows kmem allocation tracking to be done for Linux and any other OS supported by nvgpu. Bug 1799159 Bug 1823380 Change-Id: Ieaae4ca1bbd1d4db4a1546616ab8b9fc53a4079d Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/1283828 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/Kconfig.nvgpu11
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem.c806
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem_priv.h90
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c12
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c4
-rw-r--r--drivers/gpu/nvgpu/gk20a/debug_gk20a.c4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h7
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c6
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c6
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/kmem.h223
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h123
-rw-r--r--drivers/gpu/nvgpu/pci.c3
-rw-r--r--drivers/gpu/nvgpu/vgpu/vgpu.c4
14 files changed, 1253 insertions, 49 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig.nvgpu b/drivers/gpu/nvgpu/Kconfig.nvgpu
index 3e3607e0..8baf6897 100644
--- a/drivers/gpu/nvgpu/Kconfig.nvgpu
+++ b/drivers/gpu/nvgpu/Kconfig.nvgpu
@@ -47,6 +47,17 @@ config GK20A_DEVFREQ
47 47
48endchoice 48endchoice
49 49
50config NVGPU_TRACK_MEM_USAGE
51 bool "Track the usage of system memory in nvgpu"
52 depends on GK20A
53 default n
54 help
55 Say Y here to allow nvgpu to track and keep statistics on
56 the system memory used by the driver. This does recreate
57	  some of the kmemleak tracking but this is also applicable	
58	  to other OSes which do not have Linux's kmemleak.	
59
60
50config GK20A_CYCLE_STATS 61config GK20A_CYCLE_STATS
51 bool "Support GK20A GPU CYCLE STATS" 62 bool "Support GK20A GPU CYCLE STATS"
52 depends on GK20A 63 depends on GK20A
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index 24e0ca5d..60e79348 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -15,11 +15,22 @@
15 */ 15 */
16 16
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/mutex.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <linux/atomic.h> 20#include <linux/atomic.h>
21#include <linux/rbtree.h>
22#include <linux/debugfs.h>
23#include <linux/spinlock.h>
24#include <linux/seq_file.h>
25#include <linux/vmalloc.h>
26#include <linux/stacktrace.h>
20 27
21#include <nvgpu/kmem.h> 28#include <nvgpu/kmem.h>
22 29
30#include "gk20a/gk20a.h"
31
32#include "kmem_priv.h"
33
23/* 34/*
24 * Statically declared because this needs to be shared across all nvgpu driver 35 * Statically declared because this needs to be shared across all nvgpu driver
25 * instances. This makes sure that all kmem caches are _definitely_ uniquely 36 * instances. This makes sure that all kmem caches are _definitely_ uniquely
@@ -27,26 +38,793 @@
27 */ 38 */
28static atomic_t kmem_cache_id; 39static atomic_t kmem_cache_id;
29 40
30/* 41#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
31 * Linux specific version of the nvgpu_kmem_cache struct. This type is 42
32 * completely opaque to the rest of the driver. 43static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
44{
45 mutex_lock(&tracker->lock);
46}
47
48static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
49{
50 mutex_unlock(&tracker->lock);
51}
52
53static void kmem_print_mem_alloc(struct gk20a *g,
54 struct nvgpu_mem_alloc *alloc,
55 struct seq_file *s)
56{
57#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
58 int i;
59
60 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
61 alloc->addr, alloc->size);
62 for (i = 0; i < alloc->stack_length; i++)
63 __pstat(s, " %3d [<%p>] %pS\n", i,
64 (void *)alloc->stack[i],
65 (void *)alloc->stack[i]);
66 __pstat(s, "\n");
67#else
68 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
69 alloc->addr, alloc->size, alloc->ip);
70#endif
71}
72
73static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
74 struct nvgpu_mem_alloc *alloc)
75{
76 struct rb_node **new = &tracker->allocs.rb_node;
77 struct rb_node *parent = NULL;
78
79 while (*new) {
80 struct nvgpu_mem_alloc *tmp = rb_entry(*new,
81 struct nvgpu_mem_alloc,
82 allocs_entry);
83
84 parent = *new;
85
86 if (alloc->addr < tmp->addr)
87 new = &(*new)->rb_left;
88 else if (alloc->addr > tmp->addr)
89 new = &(*new)->rb_right;
90 else
91 return -EINVAL;
92 }
93
94 /* Put the new node there */
95 rb_link_node(&alloc->allocs_entry, parent, new);
96 rb_insert_color(&alloc->allocs_entry, &tracker->allocs);
97
98 return 0;
99}
100
101static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
102 struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
103{
104 struct rb_node *node = tracker->allocs.rb_node;
105 struct nvgpu_mem_alloc *alloc;
106
107 while (node) {
108 alloc = container_of(node,
109 struct nvgpu_mem_alloc, allocs_entry);
110
111 if (alloc_addr < alloc->addr)
112 node = node->rb_left;
113 else if (alloc_addr > alloc->addr)
114 node = node->rb_right;
115 else
116 break;
117 }
118
119 if (!node)
120 return NULL;
121
122 rb_erase(node, &tracker->allocs);
123
124 return alloc;
125}
126
127static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
128 unsigned long size, unsigned long real_size,
129 u64 addr, unsigned long ip)
130{
131 int ret;
132 struct nvgpu_mem_alloc *alloc;
133#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
134 struct stack_trace stack_trace;
135#endif
136
137 alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
138 if (!alloc)
139 return -ENOMEM;
140
141 alloc->owner = tracker;
142 alloc->size = size;
143 alloc->real_size = real_size;
144 alloc->addr = addr;
145 alloc->ip = (void *)(uintptr_t)ip;
146
147#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
148 stack_trace.max_entries = MAX_STACK_TRACE;
149 stack_trace.nr_entries = 0;
150 stack_trace.entries = alloc->stack;
151 /*
152 * This 4 here skips the 2 function calls that happen for all traced
153 * allocs due to nvgpu:
154 *
155 * __nvgpu_save_kmem_alloc+0x7c/0x128
156 * __nvgpu_track_kzalloc+0xcc/0xf8
157 *
158 * And the function calls that get made by the stack trace code itself.
159 * If the trace savings code changes this will likely have to change
160 * as well.
161 */
162 stack_trace.skip = 4;
163 save_stack_trace(&stack_trace);
164 alloc->stack_length = stack_trace.nr_entries;
165#endif
166
167 lock_tracker(tracker);
168 tracker->bytes_alloced += size;
169 tracker->bytes_alloced_real += real_size;
170 tracker->nr_allocs++;
171
172 /* Keep track of this for building a histogram later on. */
173 if (tracker->max_alloc < size)
174 tracker->max_alloc = size;
175 if (tracker->min_alloc > size)
176 tracker->min_alloc = size;
177
178 ret = nvgpu_add_alloc(tracker, alloc);
179 if (ret) {
180 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
181 kfree(alloc);
182 unlock_tracker(tracker);
183 return ret;
184 }
185 unlock_tracker(tracker);
186
187 return 0;
188}
189
190static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
191 u64 addr)
192{
193 struct nvgpu_mem_alloc *alloc;
194
195 lock_tracker(tracker);
196 alloc = nvgpu_rem_alloc(tracker, addr);
197 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
198 unlock_tracker(tracker);
199 return -EINVAL;
200 }
201
202 tracker->nr_frees++;
203 tracker->bytes_freed += alloc->size;
204 tracker->bytes_freed_real += alloc->real_size;
205 unlock_tracker(tracker);
206
207 return 0;
208}
209
210static void __nvgpu_check_valloc_size(unsigned long size)
211{
212 WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
213}
214
215static void __nvgpu_check_kalloc_size(size_t size)
216{
217 WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
218}
219
220void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
221 unsigned long ip)
222{
223 void *alloc = vmalloc(size);
224
225 if (!alloc)
226 return NULL;
227
228 kmem_dbg("vmalloc: size=%-6ld addr=0x%p", size, alloc);
229 __nvgpu_check_valloc_size(size);
230
231 /*
	232	 * Ignore the return value. If this fails let's not cause any issues
233 * for the rest of the driver.
234 */
235 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
236 (u64)(uintptr_t)alloc, ip);
237
238 return alloc;
239}
240
241void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
242 unsigned long ip)
243{
244 void *alloc = vzalloc(size);
245
246 if (!alloc)
247 return NULL;
248
249 kmem_dbg("vzalloc: size=%-6ld addr=0x%p", size, alloc);
250 __nvgpu_check_valloc_size(size);
251
252 /*
	253	 * Ignore the return value. If this fails let's not cause any issues
254 * for the rest of the driver.
255 */
256 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
257 (u64)(uintptr_t)alloc, ip);
258
259 return alloc;
260}
261
262void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
263{
264 void *alloc = kmalloc(size, GFP_KERNEL);
265
266 if (!alloc)
267 return NULL;
268
269 kmem_dbg("kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
270 size, alloc, GFP_KERNEL);
271 __nvgpu_check_kalloc_size(size);
272
273 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
274 (u64)(uintptr_t)alloc, ip);
275
276 return alloc;
277}
278
279void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
280{
281 void *alloc = kzalloc(size, GFP_KERNEL);
282
283 if (!alloc)
284 return NULL;
285
286 kmem_dbg("kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
287 size, alloc, GFP_KERNEL);
288 __nvgpu_check_kalloc_size(size);
289
290 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
291 (u64)(uintptr_t)alloc, ip);
292
293 return alloc;
294}
295
296void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
297 unsigned long ip)
298{
299 void *alloc = kcalloc(n, size, GFP_KERNEL);
300
301 if (!alloc)
302 return NULL;
303
304 kmem_dbg("kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
305 n * size, alloc, GFP_KERNEL);
306 __nvgpu_check_kalloc_size(n * size);
307
308 __nvgpu_save_kmem_alloc(g->kmallocs, n * size,
309 roundup_pow_of_two(n * size),
310 (u64)(uintptr_t)alloc, ip);
311
312 return alloc;
313}
314
315void __nvgpu_track_vfree(struct gk20a *g, void *addr)
316{
317 /*
318 * Often it is accepted practice to pass NULL pointers into free
319 * functions to save code.
320 */
321 if (!addr)
322 return;
323
324 vfree(addr);
325
326 kmem_dbg("vfree: addr=0x%p", addr);
327
328 __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
329}
330
331void __nvgpu_track_kfree(struct gk20a *g, void *addr)
332{
333 if (!addr)
334 return;
335
336 kfree(addr);
337
338 kmem_dbg("kfree: addr=0x%p", addr);
339
340 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
341}
342
343/**
344 * to_human_readable_bytes - Determine suffix for passed size.
345 *
346 * @bytes - Number of bytes to generate a suffix for.
347 * @hr_bytes [out] - The human readable number of bytes.
348 * @hr_suffix [out] - The suffix for the HR number of bytes.
349 *
350 * Computes a human readable decomposition of the passed number of bytes. The
351 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
352 * number of bytes is then passed back in @hr_bytes. This returns the following
353 * ranges:
354 *
355 * 0 - 1023 B
356 * 1 - 1023 KB
357 * 1 - 1023 MB
358 * 1 - 1023 GB
359 * 1 - 1023 TB
360 * 1 - ... PB
361 */
362static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
363 const char **hr_suffix)
364{
365 static const char *suffixes[] =
366 { "B", "KB", "MB", "GB", "TB", "PB" };
367
368 u64 suffix_ind = 0;
369
370 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
371 bytes >>= 10;
372 suffix_ind++;
373 }
374
375 /*
376 * Handle case where bytes > 1023PB.
377 */
378 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
379 suffix_ind : ARRAY_SIZE(suffixes) - 1;
380
381 *hr_bytes = bytes;
382 *hr_suffix = suffixes[suffix_ind];
383}
384
385/**
386 * print_hr_bytes - Print human readable bytes
387 *
388 * @s - A seq_file to print to. May be NULL.
389 * @msg - A message to print before the bytes.
390 * @bytes - Number of bytes.
391 *
392 * Print @msg followed by the human readable decomposition of the passed number
393 * of bytes.
394 *
	395	 * If @s is NULL then the prints will be made to the kernel log.
396 */
397static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
398{
399 u64 hr_bytes;
400 const char *hr_suffix;
401
402 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
403 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
404}
405
406/**
407 * print_histogram - Build a histogram of the memory usage.
408 *
409 * @tracker The tracking to pull data from.
410 * @s A seq_file to dump info into.
33 */ 411 */
34struct nvgpu_kmem_cache { 412static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
35 struct gk20a *g; 413 struct seq_file *s)
36 struct kmem_cache *cache; 414{
415 int i;
416 u64 pot_min, pot_max;
417 u64 nr_buckets;
418 unsigned int *buckets;
419 unsigned int total_allocs;
420 struct rb_node *node;
421 static const char histogram_line[] =
422 "++++++++++++++++++++++++++++++++++++++++";
423
424 /*
425 * pot_min is essentially a round down to the nearest power of 2. This
426 * is the start of the histogram. pot_max is just a round up to the
427 * nearest power of two. Each histogram bucket is one power of two so
428 * the histogram buckets are exponential.
429 */
430 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
431 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
432
433 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
434
435 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
436 if (!buckets) {
437 __pstat(s, "OOM: could not allocate bucket storage!?\n");
438 return;
439 }
37 440
38 /* 441 /*
39 * Memory to hold the kmem_cache unique name. Only necessary on our 442 * Iterate across all of the allocs and determine what bucket they
40 * k3.10 kernel when not using the SLUB allocator but it's easier to 443 * should go in. Round the size down to the nearest power of two to
41 * just carry this on to newer kernels. 444 * find the right bucket.
42 */ 445 */
43 char name[128]; 446 for (node = rb_first(&tracker->allocs);
447 node != NULL;
448 node = rb_next(node)) {
449 int b;
450 u64 bucket_min;
451 struct nvgpu_mem_alloc *alloc;
452
453 alloc = container_of(node, struct nvgpu_mem_alloc,
454 allocs_entry);
455 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
456 if (bucket_min < tracker->min_alloc)
457 bucket_min = tracker->min_alloc;
458
459 b = __ffs(bucket_min) - __ffs(pot_min);
460
461 /*
	462	 * Handle the one case where there's an alloc exactly as big as
463 * the maximum bucket size of the largest bucket. Most of the
464 * buckets have an inclusive minimum and exclusive maximum. But
465 * the largest bucket needs to have an _inclusive_ maximum as
466 * well.
467 */
468 if (b == (int)nr_buckets)
469 b--;
470
471 buckets[b]++;
472 }
473
474 total_allocs = 0;
475 for (i = 0; i < (int)nr_buckets; i++)
476 total_allocs += buckets[i];
477
478 __pstat(s, "Alloc histogram:\n");
479
480 /*
481 * Actually compute the histogram lines.
482 */
483 for (i = 0; i < (int)nr_buckets; i++) {
484 char this_line[sizeof(histogram_line) + 1];
485 u64 line_length;
486 u64 hr_bytes;
487 const char *hr_suffix;
488
489 memset(this_line, 0, sizeof(this_line));
490
491 /*
	492	 * Compute the normalized line length. Can't use floating point
493 * so we will just multiply everything by 1000 and use fixed
494 * point.
495 */
496 line_length = (1000 * buckets[i]) / total_allocs;
497 line_length *= sizeof(histogram_line);
498 line_length /= 1000;
499
500 memset(this_line, '+', line_length);
501
502 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
503 &hr_bytes, &hr_suffix);
504 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
505 hr_bytes, hr_bytes << 1,
506 hr_suffix, buckets[i], this_line);
507 }
508}
509
510/**
511 * nvgpu_kmem_print_stats - Print kmem tracking stats.
512 *
513 * @tracker The tracking to pull data from.
514 * @s A seq_file to dump info into.
515 *
516 * Print stats from a tracker. If @s is non-null then seq_printf() will be
517 * used with @s. Otherwise the stats are pr_info()ed.
518 */
519void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
520 struct seq_file *s)
521{
522 lock_tracker(tracker);
523
524 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
525
526 __pstat(s, "Basic Stats:\n");
527 __pstat(s, " Number of allocs %lld\n",
528 tracker->nr_allocs);
529 __pstat(s, " Number of frees %lld\n",
530 tracker->nr_frees);
531 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
532 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
533 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
534 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
535 print_hr_bytes(s, " Bytes allocated (real) ",
536 tracker->bytes_alloced_real);
537 print_hr_bytes(s, " Bytes freed (real) ",
538 tracker->bytes_freed_real);
539 __pstat(s, "\n");
540
541 print_histogram(tracker, s);
542
543 unlock_tracker(tracker);
544}
545
546#if defined(CONFIG_DEBUG_FS)
547static int __kmem_tracking_show(struct seq_file *s, void *unused)
548{
549 struct nvgpu_mem_alloc_tracker *tracker = s->private;
550
551 nvgpu_kmem_print_stats(tracker, s);
552
553 return 0;
554}
555
556static int __kmem_tracking_open(struct inode *inode, struct file *file)
557{
558 return single_open(file, __kmem_tracking_show, inode->i_private);
559}
560
561static const struct file_operations __kmem_tracking_fops = {
562 .open = __kmem_tracking_open,
563 .read = seq_read,
564 .llseek = seq_lseek,
565 .release = single_release,
566};
567
568static int __kmem_traces_dump_tracker(struct gk20a *g,
569 struct nvgpu_mem_alloc_tracker *tracker,
570 struct seq_file *s)
571{
572 struct rb_node *node;
573
574 for (node = rb_first(&tracker->allocs);
575 node != NULL;
576 node = rb_next(node)) {
577 struct nvgpu_mem_alloc *alloc;
578
579 alloc = container_of(node, struct nvgpu_mem_alloc,
580 allocs_entry);
581
582 kmem_print_mem_alloc(g, alloc, s);
583 }
584
585 return 0;
586}
587
588static int __kmem_traces_show(struct seq_file *s, void *unused)
589{
590 struct gk20a *g = s->private;
591
592 lock_tracker(g->vmallocs);
	593	 seq_puts(s, "Outstanding vmallocs:\n");
594 __kmem_traces_dump_tracker(g, g->vmallocs, s);
595 seq_puts(s, "\n");
596 unlock_tracker(g->vmallocs);
597
598 lock_tracker(g->kmallocs);
	599	 seq_puts(s, "Outstanding kmallocs:\n");
600 __kmem_traces_dump_tracker(g, g->kmallocs, s);
601 unlock_tracker(g->kmallocs);
602
603 return 0;
604}
605
606static int __kmem_traces_open(struct inode *inode, struct file *file)
607{
608 return single_open(file, __kmem_traces_show, inode->i_private);
609}
610
611static const struct file_operations __kmem_traces_fops = {
612 .open = __kmem_traces_open,
613 .read = seq_read,
614 .llseek = seq_lseek,
615 .release = single_release,
44}; 616};
45 617
618void nvgpu_kmem_debugfs_init(struct device *dev)
619{
620 struct gk20a_platform *plat = dev_get_drvdata(dev);
621 struct gk20a *g = get_gk20a(dev);
622 struct dentry *gpu_root = plat->debugfs;
623 struct dentry *node;
624
625 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
626 if (IS_ERR_OR_NULL(g->debugfs_kmem))
627 return;
628
629 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
630 g->debugfs_kmem,
631 g->vmallocs, &__kmem_tracking_fops);
632 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
633 g->debugfs_kmem,
634 g->kmallocs, &__kmem_tracking_fops);
635 node = debugfs_create_file("traces", S_IRUGO,
636 g->debugfs_kmem,
637 g, &__kmem_traces_fops);
638}
639#else
640void nvgpu_kmem_debugfs_init(struct device *dev)
641{
642}
643#endif
644
645static int __do_check_for_outstanding_allocs(
646 struct gk20a *g,
647 struct nvgpu_mem_alloc_tracker *tracker,
648 const char *type, bool silent)
649{
650 struct rb_node *node;
651 int count = 0;
652
653 for (node = rb_first(&tracker->allocs);
654 node != NULL;
655 node = rb_next(node)) {
656 struct nvgpu_mem_alloc *alloc;
657
658 alloc = container_of(node, struct nvgpu_mem_alloc,
659 allocs_entry);
660
661 if (!silent)
662 kmem_print_mem_alloc(g, alloc, NULL);
663
664 count++;
665 }
666
667 return count;
668}
669
670/**
671 * check_for_outstanding_allocs - Count and display outstanding allocs
672 *
673 * @g - The GPU.
674 * @silent - If set don't print anything about the allocs.
675 *
676 * Dump (or just count) the number of allocations left outstanding.
677 */
678static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
679{
680 int count = 0;
681
682 count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
683 silent);
684 count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
685 silent);
686
687 return count;
688}
689
690static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
691 void (*force_free_func)(const void *))
692{
693 struct rb_node *node;
694
695 while ((node = rb_first(&tracker->allocs)) != NULL) {
696 struct nvgpu_mem_alloc *alloc;
697
698 alloc = container_of(node, struct nvgpu_mem_alloc,
699 allocs_entry);
700 if (force_free_func)
701 force_free_func((void *)alloc->addr);
702
703 kfree(alloc);
704 }
705}
706
707/**
708 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
709 *
710 * @g - The GPU.
711 * @force_free - If set will also free leaked objects if possible.
712 *
713 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
714 * is non-zero then the allocation made by nvgpu is also freed. This is risky,
715 * though, as it is possible that the memory is still in use by other parts of
716 * the GPU driver not aware that this has happened.
717 *
718 * In theory it should be fine if the GPU driver has been deinitialized and
719 * there are no bugs in that code. However, if there are any bugs in that code
	720	 * then they could likely manifest as odd crashes an indeterminate amount of time
721 * in the future. So use @force_free at your own risk.
722 */
723static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
724{
725 do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
726 do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
727}
728
729void nvgpu_kmem_fini(struct gk20a *g, int flags)
730{
731 int count;
732 bool silent, force_free;
733
734 if (!flags)
735 return;
736
737 silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
738 force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
739
740 count = check_for_outstanding_allocs(g, silent);
741 nvgpu_kmem_cleanup(g, force_free);
742
743 /*
744 * If we leak objects we can either BUG() out or just WARN(). In general
745 * it doesn't make sense to BUG() on here since leaking a few objects
746 * won't crash the kernel but it can be helpful for development.
747 *
748 * If neither flag is set then we just silently do nothing.
749 */
750 if (count > 0) {
751 if (flags & NVGPU_KMEM_FINI_WARN) {
752 WARN(1, "Letting %d allocs leak!!\n", count);
753 } else if (flags & NVGPU_KMEM_FINI_BUG) {
754 gk20a_err(g->dev, "Letting %d allocs leak!!\n", count);
755 BUG();
756 }
757 }
758}
759
760int nvgpu_kmem_init(struct gk20a *g)
761{
762 int err;
763
764 g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
765 g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
766
767 if (!g->vmallocs || !g->kmallocs) {
768 err = -ENOMEM;
769 goto fail;
770 }
771
772 g->vmallocs->name = "vmalloc";
773 g->kmallocs->name = "kmalloc";
774
775 g->vmallocs->allocs = RB_ROOT;
776 g->kmallocs->allocs = RB_ROOT;
777
778 mutex_init(&g->vmallocs->lock);
779 mutex_init(&g->kmallocs->lock);
780
781 g->vmallocs->min_alloc = PAGE_SIZE;
782 g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
783
784 /*
785 * This needs to go after all the other initialization since they use
786 * the nvgpu_kzalloc() API.
787 */
788 g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
789 sizeof(struct nvgpu_mem_alloc));
790 g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
791 sizeof(struct nvgpu_mem_alloc));
792
793 if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
794 err = -ENOMEM;
795 if (g->vmallocs->allocs_cache)
796 nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
797 if (g->kmallocs->allocs_cache)
798 nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
799 goto fail;
800 }
801
802 return 0;
803
804fail:
805 if (g->vmallocs)
806 kfree(g->vmallocs);
807 if (g->kmallocs)
808 kfree(g->kmallocs);
809 return err;
810}
811
812#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
813
814int nvgpu_kmem_init(struct gk20a *g)
815{
816 return 0;
817}
818
819void nvgpu_kmem_fini(struct gk20a *g, int flags)
820{
821}
822#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
823
46struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) 824struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
47{ 825{
48 struct nvgpu_kmem_cache *cache = 826 struct nvgpu_kmem_cache *cache =
49 kzalloc(sizeof(struct nvgpu_kmem_cache), GFP_KERNEL); 827 nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
50 828
51 if (!cache) 829 if (!cache)
52 return NULL; 830 return NULL;
@@ -59,7 +837,7 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
59 cache->cache = kmem_cache_create(cache->name, 837 cache->cache = kmem_cache_create(cache->name,
60 size, size, 0, NULL); 838 size, size, 0, NULL);
61 if (!cache->cache) { 839 if (!cache->cache) {
62 kfree(cache); 840 nvgpu_kfree(g, cache);
63 return NULL; 841 return NULL;
64 } 842 }
65 843
@@ -68,8 +846,10 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
68 846
69void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) 847void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
70{ 848{
849 struct gk20a *g = cache->g;
850
71 kmem_cache_destroy(cache->cache); 851 kmem_cache_destroy(cache->cache);
72 kfree(cache); 852 nvgpu_kfree(g, cache);
73} 853}
74 854
75void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) 855void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
new file mode 100644
index 00000000..5e38ad5d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -0,0 +1,90 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __KMEM_PRIV_H__
18#define __KMEM_PRIV_H__
19
20#include <linux/rbtree.h>
21
22#define __pstat(s, fmt, msg...) \
23 do { \
24 if (s) \
25 seq_printf(s, fmt, ##msg); \
26 else \
27 pr_info(fmt, ##msg); \
28 } while (0)
29
30#define MAX_STACK_TRACE 20
31
32/*
33 * Linux specific version of the nvgpu_kmem_cache struct. This type is
34 * completely opaque to the rest of the driver.
35 */
36struct nvgpu_kmem_cache {
37 struct gk20a *g;
38 struct kmem_cache *cache;
39
40 /*
41 * Memory to hold the kmem_cache unique name. Only necessary on our
42 * k3.10 kernel when not using the SLUB allocator but it's easier to
43 * just carry this on to newer kernels.
44 */
45 char name[128];
46};
47
48#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
49
50struct nvgpu_mem_alloc {
51 struct nvgpu_mem_alloc_tracker *owner;
52
53 void *ip;
54#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
55 unsigned long stack[MAX_STACK_TRACE];
56 int stack_length;
57#endif
58
59 u64 addr;
60
61 unsigned long size;
62 unsigned long real_size;
63
64 /* Ugh - linux specific. Will need to be abstracted. */
65 struct rb_node allocs_entry;
66};
67
68/*
69 * Linux specific tracking of vmalloc, kmalloc, etc.
70 */
71struct nvgpu_mem_alloc_tracker {
72 const char *name;
73 struct nvgpu_kmem_cache *allocs_cache;
74 struct rb_root allocs;
75 struct mutex lock;
76
77 u64 bytes_alloced;
78 u64 bytes_freed;
79 u64 bytes_alloced_real;
80 u64 bytes_freed_real;
81 u64 nr_allocs;
82 u64 nr_frees;
83
84 unsigned long min_alloc;
85 unsigned long max_alloc;
86};
87
88#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
89
90#endif /* __KMEM_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f228110e..68e43259 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); 986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
987 987
988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); 988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
989 nvgpu_big_free(ch->gpfifo.pipe); 989 nvgpu_big_free(g, ch->gpfifo.pipe);
990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); 990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
991 991
992#if defined(CONFIG_GK20A_CYCLE_STATS) 992#if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1856,7 +1856,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1856 } 1856 }
1857 1857
1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { 1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
1859 c->gpfifo.pipe = nvgpu_big_malloc( 1859 c->gpfifo.pipe = nvgpu_big_malloc(g,
1860 gpfifo_size * sizeof(struct nvgpu_gpfifo)); 1860 gpfifo_size * sizeof(struct nvgpu_gpfifo));
1861 if (!c->gpfifo.pipe) { 1861 if (!c->gpfifo.pipe) {
1862 err = -ENOMEM; 1862 err = -ENOMEM;
@@ -1927,7 +1927,7 @@ clean_up_sync:
1927 c->sync = NULL; 1927 c->sync = NULL;
1928 } 1928 }
1929clean_up_unmap: 1929clean_up_unmap:
1930 nvgpu_big_free(c->gpfifo.pipe); 1930 nvgpu_big_free(g, c->gpfifo.pipe);
1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); 1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1932clean_up: 1932clean_up:
1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); 1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -2057,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2057 if (!g) { 2057 if (!g) {
2058 size = count * sizeof(struct nvgpu_gpfifo); 2058 size = count * sizeof(struct nvgpu_gpfifo);
2059 if (size) { 2059 if (size) {
2060 g = nvgpu_big_malloc(size); 2060 g = nvgpu_big_malloc(c->g, size);
2061 if (!g) 2061 if (!g)
2062 return; 2062 return;
2063 2063
2064 if (copy_from_user(g, user_gpfifo, size)) { 2064 if (copy_from_user(g, user_gpfifo, size)) {
2065 nvgpu_big_free(g); 2065 nvgpu_big_free(c->g, g);
2066 return; 2066 return;
2067 } 2067 }
2068 } 2068 }
@@ -2074,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2074 trace_write_pushbuffer(c, gp); 2074 trace_write_pushbuffer(c, gp);
2075 2075
2076 if (gpfifo_allocated) 2076 if (gpfifo_allocated)
2077 nvgpu_big_free(g); 2077 nvgpu_big_free(c->g, g);
2078} 2078}
2079 2079
2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) 2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 4a42e03f..0a0aada7 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -819,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
819 goto fail_dmabuf_put; 819 goto fail_dmabuf_put;
820 } 820 }
821 821
822 buffer = nvgpu_big_zalloc(access_limit_size); 822 buffer = nvgpu_big_zalloc(g, access_limit_size);
823 if (!buffer) { 823 if (!buffer) {
824 err = -ENOMEM; 824 err = -ENOMEM;
825 goto fail_dmabuf_put; 825 goto fail_dmabuf_put;
@@ -865,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
865fail_idle: 865fail_idle:
866 gk20a_idle(g->dev); 866 gk20a_idle(g->dev);
867fail_free_buffer: 867fail_free_buffer:
868 nvgpu_big_free(buffer); 868 nvgpu_big_free(g, buffer);
869fail_dmabuf_put: 869fail_dmabuf_put:
870 dma_buf_put(dmabuf); 870 dma_buf_put(dmabuf);
871 871
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 67f9b532..6341a962 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -21,6 +21,7 @@
21#include <linux/io.h> 21#include <linux/io.h>
22 22
23#include <nvgpu/semaphore.h> 23#include <nvgpu/semaphore.h>
24#include <nvgpu/kmem.h>
24 25
25#include "gk20a.h" 26#include "gk20a.h"
26#include "debug_gk20a.h" 27#include "debug_gk20a.h"
@@ -485,6 +486,9 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
485 gk20a_mm_debugfs_init(g->dev); 486 gk20a_mm_debugfs_init(g->dev);
486 gk20a_fifo_debugfs_init(g->dev); 487 gk20a_fifo_debugfs_init(g->dev);
487 gk20a_sched_debugfs_init(g->dev); 488 gk20a_sched_debugfs_init(g->dev);
489#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
490 nvgpu_kmem_debugfs_init(g->dev);
491#endif
488#endif 492#endif
489 493
490} 494}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 3504a32f..6b026ee2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -43,6 +43,7 @@
43#include <linux/version.h> 43#include <linux/version.h>
44 44
45#include <nvgpu/nvgpu_common.h> 45#include <nvgpu/nvgpu_common.h>
46#include <nvgpu/kmem.h>
46#include <nvgpu/allocator.h> 47#include <nvgpu/allocator.h>
47#include <nvgpu/timers.h> 48#include <nvgpu/timers.h>
48 49
@@ -1598,6 +1599,8 @@ static int gk20a_probe(struct platform_device *dev)
1598 set_gk20a(dev, gk20a); 1599 set_gk20a(dev, gk20a);
1599 gk20a->dev = &dev->dev; 1600 gk20a->dev = &dev->dev;
1600 1601
1602 nvgpu_kmem_init(gk20a);
1603
1601 gk20a->irq_stall = platform_get_irq(dev, 0); 1604 gk20a->irq_stall = platform_get_irq(dev, 0);
1602 gk20a->irq_nonstall = platform_get_irq(dev, 1); 1605 gk20a->irq_nonstall = platform_get_irq(dev, 1);
1603 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) 1606 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8006a4fe..69528c1f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -27,6 +27,7 @@ struct gk20a_ctxsw_ucode_segments;
27struct gk20a_fecs_trace; 27struct gk20a_fecs_trace;
28struct gk20a_ctxsw_trace; 28struct gk20a_ctxsw_trace;
29struct acr_desc; 29struct acr_desc;
30struct nvgpu_mem_alloc_tracker;
30 31
31#include <linux/sched.h> 32#include <linux/sched.h>
32#include <nvgpu/lock.h> 33#include <nvgpu/lock.h>
@@ -915,6 +916,7 @@ struct gk20a {
915 struct dentry *debugfs_runlist_interleave; 916 struct dentry *debugfs_runlist_interleave;
916 struct dentry *debugfs_allocators; 917 struct dentry *debugfs_allocators;
917 struct dentry *debugfs_xve; 918 struct dentry *debugfs_xve;
919 struct dentry *debugfs_kmem;
918#endif 920#endif
919 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; 921 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
920 922
@@ -1055,6 +1057,10 @@ struct gk20a {
1055 /* Check if msi is enabled */ 1057 /* Check if msi is enabled */
1056 bool msi_enabled; 1058 bool msi_enabled;
1057#endif 1059#endif
1060#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
1061 struct nvgpu_mem_alloc_tracker *vmallocs;
1062 struct nvgpu_mem_alloc_tracker *kmallocs;
1063#endif
1058}; 1064};
1059 1065
1060static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) 1066static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
@@ -1131,6 +1137,7 @@ enum gk20a_dbg_categories {
1131 gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ 1137 gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */
1132 gpu_dbg_xv = BIT(18), /* XVE debugging */ 1138 gpu_dbg_xv = BIT(18), /* XVE debugging */
1133 gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */ 1139 gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */
1140 gpu_dbg_kmem = BIT(20), /* Kmem tracking debugging */
1134 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ 1141 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
1135}; 1142};
1136 1143
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 36b85f3b..e695f02e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3424,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3424 gr->ctx_vars.local_golden_image = NULL; 3424 gr->ctx_vars.local_golden_image = NULL;
3425 3425
3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) 3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
3427 nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); 3427 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3429 3429
3430 gk20a_comptag_allocator_destroy(&gr->comp_tags); 3430 gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -8055,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; 8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); 8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
8057 8057
8058 map = nvgpu_big_zalloc(map_size); 8058 map = nvgpu_big_zalloc(g, map_size);
8059 if (!map) 8059 if (!map)
8060 return -ENOMEM; 8060 return -ENOMEM;
8061 8061
@@ -8145,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8145 return 0; 8145 return 0;
8146cleanup: 8146cleanup:
8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); 8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
8148 nvgpu_big_free(map); 8148 nvgpu_big_free(g, map);
8149 return -EINVAL; 8149 return -EINVAL;
8150} 8150}
8151 8151
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7a64f79b..2ff54653 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1487,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1487 1487
1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1489 1489
1490 buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) * 1490 buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
1491 vm->num_user_mapped_buffers); 1491 vm->num_user_mapped_buffers);
1492 if (!buffer_list) { 1492 if (!buffer_list) {
1493 nvgpu_mutex_release(&vm->update_gmmu_lock); 1493 nvgpu_mutex_release(&vm->update_gmmu_lock);
1494 return -ENOMEM; 1494 return -ENOMEM;
@@ -1572,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch); 1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch);
1573 nvgpu_mutex_release(&vm->update_gmmu_lock); 1573 nvgpu_mutex_release(&vm->update_gmmu_lock);
1574 1574
1575 nvgpu_big_free(mapped_buffers); 1575 nvgpu_big_free(vm->mm->g, mapped_buffers);
1576} 1576}
1577 1577
1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, 1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
index c08e40a6..59192525 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/kmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
@@ -14,18 +14,21 @@
14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */ 15 */
16 16
17#ifndef NVGPU_KMEM_H 17#ifndef __NVGPU_KMEM_H__
18#define NVGPU_KMEM_H 18#define __NVGPU_KMEM_H__
19 19
20#include <linux/mm.h> 20/*
21#include <linux/slab.h> 21 * Incase this isn't defined already.
22#include <linux/vmalloc.h> 22 */
23 23#ifndef _THIS_IP_
24#include <asm/page.h> 24#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
25#endif
25 26
26struct gk20a; 27struct gk20a;
27 28
28/* 29/**
30 * DOC: Kmem cache support
31 *
29 * In Linux there is support for the notion of a kmem_cache. It gives better 32 * In Linux there is support for the notion of a kmem_cache. It gives better
30 * memory usage characteristics for lots of allocations of the same size. Think 33 * memory usage characteristics for lots of allocations of the same size. Think
31 * structs that get allocated over and over. Normal kmalloc() type routines 34 * structs that get allocated over and over. Normal kmalloc() type routines
@@ -37,26 +40,200 @@ struct gk20a;
37 */ 40 */
38struct nvgpu_kmem_cache; 41struct nvgpu_kmem_cache;
39 42
43#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
44/*
45 * Uncomment this if you want to enable stack traces in the memory profiling.
46 * Since this is a fairly high overhead operation and is only necessary for
47 * debugging actual bugs it's left here for developers to enable.
48 */
49/* #define __NVGPU_SAVE_KALLOC_STACK_TRACES */
50
51/*
52 * Defined per-OS.
53 */
54struct nvgpu_mem_alloc_tracker;
55#endif
56
57
58/**
59 * nvgpu_kmem_cache_create - create an nvgpu kernel memory cache.
60 *
61 * @g The GPU driver struct using this cache.
62 * @size Size of the object allocated by the cache.
63 *
64 * This cache can be used to allocate objects of size @size. Common usage would
65 * be for a struct that gets allocated a lot. In that case @size should be
66 * sizeof(struct my_struct).
67 *
68 * A given implementation of this need not do anything special. The allocation
69 * routines can simply be passed on to nvgpu_kzalloc() if desired so packing
70 * and alignment of the structs cannot be assumed.
71 */
40struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size); 72struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size);
73
74/**
75 * nvgpu_kmem_cache_destroy - destroy a cache created by
76 * nvgpu_kmem_cache_create().
77 *
78 * @cache The cache to destroy.
79 */
41void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache); 80void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache);
42 81
82/**
83 * nvgpu_kmem_cache_alloc - Allocate an object from the cache
84 *
85 * @cache The cache to alloc from.
86 */
43void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache); 87void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache);
88
89/**
90 * nvgpu_kmem_cache_free - Free an object back to a cache
91 *
92 * @cache The cache to return the object to.
93 * @ptr Pointer to the object to free.
94 */
44void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr); 95void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr);
45 96
46static inline void *__nvgpu_big_alloc(size_t size, bool clear) 97/**
98 * nvgpu_kmalloc - Allocate from the kernel's allocator.
99 *
100 * @g: Current GPU.
101 * @size: Size of the allocation.
102 *
103 * Allocate a chunk of system memory from the kernel. Allocations larger than 1
104 * page may fail even when there may appear to be enough memory.
105 *
106 * This function may sleep so cannot be used in IRQs.
107 */
108#define nvgpu_kmalloc(g, size) __nvgpu_kmalloc(g, size, _THIS_IP_)
109
110/**
111 * nvgpu_kzalloc - Allocate from the kernel's allocator.
112 *
113 * @g: Current GPU.
114 * @size: Size of the allocation.
115 *
116 * Identical to nvgpu_kalloc() except the memory will be zeroed before being
117 * returned.
118 */
119#define nvgpu_kzalloc(g, size) __nvgpu_kzalloc(g, size, _THIS_IP_)
120
121/**
122 * nvgpu_kcalloc - Allocate from the kernel's allocator.
123 *
124 * @g: Current GPU.
125 * @n: Number of objects.
126 * @size: Size of each object.
127 *
128 * Identical to nvgpu_kalloc() except the size of the memory chunk returned is
129 * @n * @size.
130 */
131#define nvgpu_kcalloc(g, n, size) __nvgpu_kcalloc(g, n, size, _THIS_IP_)
132
133/**
134 * nvgpu_vmalloc - Allocate memory and return a map to it.
135 *
136 * @g: Current GPU.
137 * @size: Size of the allocation.
138 *
139 * Allocate some memory and return a pointer to a virtual memory mapping of
140 * that memory in the kernel's virtual address space. The underlying physical
141 * memory is not guaranteed to be contiguous (and indeed likely isn't). This
142 * allows for much larger allocations to be done without worrying about as much
143 * about physical memory fragmentation.
144 *
145 * This function may sleep.
146 */
147#define nvgpu_vmalloc(g, size) __nvgpu_vmalloc(g, size, _THIS_IP_)
148
149/**
150 * nvgpu_vzalloc - Allocate memory and return a map to it.
151 *
152 * @g: Current GPU.
153 * @size: Size of the allocation.
154 *
155 * Identical to nvgpu_vmalloc() except this will return zero'ed memory.
156 */
157#define nvgpu_vzalloc(g, size) __nvgpu_vzalloc(g, size, _THIS_IP_)
158
159/**
160 * nvgpu_kfree - Frees an alloc from nvgpu_kmalloc, nvgpu_kzalloc,
161 * nvgpu_kcalloc.
162 *
163 * @g: Current GPU.
164 * @addr: Address of object to free.
165 */
166#define nvgpu_kfree(g, addr) __nvgpu_kfree(g, addr)
167
168/**
169 * nvgpu_vfree - Frees an alloc from nvgpu_vmalloc, nvgpu_vzalloc.
170 *
171 * @g: Current GPU.
172 * @addr: Address of object to free.
173 */
174#define nvgpu_vfree(g, addr) __nvgpu_vfree(g, addr)
175
176#define kmem_dbg(fmt, args...) \
177 gk20a_dbg(gpu_dbg_kmem, fmt, ##args)
178
179/**
180 * nvgpu_kmem_init - Initialize the kmem tracking stuff.
181 *
182 *@g: The driver to init.
183 *
184 * Returns non-zero on failure.
185 */
186int nvgpu_kmem_init(struct gk20a *g);
187
188/**
189 * nvgpu_kmem_fini - Finalize the kmem tracking code
190 *
191 * @g - The GPU.
192 * @flags - Flags that control operation of this finalization.
193 *
194 * Cleanup resources used by nvgpu_kmem. Available flags for cleanup are:
195 *
196 * %NVGPU_KMEM_FINI_DO_NOTHING
197 * %NVGPU_KMEM_FINI_FORCE_CLEANUP
198 * %NVGPU_KMEM_FINI_DUMP_ALLOCS
199 * %NVGPU_KMEM_FINI_WARN
200 * %NVGPU_KMEM_FINI_BUG
201 *
202 * %NVGPU_KMEM_FINI_DO_NOTHING will be overridden by anything else specified.
203 * Put another way don't just add %NVGPU_KMEM_FINI_DO_NOTHING and expect that
204 * to suppress other flags from doing anything.
205 */
206void nvgpu_kmem_fini(struct gk20a *g, int flags);
207
208/*
209 * These will simply be ignored if CONFIG_NVGPU_TRACK_MEM_USAGE is not defined.
210 */
211#define NVGPU_KMEM_FINI_DO_NOTHING 0
212#define NVGPU_KMEM_FINI_FORCE_CLEANUP (1 << 0)
213#define NVGPU_KMEM_FINI_DUMP_ALLOCS (1 << 1)
214#define NVGPU_KMEM_FINI_WARN (1 << 2)
215#define NVGPU_KMEM_FINI_BUG (1 << 3)
216
217/*
218 * When there's other implementations make sure they are included instead of
219 * Linux when not compiling on Linux!
220 */
221#include <nvgpu/kmem_linux.h>
222
223static inline void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
47{ 224{
48 void *p; 225 void *p;
49 226
50 if (size > PAGE_SIZE) { 227 if (size > PAGE_SIZE) {
51 if (clear) 228 if (clear)
52 p = vzalloc(size); 229 p = nvgpu_vzalloc(g, size);
53 else 230 else
54 p = vmalloc(size); 231 p = nvgpu_vmalloc(g, size);
55 } else { 232 } else {
56 if (clear) 233 if (clear)
57 p = kzalloc(size, GFP_KERNEL); 234 p = nvgpu_kzalloc(g, size);
58 else 235 else
59 p = kmalloc(size, GFP_KERNEL); 236 p = nvgpu_kmalloc(g, size);
60 } 237 }
61 238
62 return p; 239 return p;
@@ -65,6 +242,7 @@ static inline void *__nvgpu_big_alloc(size_t size, bool clear)
65/** 242/**
66 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size 243 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size
67 * 244 *
245 * @g - The GPU.
68 * @size - Size of the allocation. 246 * @size - Size of the allocation.
69 * 247 *
70 * On some platforms (i.e Linux) it is possible to allocate memory directly 248 * On some platforms (i.e Linux) it is possible to allocate memory directly
@@ -83,30 +261,31 @@ static inline void *__nvgpu_big_alloc(size_t size, bool clear)
83 * Returns a pointer to a virtual address range that the kernel can access or 261 * Returns a pointer to a virtual address range that the kernel can access or
84 * %NULL on failure. 262 * %NULL on failure.
85 */ 263 */
86static inline void *nvgpu_big_malloc(size_t size) 264static inline void *nvgpu_big_malloc(struct gk20a *g, size_t size)
87{ 265{
88 return __nvgpu_big_alloc(size, false); 266 return __nvgpu_big_alloc(g, size, false);
89} 267}
90 268
91/** 269/**
92 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size 270 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size
93 * 271 *
272 * @g - The GPU.
94 * @size - Size of the allocation. 273 * @size - Size of the allocation.
95 * 274 *
96 * Zeroed memory version of nvgpu_big_malloc(). 275 * Zeroed memory version of nvgpu_big_malloc().
97 */ 276 */
98static inline void *nvgpu_big_zalloc(size_t size) 277static inline void *nvgpu_big_zalloc(struct gk20a *g, size_t size)
99{ 278{
100 return __nvgpu_big_alloc(size, true); 279 return __nvgpu_big_alloc(g, size, true);
101} 280}
102 281
103/** 282/**
104 * nvgpu_big_free - Free and alloc from nvgpu_big_zalloc() or 283 * nvgpu_big_free - Free and alloc from nvgpu_big_zalloc() or
105 * nvgpu_big_malloc(). 284 * nvgpu_big_malloc().
106 * 285 * @g - The GPU.
107 * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc(). 286 * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc().
108 */ 287 */
109static inline void nvgpu_big_free(void *p) 288static inline void nvgpu_big_free(struct gk20a *g, void *p)
110{ 289{
111 /* 290 /*
112 * This will have to be fixed eventually. Allocs that use 291 * This will have to be fixed eventually. Allocs that use
@@ -114,9 +293,9 @@ static inline void nvgpu_big_free(void *p)
114 * when freeing. 293 * when freeing.
115 */ 294 */
116 if (virt_addr_valid(p)) 295 if (virt_addr_valid(p))
117 kfree(p); 296 nvgpu_kfree(g, p);
118 else 297 else
119 vfree(p); 298 nvgpu_vfree(g, p);
120} 299}
121 300
122#endif 301#endif /* __NVGPU_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h b/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h
new file mode 100644
index 00000000..d1cd27f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h
@@ -0,0 +1,123 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_KMEM_LINUX_H__
18#define __NVGPU_KMEM_LINUX_H__
19
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/vmalloc.h>
23
24#include <asm/page.h>
25
26struct gk20a;
27struct device;
28
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
/*
 * Tracked allocation backends, implemented in common/linux/kmem.c. Each
 * takes the caller's instruction pointer @ip so the tracker can record
 * where the allocation was made.
 */
void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
			    unsigned long ip);
void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
			    unsigned long ip);
void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip);
void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip);
void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
			    unsigned long ip);
void __nvgpu_track_vfree(struct gk20a *g, void *addr);
void __nvgpu_track_kfree(struct gk20a *g, void *addr);

/* Expose the kmem tracking data under debugfs. */
void nvgpu_kmem_debugfs_init(struct device *dev);
#else
/* No-op stub so callers need not be guarded by the config option. */
static inline void nvgpu_kmem_debugfs_init(struct device *dev)
{
}
#endif
47
48/**
49 * DOC: Linux pass through kmem implementation.
50 *
51 * These are the Linux implementations of the various kmem functions defined by
52 * nvgpu. This should not be included directly - instead include <nvgpu/kmem.h>.
53 */
54
55static inline void *__nvgpu_kmalloc(struct gk20a *g, unsigned long size,
56 unsigned long ip)
57{
58#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
59 return __nvgpu_track_vmalloc(g, size, ip);
60#else
61 return kmalloc(size, GFP_KERNEL);
62#endif
63}
64
65static inline void *__nvgpu_kzalloc(struct gk20a *g, size_t size,
66 unsigned long ip)
67{
68#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
69 return __nvgpu_track_kzalloc(g, size, ip);
70#else
71 return kzalloc(size, GFP_KERNEL);
72#endif
73}
74
75static inline void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size,
76 unsigned long ip)
77{
78#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
79 return __nvgpu_track_kcalloc(g, n, size, ip);
80#else
81 return kcalloc(n, size, GFP_KERNEL);
82#endif
83}
84
/**
 * __nvgpu_vmalloc - Linux backend for nvgpu_vmalloc().
 *
 * @g:    The GPU this allocation belongs to.
 * @size: Size of the allocation in bytes.
 * @ip:   Caller's instruction pointer, consumed by the tracking code.
 *
 * Virtually-contiguous allocation; with memory tracking configured in,
 * the request is recorded via the tracked variant instead.
 */
static inline void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size,
				    unsigned long ip)
{
#ifndef CONFIG_NVGPU_TRACK_MEM_USAGE
	return vmalloc(size);
#else
	return __nvgpu_track_vmalloc(g, size, ip);
#endif
}
94
/**
 * __nvgpu_vzalloc - Linux backend for nvgpu_vzalloc().
 *
 * @g:    The GPU this allocation belongs to.
 * @size: Size of the allocation in bytes.
 * @ip:   Caller's instruction pointer, consumed by the tracking code.
 *
 * Zeroed virtually-contiguous allocation; with memory tracking configured
 * in, the request is recorded via the tracked variant instead.
 */
static inline void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size,
				    unsigned long ip)
{
#ifndef CONFIG_NVGPU_TRACK_MEM_USAGE
	return vzalloc(size);
#else
	return __nvgpu_track_vzalloc(g, size, ip);
#endif
}
104
/**
 * __nvgpu_kfree - Linux backend for nvgpu_kfree().
 *
 * @g:    The GPU the allocation belongs to.
 * @addr: Address returned by one of the nvgpu k*alloc variants.
 *
 * Releases a kmalloc-class allocation; with memory tracking configured in,
 * the free is routed through the tracker so the record is removed too.
 */
static inline void __nvgpu_kfree(struct gk20a *g, void *addr)
{
#ifndef CONFIG_NVGPU_TRACK_MEM_USAGE
	kfree(addr);
#else
	__nvgpu_track_kfree(g, addr);
#endif
}
113
/**
 * __nvgpu_vfree - Linux backend for nvgpu_vfree().
 *
 * @g:    The GPU the allocation belongs to.
 * @addr: Address returned by one of the nvgpu v*alloc variants.
 *
 * Releases a vmalloc-class allocation; with memory tracking configured in,
 * the free is routed through the tracker so the record is removed too.
 */
static inline void __nvgpu_vfree(struct gk20a *g, void *addr)
{
#ifndef CONFIG_NVGPU_TRACK_MEM_USAGE
	vfree(addr);
#else
	__nvgpu_track_vfree(g, addr);
#endif
}
122
123#endif
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c
index 3677b02d..39559dac 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/pci.c
@@ -19,6 +19,7 @@
19#include <linux/pm_runtime.h> 19#include <linux/pm_runtime.h>
20 20
21#include <nvgpu/nvgpu_common.h> 21#include <nvgpu/nvgpu_common.h>
22#include <nvgpu/kmem.h>
22 23
23#include "gk20a/gk20a.h" 24#include "gk20a/gk20a.h"
24#include "gk20a/platform_gk20a.h" 25#include "gk20a/platform_gk20a.h"
@@ -358,6 +359,8 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
358 platform->g = g; 359 platform->g = g;
359 g->dev = &pdev->dev; 360 g->dev = &pdev->dev;
360 361
362 nvgpu_kmem_init(g);
363
361 err = pci_enable_device(pdev); 364 err = pci_enable_device(pdev);
362 if (err) 365 if (err)
363 return err; 366 return err;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index d8e0dfa1..37b4633b 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -19,6 +19,8 @@
19#include <linux/pm_runtime.h> 19#include <linux/pm_runtime.h>
20#include <linux/pm_qos.h> 20#include <linux/pm_qos.h>
21 21
22#include <nvgpu/kmem.h>
23
22#include "vgpu/vgpu.h" 24#include "vgpu/vgpu.h"
23#include "vgpu/fecs_trace_vgpu.h" 25#include "vgpu/fecs_trace_vgpu.h"
24#include "gk20a/debug_gk20a.h" 26#include "gk20a/debug_gk20a.h"
@@ -562,6 +564,8 @@ int vgpu_probe(struct platform_device *pdev)
562 platform->vgpu_priv = priv; 564 platform->vgpu_priv = priv;
563 gk20a->dev = dev; 565 gk20a->dev = dev;
564 566
567 nvgpu_kmem_init(gk20a);
568
565 err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); 569 err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
566 if (err) 570 if (err)
567 return err; 571 return err;