Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/Kconfig.nvgpu                 11
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem.c          806
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem_priv.h      90
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c         12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c          4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c            4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c                  3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                  7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c               6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c               6
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/kmem.h          223
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h    123
-rw-r--r--  drivers/gpu/nvgpu/pci.c                          3
-rw-r--r--  drivers/gpu/nvgpu/vgpu/vgpu.c                    4
14 files changed, 1253 insertions, 49 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig.nvgpu b/drivers/gpu/nvgpu/Kconfig.nvgpu
index 3e3607e0..8baf6897 100644
--- a/drivers/gpu/nvgpu/Kconfig.nvgpu
+++ b/drivers/gpu/nvgpu/Kconfig.nvgpu
@@ -47,6 +47,17 @@ config GK20A_DEVFREQ
47 47
48endchoice 48endchoice
49 49
50config NVGPU_TRACK_MEM_USAGE
51 bool "Track the usage of system memory in nvgpu"
52 depends on GK20A
53 default n
54 help
55 Say Y here to allow nvgpu to track and keep statistics on
56 the system memory used by the driver. This recreates some
57 of the kernel's kmemleak tracking, but it is also applicable
58 to other OSes which do not have Linux's kmemleak.
59
60
50config GK20A_CYCLE_STATS 61config GK20A_CYCLE_STATS
51 bool "Support GK20A GPU CYCLE STATS" 62 bool "Support GK20A GPU CYCLE STATS"
52 depends on GK20A 63 depends on GK20A
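
For illustration, a minimal sketch (not part of this patch) of how the new CONFIG_NVGPU_TRACK_MEM_USAGE symbol typically gates code at compile time; the example_ function is hypothetical:

#include <linux/kernel.h>

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
/* Tracking builds: record statistics about the allocation. */
static void example_account_alloc(size_t size)
{
	pr_info("nvgpu: tracked alloc of %zu bytes\n", size);
}
#else
/* Non-tracking builds: compiles away to nothing. */
static inline void example_account_alloc(size_t size)
{
}
#endif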
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index 24e0ca5d..60e79348 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -15,11 +15,22 @@
15 */ 15 */
16 16
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/mutex.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <linux/atomic.h> 20#include <linux/atomic.h>
21#include <linux/rbtree.h>
22#include <linux/debugfs.h>
23#include <linux/spinlock.h>
24#include <linux/seq_file.h>
25#include <linux/vmalloc.h>
26#include <linux/stacktrace.h>
20 27
21#include <nvgpu/kmem.h> 28#include <nvgpu/kmem.h>
22 29
30#include "gk20a/gk20a.h"
31
32#include "kmem_priv.h"
33
23/* 34/*
24 * Statically declared because this needs to be shared across all nvgpu driver 35 * Statically declared because this needs to be shared across all nvgpu driver
25 * instances. This makes sure that all kmem caches are _definitely_ uniquely 36 * instances. This makes sure that all kmem caches are _definitely_ uniquely
@@ -27,26 +38,793 @@
27 */ 38 */
28static atomic_t kmem_cache_id; 39static atomic_t kmem_cache_id;
29 40
30/* 41#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
31 * Linux specific version of the nvgpu_kmem_cache struct. This type is 42
32 * completely opaque to the rest of the driver. 43static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
44{
45 mutex_lock(&tracker->lock);
46}
47
48static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
49{
50 mutex_unlock(&tracker->lock);
51}
52
53static void kmem_print_mem_alloc(struct gk20a *g,
54 struct nvgpu_mem_alloc *alloc,
55 struct seq_file *s)
56{
57#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
58 int i;
59
60 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
61 alloc->addr, alloc->size);
62 for (i = 0; i < alloc->stack_length; i++)
63 __pstat(s, " %3d [<%p>] %pS\n", i,
64 (void *)alloc->stack[i],
65 (void *)alloc->stack[i]);
66 __pstat(s, "\n");
67#else
68 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
69 alloc->addr, alloc->size, alloc->ip);
70#endif
71}
72
73static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
74 struct nvgpu_mem_alloc *alloc)
75{
76 struct rb_node **new = &tracker->allocs.rb_node;
77 struct rb_node *parent = NULL;
78
79 while (*new) {
80 struct nvgpu_mem_alloc *tmp = rb_entry(*new,
81 struct nvgpu_mem_alloc,
82 allocs_entry);
83
84 parent = *new;
85
86 if (alloc->addr < tmp->addr)
87 new = &(*new)->rb_left;
88 else if (alloc->addr > tmp->addr)
89 new = &(*new)->rb_right;
90 else
91 return -EINVAL;
92 }
93
94 /* Put the new node there */
95 rb_link_node(&alloc->allocs_entry, parent, new);
96 rb_insert_color(&alloc->allocs_entry, &tracker->allocs);
97
98 return 0;
99}
100
101static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
102 struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
103{
104 struct rb_node *node = tracker->allocs.rb_node;
105 struct nvgpu_mem_alloc *alloc;
106
107 while (node) {
108 alloc = container_of(node,
109 struct nvgpu_mem_alloc, allocs_entry);
110
111 if (alloc_addr < alloc->addr)
112 node = node->rb_left;
113 else if (alloc_addr > alloc->addr)
114 node = node->rb_right;
115 else
116 break;
117 }
118
119 if (!node)
120 return NULL;
121
122 rb_erase(node, &tracker->allocs);
123
124 return alloc;
125}
126
127static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
128 unsigned long size, unsigned long real_size,
129 u64 addr, unsigned long ip)
130{
131 int ret;
132 struct nvgpu_mem_alloc *alloc;
133#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
134 struct stack_trace stack_trace;
135#endif
136
137 alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
138 if (!alloc)
139 return -ENOMEM;
140
141 alloc->owner = tracker;
142 alloc->size = size;
143 alloc->real_size = real_size;
144 alloc->addr = addr;
145 alloc->ip = (void *)(uintptr_t)ip;
146
147#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
148 stack_trace.max_entries = MAX_STACK_TRACE;
149 stack_trace.nr_entries = 0;
150 stack_trace.entries = alloc->stack;
151 /*
152 * The skip of 4 covers the two function calls that happen for all traced
153 * allocs due to nvgpu:
154 *
155 * __nvgpu_save_kmem_alloc+0x7c/0x128
156 * __nvgpu_track_kzalloc+0xcc/0xf8
157 *
158 * And the function calls that get made by the stack trace code itself.
159 * If the trace saving code changes, this will likely have to change
160 * as well.
161 */
162 stack_trace.skip = 4;
163 save_stack_trace(&stack_trace);
164 alloc->stack_length = stack_trace.nr_entries;
165#endif
166
167 lock_tracker(tracker);
168 tracker->bytes_alloced += size;
169 tracker->bytes_alloced_real += real_size;
170 tracker->nr_allocs++;
171
172 /* Keep track of this for building a histogram later on. */
173 if (tracker->max_alloc < size)
174 tracker->max_alloc = size;
175 if (tracker->min_alloc > size)
176 tracker->min_alloc = size;
177
178 ret = nvgpu_add_alloc(tracker, alloc);
179 if (ret) {
180 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
181 kfree(alloc);
182 unlock_tracker(tracker);
183 return ret;
184 }
185 unlock_tracker(tracker);
186
187 return 0;
188}
189
190static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
191 u64 addr)
192{
193 struct nvgpu_mem_alloc *alloc;
194
195 lock_tracker(tracker);
196 alloc = nvgpu_rem_alloc(tracker, addr);
197 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
198 unlock_tracker(tracker);
199 return -EINVAL;
200 }
201
202 tracker->nr_frees++;
203 tracker->bytes_freed += alloc->size;
204 tracker->bytes_freed_real += alloc->real_size;
205 unlock_tracker(tracker);
206
207 return 0;
208}
209
210static void __nvgpu_check_valloc_size(unsigned long size)
211{
212 WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
213}
214
215static void __nvgpu_check_kalloc_size(size_t size)
216{
217 WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
218}
219
220void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
221 unsigned long ip)
222{
223 void *alloc = vmalloc(size);
224
225 if (!alloc)
226 return NULL;
227
228 kmem_dbg("vmalloc: size=%-6ld addr=0x%p", size, alloc);
229 __nvgpu_check_valloc_size(size);
230
231 /*
232 * Ignore the return value. If this fails, let's not cause any issues
233 * for the rest of the driver.
234 */
235 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
236 (u64)(uintptr_t)alloc, ip);
237
238 return alloc;
239}
240
241void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
242 unsigned long ip)
243{
244 void *alloc = vzalloc(size);
245
246 if (!alloc)
247 return NULL;
248
249 kmem_dbg("vzalloc: size=%-6ld addr=0x%p", size, alloc);
250 __nvgpu_check_valloc_size(size);
251
252 /*
253 * Ignore the return value. If this fails, let's not cause any issues
254 * for the rest of the driver.
255 */
256 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
257 (u64)(uintptr_t)alloc, ip);
258
259 return alloc;
260}
261
262void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
263{
264 void *alloc = kmalloc(size, GFP_KERNEL);
265
266 if (!alloc)
267 return NULL;
268
269 kmem_dbg("kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
270 size, alloc, GFP_KERNEL);
271 __nvgpu_check_kalloc_size(size);
272
273 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
274 (u64)(uintptr_t)alloc, ip);
275
276 return alloc;
277}
278
279void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
280{
281 void *alloc = kzalloc(size, GFP_KERNEL);
282
283 if (!alloc)
284 return NULL;
285
286 kmem_dbg("kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
287 size, alloc, GFP_KERNEL);
288 __nvgpu_check_kalloc_size(size);
289
290 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
291 (u64)(uintptr_t)alloc, ip);
292
293 return alloc;
294}
295
296void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
297 unsigned long ip)
298{
299 void *alloc = kcalloc(n, size, GFP_KERNEL);
300
301 if (!alloc)
302 return NULL;
303
304 kmem_dbg("kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
305 n * size, alloc, GFP_KERNEL);
306 __nvgpu_check_kalloc_size(n * size);
307
308 __nvgpu_save_kmem_alloc(g->kmallocs, n * size,
309 roundup_pow_of_two(n * size),
310 (u64)(uintptr_t)alloc, ip);
311
312 return alloc;
313}
314
315void __nvgpu_track_vfree(struct gk20a *g, void *addr)
316{
317 /*
318 * It is accepted practice to pass NULL pointers into free
319 * functions to simplify the calling code.
320 */
321 if (!addr)
322 return;
323
324 vfree(addr);
325
326 kmem_dbg("vfree: addr=0x%p", addr);
327
328 __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
329}
330
331void __nvgpu_track_kfree(struct gk20a *g, void *addr)
332{
333 if (!addr)
334 return;
335
336 kfree(addr);
337
338 kmem_dbg("kfree: addr=0x%p", addr);
339
340 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
341}
342
343/**
344 * to_human_readable_bytes - Determine suffix for passed size.
345 *
346 * @bytes - Number of bytes to generate a suffix for.
347 * @hr_bytes [out] - The human readable number of bytes.
348 * @hr_suffix [out] - The suffix for the HR number of bytes.
349 *
350 * Computes a human readable decomposition of the passed number of bytes. The
351 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
352 * number of bytes is then passed back in @hr_bytes. This returns the following
353 * ranges:
354 *
355 * 0 - 1023 B
356 * 1 - 1023 KB
357 * 1 - 1023 MB
358 * 1 - 1023 GB
359 * 1 - 1023 TB
360 * 1 - ... PB
361 */
362static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
363 const char **hr_suffix)
364{
365 static const char *suffixes[] =
366 { "B", "KB", "MB", "GB", "TB", "PB" };
367
368 u64 suffix_ind = 0;
369
370 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
371 bytes >>= 10;
372 suffix_ind++;
373 }
374
375 /*
376 * Handle case where bytes > 1023PB.
377 */
378 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
379 suffix_ind : ARRAY_SIZE(suffixes) - 1;
380
381 *hr_bytes = bytes;
382 *hr_suffix = suffixes[suffix_ind];
383}
384
385/**
386 * print_hr_bytes - Print human readable bytes
387 *
388 * @s - A seq_file to print to. May be NULL.
389 * @msg - A message to print before the bytes.
390 * @bytes - Number of bytes.
391 *
392 * Print @msg followed by the human readable decomposition of the passed number
393 * of bytes.
394 *
395 * If @s is NULL then the prints will be made to the kernel log.
396 */
397static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
398{
399 u64 hr_bytes;
400 const char *hr_suffix;
401
402 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
403 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
404}
405
406/**
407 * print_histogram - Build a histogram of the memory usage.
408 *
409 * @tracker The tracker to pull data from.
410 * @s A seq_file to dump info into.
33 */ 411 */
34struct nvgpu_kmem_cache { 412static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
35 struct gk20a *g; 413 struct seq_file *s)
36 struct kmem_cache *cache; 414{
415 int i;
416 u64 pot_min, pot_max;
417 u64 nr_buckets;
418 unsigned int *buckets;
419 unsigned int total_allocs;
420 struct rb_node *node;
421 static const char histogram_line[] =
422 "++++++++++++++++++++++++++++++++++++++++";
423
424 /*
425 * pot_min is essentially a round down to the nearest power of 2. This
426 * is the start of the histogram. pot_max is just a round up to the
427 * nearest power of two. Each histogram bucket is one power of two so
428 * the histogram buckets are exponential.
429 */
430 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
431 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
432
433 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
434
435 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
436 if (!buckets) {
437 __pstat(s, "OOM: could not allocate bucket storage!?\n");
438 return;
439 }
37 440
38 /* 441 /*
39 * Memory to hold the kmem_cache unique name. Only necessary on our 442 * Iterate across all of the allocs and determine what bucket they
40 * k3.10 kernel when not using the SLUB allocator but it's easier to 443 * should go in. Round the size down to the nearest power of two to
41 * just carry this on to newer kernels. 444 * find the right bucket.
42 */ 445 */
43 char name[128]; 446 for (node = rb_first(&tracker->allocs);
447 node != NULL;
448 node = rb_next(node)) {
449 int b;
450 u64 bucket_min;
451 struct nvgpu_mem_alloc *alloc;
452
453 alloc = container_of(node, struct nvgpu_mem_alloc,
454 allocs_entry);
455 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
456 if (bucket_min < tracker->min_alloc)
457 bucket_min = tracker->min_alloc;
458
459 b = __ffs(bucket_min) - __ffs(pot_min);
460
461 /*
462 * Handle the one case where there's an alloc exactly as big as
463 * the maximum bucket size of the largest bucket. Most of the
464 * buckets have an inclusive minimum and exclusive maximum. But
465 * the largest bucket needs to have an _inclusive_ maximum as
466 * well.
467 */
468 if (b == (int)nr_buckets)
469 b--;
470
471 buckets[b]++;
472 }
473
474 total_allocs = 0;
475 for (i = 0; i < (int)nr_buckets; i++)
476 total_allocs += buckets[i];
477
478 __pstat(s, "Alloc histogram:\n");
479
480 /*
481 * Actually compute the histogram lines.
482 */
483 for (i = 0; i < (int)nr_buckets; i++) {
484 char this_line[sizeof(histogram_line) + 1];
485 u64 line_length;
486 u64 hr_bytes;
487 const char *hr_suffix;
488
489 memset(this_line, 0, sizeof(this_line));
490
491 /*
492 * Compute the normalized line length. Can't use floating point,
493 * so we will just multiply everything by 1000 and use fixed
494 * point.
495 */
496 line_length = (1000 * buckets[i]) / total_allocs;
497 line_length *= sizeof(histogram_line);
498 line_length /= 1000;
499
500 memset(this_line, '+', line_length);
501
502 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
503 &hr_bytes, &hr_suffix);
504 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
505 hr_bytes, hr_bytes << 1,
506 hr_suffix, buckets[i], this_line);
507 }
508}
509
510/**
511 * nvgpu_kmem_print_stats - Print kmem tracking stats.
512 *
513 * @tracker The tracker to pull data from.
514 * @s A seq_file to dump info into.
515 *
516 * Print stats from a tracker. If @s is non-null then seq_printf() will be
517 * used with @s. Otherwise the stats are pr_info()ed.
518 */
519void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
520 struct seq_file *s)
521{
522 lock_tracker(tracker);
523
524 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
525
526 __pstat(s, "Basic Stats:\n");
527 __pstat(s, " Number of allocs %lld\n",
528 tracker->nr_allocs);
529 __pstat(s, " Number of frees %lld\n",
530 tracker->nr_frees);
531 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
532 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
533 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
534 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
535 print_hr_bytes(s, " Bytes allocated (real) ",
536 tracker->bytes_alloced_real);
537 print_hr_bytes(s, " Bytes freed (real) ",
538 tracker->bytes_freed_real);
539 __pstat(s, "\n");
540
541 print_histogram(tracker, s);
542
543 unlock_tracker(tracker);
544}
545
546#if defined(CONFIG_DEBUG_FS)
547static int __kmem_tracking_show(struct seq_file *s, void *unused)
548{
549 struct nvgpu_mem_alloc_tracker *tracker = s->private;
550
551 nvgpu_kmem_print_stats(tracker, s);
552
553 return 0;
554}
555
556static int __kmem_tracking_open(struct inode *inode, struct file *file)
557{
558 return single_open(file, __kmem_tracking_show, inode->i_private);
559}
560
561static const struct file_operations __kmem_tracking_fops = {
562 .open = __kmem_tracking_open,
563 .read = seq_read,
564 .llseek = seq_lseek,
565 .release = single_release,
566};
567
568static int __kmem_traces_dump_tracker(struct gk20a *g,
569 struct nvgpu_mem_alloc_tracker *tracker,
570 struct seq_file *s)
571{
572 struct rb_node *node;
573
574 for (node = rb_first(&tracker->allocs);
575 node != NULL;
576 node = rb_next(node)) {
577 struct nvgpu_mem_alloc *alloc;
578
579 alloc = container_of(node, struct nvgpu_mem_alloc,
580 allocs_entry);
581
582 kmem_print_mem_alloc(g, alloc, s);
583 }
584
585 return 0;
586}
587
588static int __kmem_traces_show(struct seq_file *s, void *unused)
589{
590 struct gk20a *g = s->private;
591
592 lock_tracker(g->vmallocs);
593 seq_puts(s, "Oustanding vmallocs:\n");
594 __kmem_traces_dump_tracker(g, g->vmallocs, s);
595 seq_puts(s, "\n");
596 unlock_tracker(g->vmallocs);
597
598 lock_tracker(g->kmallocs);
599 seq_puts(s, "Oustanding kmallocs:\n");
600 __kmem_traces_dump_tracker(g, g->kmallocs, s);
601 unlock_tracker(g->kmallocs);
602
603 return 0;
604}
605
606static int __kmem_traces_open(struct inode *inode, struct file *file)
607{
608 return single_open(file, __kmem_traces_show, inode->i_private);
609}
610
611static const struct file_operations __kmem_traces_fops = {
612 .open = __kmem_traces_open,
613 .read = seq_read,
614 .llseek = seq_lseek,
615 .release = single_release,
44}; 616};
45 617
618void nvgpu_kmem_debugfs_init(struct device *dev)
619{
620 struct gk20a_platform *plat = dev_get_drvdata(dev);
621 struct gk20a *g = get_gk20a(dev);
622 struct dentry *gpu_root = plat->debugfs;
623 struct dentry *node;
624
625 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
626 if (IS_ERR_OR_NULL(g->debugfs_kmem))
627 return;
628
629 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
630 g->debugfs_kmem,
631 g->vmallocs, &__kmem_tracking_fops);
632 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
633 g->debugfs_kmem,
634 g->kmallocs, &__kmem_tracking_fops);
635 node = debugfs_create_file("traces", S_IRUGO,
636 g->debugfs_kmem,
637 g, &__kmem_traces_fops);
638}
639#else
640void nvgpu_kmem_debugfs_init(struct device *dev)
641{
642}
643#endif
644
645static int __do_check_for_outstanding_allocs(
646 struct gk20a *g,
647 struct nvgpu_mem_alloc_tracker *tracker,
648 const char *type, bool silent)
649{
650 struct rb_node *node;
651 int count = 0;
652
653 for (node = rb_first(&tracker->allocs);
654 node != NULL;
655 node = rb_next(node)) {
656 struct nvgpu_mem_alloc *alloc;
657
658 alloc = container_of(node, struct nvgpu_mem_alloc,
659 allocs_entry);
660
661 if (!silent)
662 kmem_print_mem_alloc(g, alloc, NULL);
663
664 count++;
665 }
666
667 return count;
668}
669
670/**
671 * check_for_outstanding_allocs - Count and display outstanding allocs
672 *
673 * @g - The GPU.
674 * @silent - If set don't print anything about the allocs.
675 *
676 * Dump (or just count) the number of allocations left outstanding.
677 */
678static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
679{
680 int count = 0;
681
682 count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
683 silent);
684 count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
685 silent);
686
687 return count;
688}
689
690static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
691 void (*force_free_func)(const void *))
692{
693 struct rb_node *node;
694
695 while ((node = rb_first(&tracker->allocs)) != NULL) {
696 struct nvgpu_mem_alloc *alloc;
697
698 alloc = container_of(node, struct nvgpu_mem_alloc,
699 allocs_entry);
700 if (force_free_func)
701 force_free_func((void *)(uintptr_t)alloc->addr);
702
703 kfree(alloc);
704 }
705}
706
707/**
708 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
709 *
710 * @g - The GPU.
711 * @force_free - If set will also free leaked objects if possible.
712 *
713 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
714 * is non-zero then the allocations made by nvgpu are also freed. This is risky,
715 * though, as it is possible that the memory is still in use by other parts of
716 * the GPU driver not aware that this has happened.
717 *
718 * In theory it should be fine if the GPU driver has been deinitialized and
719 * there are no bugs in that code. However, if there are any bugs in that code
720 * then they could likely manifest as odd crashes at indeterminate times
721 * in the future. So use @force_free at your own risk.
722 */
723static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
724{
725 do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
726 do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
727}
728
729void nvgpu_kmem_fini(struct gk20a *g, int flags)
730{
731 int count;
732 bool silent, force_free;
733
734 if (!flags)
735 return;
736
737 silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
738 force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
739
740 count = check_for_outstanding_allocs(g, silent);
741 nvgpu_kmem_cleanup(g, force_free);
742
743 /*
744 * If we leak objects we can either BUG() out or just WARN(). In general
745 * it doesn't make sense to BUG() here since leaking a few objects
746 * won't crash the kernel but it can be helpful for development.
747 *
748 * If neither flag is set then we just silently do nothing.
749 */
750 if (count > 0) {
751 if (flags & NVGPU_KMEM_FINI_WARN) {
752 WARN(1, "Letting %d allocs leak!!\n", count);
753 } else if (flags & NVGPU_KMEM_FINI_BUG) {
754 gk20a_err(g->dev, "Letting %d allocs leak!!\n", count);
755 BUG();
756 }
757 }
758}
759
760int nvgpu_kmem_init(struct gk20a *g)
761{
762 int err;
763
764 g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
765 g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
766
767 if (!g->vmallocs || !g->kmallocs) {
768 err = -ENOMEM;
769 goto fail;
770 }
771
772 g->vmallocs->name = "vmalloc";
773 g->kmallocs->name = "kmalloc";
774
775 g->vmallocs->allocs = RB_ROOT;
776 g->kmallocs->allocs = RB_ROOT;
777
778 mutex_init(&g->vmallocs->lock);
779 mutex_init(&g->kmallocs->lock);
780
781 g->vmallocs->min_alloc = PAGE_SIZE;
782 g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
783
784 /*
785 * This needs to go after all the other initialization since they use
786 * the nvgpu_kzalloc() API.
787 */
788 g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
789 sizeof(struct nvgpu_mem_alloc));
790 g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
791 sizeof(struct nvgpu_mem_alloc));
792
793 if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
794 err = -ENOMEM;
795 if (g->vmallocs->allocs_cache)
796 nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
797 if (g->kmallocs->allocs_cache)
798 nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
799 goto fail;
800 }
801
802 return 0;
803
804fail:
805 if (g->vmallocs)
806 kfree(g->vmallocs);
807 if (g->kmallocs)
808 kfree(g->kmallocs);
809 return err;
810}
811
812#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
813
814int nvgpu_kmem_init(struct gk20a *g)
815{
816 return 0;
817}
818
819void nvgpu_kmem_fini(struct gk20a *g, int flags)
820{
821}
822#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
823
46struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) 824struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
47{ 825{
48 struct nvgpu_kmem_cache *cache = 826 struct nvgpu_kmem_cache *cache =
49 kzalloc(sizeof(struct nvgpu_kmem_cache), GFP_KERNEL); 827 nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
50 828
51 if (!cache) 829 if (!cache)
52 return NULL; 830 return NULL;
@@ -59,7 +837,7 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
59 cache->cache = kmem_cache_create(cache->name, 837 cache->cache = kmem_cache_create(cache->name,
60 size, size, 0, NULL); 838 size, size, 0, NULL);
61 if (!cache->cache) { 839 if (!cache->cache) {
62 kfree(cache); 840 nvgpu_kfree(g, cache);
63 return NULL; 841 return NULL;
64 } 842 }
65 843
@@ -68,8 +846,10 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
68 846
69void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) 847void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
70{ 848{
849 struct gk20a *g = cache->g;
850
71 kmem_cache_destroy(cache->cache); 851 kmem_cache_destroy(cache->cache);
72 kfree(cache); 852 nvgpu_kfree(g, cache);
73} 853}
74 854
75void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) 855void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
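
To show how the tracked allocators above are meant to be consumed, here is a hedged sketch of a caller going through the nvgpu_kzalloc()/nvgpu_kfree() macros from <nvgpu/kmem.h>; struct example_entry and the function are hypothetical:

#include <nvgpu/kmem.h>

/* Hypothetical bookkeeping struct allocated through the wrappers. */
struct example_entry {
	int id;
	u64 payload;
};

static int example_use_tracked_kmem(struct gk20a *g)
{
	struct example_entry *e;

	/*
	 * With CONFIG_NVGPU_TRACK_MEM_USAGE=y this expands to
	 * __nvgpu_kzalloc(g, size, _THIS_IP_), which calls kzalloc() and
	 * then records size, address and call site in g->kmallocs.
	 */
	e = nvgpu_kzalloc(g, sizeof(*e));
	if (!e)
		return -ENOMEM;

	e->id = 42;

	/* Freeing removes the rb-tree entry and updates the free stats. */
	nvgpu_kfree(g, e);
	return 0;
}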
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
new file mode 100644
index 00000000..5e38ad5d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -0,0 +1,90 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __KMEM_PRIV_H__
18#define __KMEM_PRIV_H__
19
20#include <linux/rbtree.h>
21
22#define __pstat(s, fmt, msg...) \
23 do { \
24 if (s) \
25 seq_printf(s, fmt, ##msg); \
26 else \
27 pr_info(fmt, ##msg); \
28 } while (0)
29
30#define MAX_STACK_TRACE 20
31
32/*
33 * Linux specific version of the nvgpu_kmem_cache struct. This type is
34 * completely opaque to the rest of the driver.
35 */
36struct nvgpu_kmem_cache {
37 struct gk20a *g;
38 struct kmem_cache *cache;
39
40 /*
41 * Memory to hold the kmem_cache unique name. Only necessary on our
42 * k3.10 kernel when not using the SLUB allocator but it's easier to
43 * just carry this on to newer kernels.
44 */
45 char name[128];
46};
47
48#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
49
50struct nvgpu_mem_alloc {
51 struct nvgpu_mem_alloc_tracker *owner;
52
53 void *ip;
54#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
55 unsigned long stack[MAX_STACK_TRACE];
56 int stack_length;
57#endif
58
59 u64 addr;
60
61 unsigned long size;
62 unsigned long real_size;
63
64 /* Ugh - linux specific. Will need to be abstracted. */
65 struct rb_node allocs_entry;
66};
67
68/*
69 * Linux specific tracking of vmalloc, kmalloc, etc.
70 */
71struct nvgpu_mem_alloc_tracker {
72 const char *name;
73 struct nvgpu_kmem_cache *allocs_cache;
74 struct rb_root allocs;
75 struct mutex lock;
76
77 u64 bytes_alloced;
78 u64 bytes_freed;
79 u64 bytes_alloced_real;
80 u64 bytes_freed_real;
81 u64 nr_allocs;
82 u64 nr_frees;
83
84 unsigned long min_alloc;
85 unsigned long max_alloc;
86};
87
88#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
89
90#endif /* __KMEM_PRIV_H__ */
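
The allocs rb_root above is walked with the kernel's standard rb_first()/rb_next() iteration, as the tracking code in kmem.c does. A hedged sketch of that pattern (the function is hypothetical; the caller must hold tracker->lock):

#include <linux/rbtree.h>

#include "kmem_priv.h"

/* Sum the sizes of all outstanding allocations in a tracker. */
static u64 example_outstanding_bytes(struct nvgpu_mem_alloc_tracker *tracker)
{
	struct rb_node *node;
	u64 bytes = 0;

	/* rb_first()/rb_next() visit entries in ascending addr order. */
	for (node = rb_first(&tracker->allocs); node; node = rb_next(node)) {
		struct nvgpu_mem_alloc *alloc =
			rb_entry(node, struct nvgpu_mem_alloc, allocs_entry);

		bytes += alloc->size;
	}

	return bytes;
}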
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f228110e..68e43259 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); 986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
987 987
988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); 988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
989 nvgpu_big_free(ch->gpfifo.pipe); 989 nvgpu_big_free(g, ch->gpfifo.pipe);
990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); 990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
991 991
992#if defined(CONFIG_GK20A_CYCLE_STATS) 992#if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1856,7 +1856,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1856 } 1856 }
1857 1857
1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { 1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
1859 c->gpfifo.pipe = nvgpu_big_malloc( 1859 c->gpfifo.pipe = nvgpu_big_malloc(g,
1860 gpfifo_size * sizeof(struct nvgpu_gpfifo)); 1860 gpfifo_size * sizeof(struct nvgpu_gpfifo));
1861 if (!c->gpfifo.pipe) { 1861 if (!c->gpfifo.pipe) {
1862 err = -ENOMEM; 1862 err = -ENOMEM;
@@ -1927,7 +1927,7 @@ clean_up_sync:
1927 c->sync = NULL; 1927 c->sync = NULL;
1928 } 1928 }
1929clean_up_unmap: 1929clean_up_unmap:
1930 nvgpu_big_free(c->gpfifo.pipe); 1930 nvgpu_big_free(g, c->gpfifo.pipe);
1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); 1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1932clean_up: 1932clean_up:
1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); 1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -2057,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2057 if (!g) { 2057 if (!g) {
2058 size = count * sizeof(struct nvgpu_gpfifo); 2058 size = count * sizeof(struct nvgpu_gpfifo);
2059 if (size) { 2059 if (size) {
2060 g = nvgpu_big_malloc(size); 2060 g = nvgpu_big_malloc(c->g, size);
2061 if (!g) 2061 if (!g)
2062 return; 2062 return;
2063 2063
2064 if (copy_from_user(g, user_gpfifo, size)) { 2064 if (copy_from_user(g, user_gpfifo, size)) {
2065 nvgpu_big_free(g); 2065 nvgpu_big_free(c->g, g);
2066 return; 2066 return;
2067 } 2067 }
2068 } 2068 }
@@ -2074,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2074 trace_write_pushbuffer(c, gp); 2074 trace_write_pushbuffer(c, gp);
2075 2075
2076 if (gpfifo_allocated) 2076 if (gpfifo_allocated)
2077 nvgpu_big_free(g); 2077 nvgpu_big_free(c->g, g);
2078} 2078}
2079 2079
2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) 2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 4a42e03f..0a0aada7 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -819,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
819 goto fail_dmabuf_put; 819 goto fail_dmabuf_put;
820 } 820 }
821 821
822 buffer = nvgpu_big_zalloc(access_limit_size); 822 buffer = nvgpu_big_zalloc(g, access_limit_size);
823 if (!buffer) { 823 if (!buffer) {
824 err = -ENOMEM; 824 err = -ENOMEM;
825 goto fail_dmabuf_put; 825 goto fail_dmabuf_put;
@@ -865,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
865fail_idle: 865fail_idle:
866 gk20a_idle(g->dev); 866 gk20a_idle(g->dev);
867fail_free_buffer: 867fail_free_buffer:
868 nvgpu_big_free(buffer); 868 nvgpu_big_free(g, buffer);
869fail_dmabuf_put: 869fail_dmabuf_put:
870 dma_buf_put(dmabuf); 870 dma_buf_put(dmabuf);
871 871
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 67f9b532..6341a962 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -21,6 +21,7 @@
21#include <linux/io.h> 21#include <linux/io.h>
22 22
23#include <nvgpu/semaphore.h> 23#include <nvgpu/semaphore.h>
24#include <nvgpu/kmem.h>
24 25
25#include "gk20a.h" 26#include "gk20a.h"
26#include "debug_gk20a.h" 27#include "debug_gk20a.h"
@@ -485,6 +486,9 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
485 gk20a_mm_debugfs_init(g->dev); 486 gk20a_mm_debugfs_init(g->dev);
486 gk20a_fifo_debugfs_init(g->dev); 487 gk20a_fifo_debugfs_init(g->dev);
487 gk20a_sched_debugfs_init(g->dev); 488 gk20a_sched_debugfs_init(g->dev);
489#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
490 nvgpu_kmem_debugfs_init(g->dev);
491#endif
488#endif 492#endif
489 493
490} 494}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 3504a32f..6b026ee2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -43,6 +43,7 @@
43#include <linux/version.h> 43#include <linux/version.h>
44 44
45#include <nvgpu/nvgpu_common.h> 45#include <nvgpu/nvgpu_common.h>
46#include <nvgpu/kmem.h>
46#include <nvgpu/allocator.h> 47#include <nvgpu/allocator.h>
47#include <nvgpu/timers.h> 48#include <nvgpu/timers.h>
48 49
@@ -1598,6 +1599,8 @@ static int gk20a_probe(struct platform_device *dev)
1598 set_gk20a(dev, gk20a); 1599 set_gk20a(dev, gk20a);
1599 gk20a->dev = &dev->dev; 1600 gk20a->dev = &dev->dev;
1600 1601
1602 nvgpu_kmem_init(gk20a);
1603
1601 gk20a->irq_stall = platform_get_irq(dev, 0); 1604 gk20a->irq_stall = platform_get_irq(dev, 0);
1602 gk20a->irq_nonstall = platform_get_irq(dev, 1); 1605 gk20a->irq_nonstall = platform_get_irq(dev, 1);
1603 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) 1606 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8006a4fe..69528c1f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -27,6 +27,7 @@ struct gk20a_ctxsw_ucode_segments;
27struct gk20a_fecs_trace; 27struct gk20a_fecs_trace;
28struct gk20a_ctxsw_trace; 28struct gk20a_ctxsw_trace;
29struct acr_desc; 29struct acr_desc;
30struct nvgpu_mem_alloc_tracker;
30 31
31#include <linux/sched.h> 32#include <linux/sched.h>
32#include <nvgpu/lock.h> 33#include <nvgpu/lock.h>
@@ -915,6 +916,7 @@ struct gk20a {
915 struct dentry *debugfs_runlist_interleave; 916 struct dentry *debugfs_runlist_interleave;
916 struct dentry *debugfs_allocators; 917 struct dentry *debugfs_allocators;
917 struct dentry *debugfs_xve; 918 struct dentry *debugfs_xve;
919 struct dentry *debugfs_kmem;
918#endif 920#endif
919 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; 921 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
920 922
@@ -1055,6 +1057,10 @@ struct gk20a {
1055 /* Check if msi is enabled */ 1057 /* Check if msi is enabled */
1056 bool msi_enabled; 1058 bool msi_enabled;
1057#endif 1059#endif
1060#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
1061 struct nvgpu_mem_alloc_tracker *vmallocs;
1062 struct nvgpu_mem_alloc_tracker *kmallocs;
1063#endif
1058}; 1064};
1059 1065
1060static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) 1066static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
@@ -1131,6 +1137,7 @@ enum gk20a_dbg_categories {
1131 gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ 1137 gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */
1132 gpu_dbg_xv = BIT(18), /* XVE debugging */ 1138 gpu_dbg_xv = BIT(18), /* XVE debugging */
1133 gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */ 1139 gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */
1140 gpu_dbg_kmem = BIT(20), /* Kmem tracking debugging */
1134 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ 1141 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
1135}; 1142};
1136 1143
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 36b85f3b..e695f02e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3424,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3424 gr->ctx_vars.local_golden_image = NULL; 3424 gr->ctx_vars.local_golden_image = NULL;
3425 3425
3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) 3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
3427 nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); 3427 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3429 3429
3430 gk20a_comptag_allocator_destroy(&gr->comp_tags); 3430 gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -8055,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; 8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); 8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
8057 8057
8058 map = nvgpu_big_zalloc(map_size); 8058 map = nvgpu_big_zalloc(g, map_size);
8059 if (!map) 8059 if (!map)
8060 return -ENOMEM; 8060 return -ENOMEM;
8061 8061
@@ -8145,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8145 return 0; 8145 return 0;
8146cleanup: 8146cleanup:
8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); 8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
8148 nvgpu_big_free(map); 8148 nvgpu_big_free(g, map);
8149 return -EINVAL; 8149 return -EINVAL;
8150} 8150}
8151 8151
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7a64f79b..2ff54653 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1487,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1487 1487
1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1489 1489
1490 buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) * 1490 buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
1491 vm->num_user_mapped_buffers); 1491 vm->num_user_mapped_buffers);
1492 if (!buffer_list) { 1492 if (!buffer_list) {
1493 nvgpu_mutex_release(&vm->update_gmmu_lock); 1493 nvgpu_mutex_release(&vm->update_gmmu_lock);
1494 return -ENOMEM; 1494 return -ENOMEM;
@@ -1572,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch); 1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch);
1573 nvgpu_mutex_release(&vm->update_gmmu_lock); 1573 nvgpu_mutex_release(&vm->update_gmmu_lock);
1574 1574
1575 nvgpu_big_free(mapped_buffers); 1575 nvgpu_big_free(vm->mm->g, mapped_buffers);
1576} 1576}
1577 1577
1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, 1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
index c08e40a6..59192525 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/kmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
@@ -14,18 +14,21 @@
14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */ 15 */
16 16
17#ifndef NVGPU_KMEM_H 17#ifndef __NVGPU_KMEM_H__
18#define NVGPU_KMEM_H 18#define __NVGPU_KMEM_H__
19 19
20#include <linux/mm.h> 20/*
21#include <linux/slab.h> 21 * Incase this isn't defined already.
22#include <linux/vmalloc.h> 22 */
23 23#ifndef _THIS_IP_
24#include <asm/page.h> 24#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
25#endif
25 26
26struct gk20a; 27struct gk20a;
27 28
28/* 29/**
30 * DOC: Kmem cache support
31 *
29 * In Linux there is support for the notion of a kmem_cache. It gives better 32 * In Linux there is support for the notion of a kmem_cache. It gives better
30 * memory usage characteristics for lots of allocations of the same size. Think 33 * memory usage characteristics for lots of allocations of the same size. Think
31 * structs that get allocated over and over. Normal kmalloc() type routines 34 * structs that get allocated over and over. Normal kmalloc() type routines
@@ -37,26 +40,200 @@ struct gk20a;
37 */ 40 */
38struct nvgpu_kmem_cache; 41struct nvgpu_kmem_cache;
39 42
43#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
44/*
45 * Uncomment this if you want to enable stack traces in the memory profiling.
46 * Since this is a fairly high overhead operation and is only necessary for
47 * debugging actual bugs, it's left here for developers to enable.
48 */
49/* #define __NVGPU_SAVE_KALLOC_STACK_TRACES */
50
51/*
52 * Defined per-OS.
53 */
54struct nvgpu_mem_alloc_tracker;
55#endif
56
57
58/**
59 * nvgpu_kmem_cache_create - create an nvgpu kernel memory cache.
60 *
61 * @g The GPU driver struct using this cache.
62 * @size Size of the object allocated by the cache.
63 *
64 * This cache can be used to allocate objects of size @size. Common usage would
65 * be for a struct that gets allocated a lot. In that case @size should be
66 * sizeof(struct my_struct).
67 *
68 * A given implementation of this need not do anything special. The allocation
69 * routines can simply be passed on to nvgpu_kzalloc() if desired, so packing
70 * and alignment of the structs cannot be assumed.
71 */
40struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size); 72struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size);
73
74/**
75 * nvgpu_kmem_cache_destroy - destroy a cache created by
76 * nvgpu_kmem_cache_create().
77 *
78 * @cache The cache to destroy.
79 */
41void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache); 80void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache);
42 81
82/**
83 * nvgpu_kmem_cache_alloc - Allocate an object from the cache
84 *
85 * @cache The cache to alloc from.
86 */
43void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache); 87void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache);
88
89/**
90 * nvgpu_kmem_cache_free - Free an object back to a cache
91 *
92 * @cache The cache to return the object to.
93 * @ptr Pointer to the object to free.
94 */
44void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr); 95void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr);
45 96
46static inline void *__nvgpu_big_alloc(size_t size, bool clear) 97/**
98 * nvgpu_kmalloc - Allocate from the kernel's allocator.
99 *
100 * @g: Current GPU.
101 * @size: Size of the allocation.
102 *
103 * Allocate a chunk of system memory from the kernel. Allocations larger than 1
104 * page may fail even when there appears to be enough memory.
105 *
106 * This function may sleep, so it cannot be used in IRQ context.
107 */
108#define nvgpu_kmalloc(g, size) __nvgpu_kmalloc(g, size, _THIS_IP_)
109
110/**
111 * nvgpu_kzalloc - Allocate from the kernel's allocator.
112 *
113 * @g: Current GPU.
114 * @size: Size of the allocation.
115 *
116 * Identical to nvgpu_kmalloc() except the memory will be zeroed before being
117 * returned.
118 */
119#define nvgpu_kzalloc(g, size) __nvgpu_kzalloc(g, size, _THIS_IP_)
120
121/**
122 * nvgpu_kcalloc - Allocate from the kernel's allocator.
123 *
124 * @g: Current GPU.
125 * @n: Number of objects.
126 * @size: Size of each object.
127 *
128 * Identical to nvgpu_kmalloc() except the size of the memory chunk returned is
129 * @n * @size.
130 */
131#define nvgpu_kcalloc(g, n, size) __nvgpu_kcalloc(g, n, size, _THIS_IP_)
132
133/**
134 * nvgpu_vmalloc - Allocate memory and return a map to it.
135 *
136 * @g: Current GPU.
137 * @size: Size of the allocation.
138 *
139 * Allocate some memory and return a pointer to a virtual memory mapping of
140 * that memory in the kernel's virtual address space. The underlying physical
141 * memory is not guaranteed to be contiguous (and indeed likely isn't). This
142 * allows for much larger allocations to be done without worrying as much
143 * about physical memory fragmentation.
144 *
145 * This function may sleep.
146 */
147#define nvgpu_vmalloc(g, size) __nvgpu_vmalloc(g, size, _THIS_IP_)
148
149/**
150 * nvgpu_vzalloc - Allocate memory and return a map to it.
151 *
152 * @g: Current GPU.
153 * @size: Size of the allocation.
154 *
155 * Identical to nvgpu_vmalloc() except this will return zeroed memory.
156 */
157#define nvgpu_vzalloc(g, size) __nvgpu_vzalloc(g, size, _THIS_IP_)
158
159/**
160 * nvgpu_kfree - Frees an alloc from nvgpu_kmalloc, nvgpu_kzalloc,
161 * nvgpu_kcalloc.
162 *
163 * @g: Current GPU.
164 * @addr: Address of object to free.
165 */
166#define nvgpu_kfree(g, addr) __nvgpu_kfree(g, addr)
167
168/**
169 * nvgpu_vfree - Frees an alloc from nvgpu_vmalloc, nvgpu_vzalloc.
170 *
171 * @g: Current GPU.
172 * @addr: Address of object to free.
173 */
174#define nvgpu_vfree(g, addr) __nvgpu_vfree(g, addr)
175
176#define kmem_dbg(fmt, args...) \
177 gk20a_dbg(gpu_dbg_kmem, fmt, ##args)
178
179/**
180 * nvgpu_kmem_init - Initialize the kmem tracking code.
181 *
182 * @g: The driver to init.
183 *
184 * Returns non-zero on failure.
185 */
186int nvgpu_kmem_init(struct gk20a *g);
187
188/**
189 * nvgpu_kmem_fini - Finalize the kmem tracking code
190 *
191 * @g - The GPU.
192 * @flags - Flags that control operation of this finalization.
193 *
194 * Cleanup resources used by nvgpu_kmem. Available flags for cleanup are:
195 *
196 * %NVGPU_KMEM_FINI_DO_NOTHING
197 * %NVGPU_KMEM_FINI_FORCE_CLEANUP
198 * %NVGPU_KMEM_FINI_DUMP_ALLOCS
199 * %NVGPU_KMEM_FINI_WARN
200 * %NVGPU_KMEM_FINI_BUG
201 *
202 * %NVGPU_KMEM_FINI_DO_NOTHING will be overridden by anything else specified.
203 * Put another way: don't just add %NVGPU_KMEM_FINI_DO_NOTHING and expect that
204 * to suppress other flags from doing anything.
205 */
206void nvgpu_kmem_fini(struct gk20a *g, int flags);
207
208/*
209 * These will simply be ignored if CONFIG_NVGPU_TRACK_MEM_USAGE is not defined.
210 */
211#define NVGPU_KMEM_FINI_DO_NOTHING 0
212#define NVGPU_KMEM_FINI_FORCE_CLEANUP (1 << 0)
213#define NVGPU_KMEM_FINI_DUMP_ALLOCS (1 << 1)
214#define NVGPU_KMEM_FINI_WARN (1 << 2)
215#define NVGPU_KMEM_FINI_BUG (1 << 3)
216
217/*
218 * When there's other implementations make sure they are included instead of
219 * Linux when not compiling on Linux!
220 */
221#include <nvgpu/kmem_linux.h>
222
223static inline void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
47{ 224{
48 void *p; 225 void *p;
49 226
50 if (size > PAGE_SIZE) { 227 if (size > PAGE_SIZE) {
51 if (clear) 228 if (clear)
52 p = vzalloc(size); 229 p = nvgpu_vzalloc(g, size);
53 else 230 else
54 p = vmalloc(size); 231 p = nvgpu_vmalloc(g, size);
55 } else { 232 } else {
56 if (clear) 233 if (clear)
57 p = kzalloc(size, GFP_KERNEL); 234 p = nvgpu_kzalloc(g, size);
58 else 235 else
59 p = kmalloc(size, GFP_KERNEL); 236 p = nvgpu_kmalloc(g, size);
60 } 237 }
61 238
62 return p; 239 return p;
@@ -65,6 +242,7 @@ static inline void *__nvgpu_big_alloc(size_t size, bool clear)
65/** 242/**
66 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size 243 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size
67 * 244 *
245 * @g - The GPU.
68 * @size - Size of the allocation. 246 * @size - Size of the allocation.
69 * 247 *
70 * On some platforms (i.e Linux) it is possible to allocate memory directly 248 * On some platforms (i.e Linux) it is possible to allocate memory directly
@@ -83,30 +261,31 @@ static inline void *__nvgpu_big_alloc(size_t size, bool clear)
83 * Returns a pointer to a virtual address range that the kernel can access or 261 * Returns a pointer to a virtual address range that the kernel can access or
84 * %NULL on failure. 262 * %NULL on failure.
85 */ 263 */
86static inline void *nvgpu_big_malloc(size_t size) 264static inline void *nvgpu_big_malloc(struct gk20a *g, size_t size)
87{ 265{
88 return __nvgpu_big_alloc(size, false); 266 return __nvgpu_big_alloc(g, size, false);
89} 267}
90 268
91/** 269/**
92 * nvgpu_big_zalloc - Pick virtual or physical alloc based on @size 270
93 * 271 *
272 * @g - The GPU.
94 * @size - Size of the allocation. 273 * @size - Size of the allocation.
95 * 274 *
96 * Zeroed memory version of nvgpu_big_malloc(). 275 * Zeroed memory version of nvgpu_big_malloc().
97 */ 276 */
98static inline void *nvgpu_big_zalloc(size_t size) 277static inline void *nvgpu_big_zalloc(struct gk20a *g, size_t size)
99{ 278{
100 return __nvgpu_big_alloc(size, true); 279 return __nvgpu_big_alloc(g, size, true);
101} 280}
102 281
103/** 282/**
104 * nvgpu_big_free - Free and alloc from nvgpu_big_zalloc() or 283 * nvgpu_big_free - Free and alloc from nvgpu_big_zalloc() or
105 * nvgpu_big_malloc(). 284 * nvgpu_big_malloc().
106 * 285 * @g - The GPU.
107 * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc(). 286 * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc().
108 */ 287 */
109static inline void nvgpu_big_free(void *p) 288static inline void nvgpu_big_free(struct gk20a *g, void *p)
110{ 289{
111 /* 290 /*
112 * This will have to be fixed eventually. Allocs that use 291 * This will have to be fixed eventually. Allocs that use
@@ -114,9 +293,9 @@ static inline void nvgpu_big_free(void *p)
114 * when freeing. 293 * when freeing.
115 */ 294 */
116 if (virt_addr_valid(p)) 295 if (virt_addr_valid(p))
117 kfree(p); 296 nvgpu_kfree(g, p);
118 else 297 else
119 vfree(p); 298 nvgpu_vfree(g, p);
120} 299}
121 300
122#endif 301#endif /* __NVGPU_KMEM_H__ */
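
A hedged usage sketch of the nvgpu_big_*() helpers documented above, showing the size-based dispatch between the kernel and virtual allocators; the function and its nr parameter are hypothetical:

/* Allocate a table whose size may or may not exceed PAGE_SIZE. */
static int example_big_alloc(struct gk20a *g, unsigned long nr)
{
	u64 *table;

	/*
	 * Sizes <= PAGE_SIZE land in nvgpu_kzalloc(); larger sizes fall
	 * back to nvgpu_vzalloc() transparently.
	 */
	table = nvgpu_big_zalloc(g, nr * sizeof(*table));
	if (!table)
		return -ENOMEM;

	table[0] = 1;

	/* nvgpu_big_free() uses virt_addr_valid() to pick kfree()/vfree(). */
	nvgpu_big_free(g, table);
	return 0;
}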
diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h b/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h
new file mode 100644
index 00000000..d1cd27f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/kmem_linux.h
@@ -0,0 +1,123 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_KMEM_LINUX_H__
18#define __NVGPU_KMEM_LINUX_H__
19
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/vmalloc.h>
23
24#include <asm/page.h>
25
26struct gk20a;
27struct device;
28
29#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
30void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
31 unsigned long ip);
32void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
33 unsigned long ip);
34void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip);
35void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip);
36void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
37 unsigned long ip);
38void __nvgpu_track_vfree(struct gk20a *g, void *addr);
39void __nvgpu_track_kfree(struct gk20a *g, void *addr);
40
41void nvgpu_kmem_debugfs_init(struct device *dev);
42#else
43static inline void nvgpu_kmem_debugfs_init(struct device *dev)
44{
45}
46#endif
47
48/**
49 * DOC: Linux pass through kmem implementation.
50 *
51 * These are the Linux implementations of the various kmem functions defined by
52 * nvgpu. This should not be included directly - instead include <nvgpu/kmem.h>.
53 */
54
55static inline void *__nvgpu_kmalloc(struct gk20a *g, unsigned long size,
56 unsigned long ip)
57{
58#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
59 return __nvgpu_track_kmalloc(g, size, ip);
60#else
61 return kmalloc(size, GFP_KERNEL);
62#endif
63}
64
65static inline void *__nvgpu_kzalloc(struct gk20a *g, size_t size,
66 unsigned long ip)
67{
68#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
69 return __nvgpu_track_kzalloc(g, size, ip);
70#else
71 return kzalloc(size, GFP_KERNEL);
72#endif
73}
74
75static inline void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size,
76 unsigned long ip)
77{
78#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
79 return __nvgpu_track_kcalloc(g, n, size, ip);
80#else
81 return kcalloc(n, size, GFP_KERNEL);
82#endif
83}
84
85static inline void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size,
86 unsigned long ip)
87{
88#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
89 return __nvgpu_track_vmalloc(g, size, ip);
90#else
91 return vmalloc(size);
92#endif
93}
94
95static inline void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size,
96 unsigned long ip)
97{
98#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
99 return __nvgpu_track_vzalloc(g, size, ip);
100#else
101 return vzalloc(size);
102#endif
103}
104
105static inline void __nvgpu_kfree(struct gk20a *g, void *addr)
106{
107#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
108 __nvgpu_track_kfree(g, addr);
109#else
110 kfree(addr);
111#endif
112}
113
114static inline void __nvgpu_vfree(struct gk20a *g, void *addr)
115{
116#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
117 __nvgpu_track_vfree(g, addr);
118#else
119 vfree(addr);
120#endif
121}
122
123#endif
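
As a rough illustration of the layering in this header, this is approximately what a single nvgpu_kmalloc() call resolves to in each configuration, and how _THIS_IP_ feeds call-site attribution (expansion written out by hand; the example_ function is hypothetical):

#include <linux/kernel.h>

/*
 * nvgpu_kmalloc(g, 64)
 *   => __nvgpu_kmalloc(g, 64, _THIS_IP_)
 *
 * CONFIG_NVGPU_TRACK_MEM_USAGE=y:
 *   => __nvgpu_track_kmalloc(g, 64, ip)   (kmalloc() plus rb-tree bookkeeping)
 *
 * CONFIG_NVGPU_TRACK_MEM_USAGE=n:
 *   => kmalloc(64, GFP_KERNEL)            (zero-overhead pass-through)
 */
static inline void example_show_call_site(void)
{
	/*
	 * _THIS_IP_ evaluates to the address of this code location; the
	 * tracker saves it so leaks can be printed with %pF/%pS.
	 */
	unsigned long ip = _THIS_IP_;

	pr_info("called from %pS\n", (void *)ip);
}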
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c
index 3677b02d..39559dac 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/pci.c
@@ -19,6 +19,7 @@
19#include <linux/pm_runtime.h> 19#include <linux/pm_runtime.h>
20 20
21#include <nvgpu/nvgpu_common.h> 21#include <nvgpu/nvgpu_common.h>
22#include <nvgpu/kmem.h>
22 23
23#include "gk20a/gk20a.h" 24#include "gk20a/gk20a.h"
24#include "gk20a/platform_gk20a.h" 25#include "gk20a/platform_gk20a.h"
@@ -358,6 +359,8 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
358 platform->g = g; 359 platform->g = g;
359 g->dev = &pdev->dev; 360 g->dev = &pdev->dev;
360 361
362 nvgpu_kmem_init(g);
363
361 err = pci_enable_device(pdev); 364 err = pci_enable_device(pdev);
362 if (err) 365 if (err)
363 return err; 366 return err;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index d8e0dfa1..37b4633b 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -19,6 +19,8 @@
19#include <linux/pm_runtime.h> 19#include <linux/pm_runtime.h>
20#include <linux/pm_qos.h> 20#include <linux/pm_qos.h>
21 21
22#include <nvgpu/kmem.h>
23
22#include "vgpu/vgpu.h" 24#include "vgpu/vgpu.h"
23#include "vgpu/fecs_trace_vgpu.h" 25#include "vgpu/fecs_trace_vgpu.h"
24#include "gk20a/debug_gk20a.h" 26#include "gk20a/debug_gk20a.h"
@@ -562,6 +564,8 @@ int vgpu_probe(struct platform_device *pdev)
562 platform->vgpu_priv = priv; 564 platform->vgpu_priv = priv;
563 gk20a->dev = dev; 565 gk20a->dev = dev;
564 566
567 nvgpu_kmem_init(gk20a);
568
565 err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); 569 err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
566 if (err) 570 if (err)
567 return err; 571 return err;
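
Finally, a hedged sketch of the init/fini lifecycle that the probe hooks above establish; the example_ functions and the chosen fini flags are illustrative, not mandated by the patch:

#include <nvgpu/kmem.h>

/* Probe: set up tracking before any nvgpu_k*alloc() calls are made. */
static int example_probe(struct gk20a *g)
{
	return nvgpu_kmem_init(g);
}

/* Remove: warn about leaks and dump the outstanding allocations. */
static void example_remove(struct gk20a *g)
{
	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_WARN |
			   NVGPU_KMEM_FINI_DUMP_ALLOCS);
}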