Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c                 |   10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h    |   70
-rw-r--r--  drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h     |  192
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c            |   12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h            |    2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c            |    4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c              |    2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.c              |   16
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.h              |    2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c                    |    3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.c          |  211
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.h          |  302
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c   |  442
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c    | 1327
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c |  206
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c     |  936
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c                 |    6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h  |  121
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                 |  114
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h                 |   23
-rw-r--r--  drivers/gpu/nvgpu/gk20a/page_allocator_priv.h      |  164
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c                |   26
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.h                |    2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h          |    3
24 files changed, 114 insertions, 4082 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 0b90090a..07601d42 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -279,17 +279,17 @@ static int gk20a_as_ioctl_get_va_regions(
 
         for (i = 0; i < write_entries; ++i) {
                 struct nvgpu_as_va_region region;
-                struct gk20a_allocator *vma =
-                        gk20a_alloc_initialized(&vm->fixed) ?
+                struct nvgpu_allocator *vma =
+                        nvgpu_alloc_initialized(&vm->fixed) ?
                         &vm->fixed : &vm->vma[i];
 
                 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
                 region.page_size = vm->gmmu_page_sizes[i];
-                region.offset = gk20a_alloc_base(vma);
+                region.offset = nvgpu_alloc_base(vma);
                 /* No __aeabi_uldivmod() on some platforms... */
-                region.pages = (gk20a_alloc_end(vma) -
-                        gk20a_alloc_base(vma)) >> ilog2(region.page_size);
+                region.pages = (nvgpu_alloc_end(vma) -
+                        nvgpu_alloc_base(vma)) >> ilog2(region.page_size);
 
                 if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
                         return -EFAULT;
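The "No __aeabi_uldivmod()" comment above refers to the 64-bit division helper that is missing on some 32-bit platforms, which is why the page count is computed with a shift by ilog2(page_size) instead of a divide. A minimal standalone sketch of that idiom follows; the helper names are illustrative and not part of the driver:

#include <stdint.h>

/* Integer log2 of a power-of-two value; a stand-in for the kernel's ilog2(). */
static inline unsigned int ilog2_pow2(uint64_t x)
{
        unsigned int r = 0;

        while (x >>= 1)
                r++;
        return r;
}

/* Pages spanned by [base, end): shifting by ilog2(page_size) avoids a 64-bit divide. */
static inline uint64_t va_region_pages(uint64_t base, uint64_t end, uint64_t page_size)
{
        return (end - base) >> ilog2_pow2(page_size);
}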
diff --git a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h
deleted file mode 100644
index a686b704..00000000
--- a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef BITMAP_ALLOCATOR_PRIV_H
18#define BITMAP_ALLOCATOR_PRIV_H
19
20#include <linux/rbtree.h>
21
22struct gk20a_allocator;
23
24struct gk20a_bitmap_allocator {
25 struct gk20a_allocator *owner;
26
27 u64 base; /* Base address of the space. */
28 u64 length; /* Length of the space. */
29 u64 blk_size; /* Size that corresponds to 1 bit. */
30 u64 blk_shift; /* Bit shift to divide by blk_size. */
31 u64 num_bits; /* Number of allocatable bits. */
32 u64 bit_offs; /* Offset of bitmap. */
33
34 /*
35 * Optimization for making repeated allocations faster. Keep track of
36 * the next bit after the most recent allocation. This is where the next
37 * search will start from. This should make allocation faster in cases
38 * where lots of allocations get made one after another. It shouldn't
39 * have a negative impact on the case where the allocator is fragmented.
40 */
41 u64 next_blk;
42
43 unsigned long *bitmap; /* The actual bitmap! */
44 struct rb_root allocs; /* Tree of outstanding allocations. */
45
46 u64 flags;
47
48 bool inited;
49
50 /* Statistics */
51 u64 nr_allocs;
52 u64 nr_fixed_allocs;
53 u64 bytes_alloced;
54 u64 bytes_freed;
55};
56
57struct gk20a_bitmap_alloc {
58 u64 base;
59 u64 length;
60 struct rb_node alloc_entry; /* RB tree of allocations. */
61};
62
63static inline struct gk20a_bitmap_allocator *bitmap_allocator(
64 struct gk20a_allocator *a)
65{
66 return (struct gk20a_bitmap_allocator *)(a)->priv;
67}
68
69
70#endif
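The base, blk_size, blk_shift and bit_offs fields above define a simple mapping between byte addresses and bitmap positions: one bit per blk_size bytes, with the first usable bit corresponding to base. A hedged sketch of that translation, mirroring the arithmetic used further down in gk20a_allocator_bitmap.c (the struct and helper names here are illustrative):

#include <stdint.h>

struct bitmap_geom {
        uint64_t base;       /* Base address of the managed space. */
        uint64_t blk_size;   /* Bytes represented by one bit (power of two). */
        uint64_t blk_shift;  /* log2(blk_size). */
        uint64_t bit_offs;   /* base >> blk_shift. */
};

/* Byte address -> bit index within the allocator's bitmap. */
static inline uint64_t addr_to_bit(const struct bitmap_geom *g, uint64_t addr)
{
        return (addr >> g->blk_shift) - g->bit_offs;
}

/* Bit index -> byte address; inverse of addr_to_bit(). */
static inline uint64_t bit_to_addr(const struct bitmap_geom *g, uint64_t bit)
{
        return (bit + g->bit_offs) * g->blk_size;
}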
diff --git a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h
deleted file mode 100644
index bb8b307b..00000000
--- a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h
+++ /dev/null
@@ -1,192 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef BUDDY_ALLOCATOR_PRIV_H
18#define BUDDY_ALLOCATOR_PRIV_H
19
20#include <linux/list.h>
21#include <linux/rbtree.h>
22
23struct gk20a_allocator;
24struct vm_gk20a;
25
26/*
27 * Each buddy is an element in a binary tree.
28 */
29struct gk20a_buddy {
30 struct gk20a_buddy *parent; /* Parent node. */
31 struct gk20a_buddy *buddy; /* This node's buddy. */
32 struct gk20a_buddy *left; /* Lower address sub-node. */
33 struct gk20a_buddy *right; /* Higher address sub-node. */
34
35 struct list_head buddy_entry; /* List entry for various lists. */
36 struct rb_node alloced_entry; /* RB tree of allocations. */
37
38 u64 start; /* Start address of this buddy. */
39 u64 end; /* End address of this buddy. */
40 u64 order; /* Buddy order. */
41
42#define BALLOC_BUDDY_ALLOCED 0x1
43#define BALLOC_BUDDY_SPLIT 0x2
44#define BALLOC_BUDDY_IN_LIST 0x4
45 int flags; /* List of associated flags. */
46
47 /*
48 * Size of the PDE this buddy is using. This allows for grouping like
49 * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a
50 * enum except for the BALLOC_PTE_SIZE_ANY specifier.
51 */
52#define BALLOC_PTE_SIZE_ANY -1
53 int pte_size;
54};
55
56#define __buddy_flag_ops(flag, flag_up) \
57 static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \
58 { \
59 return b->flags & BALLOC_BUDDY_ ## flag_up; \
60 } \
61 static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \
62 { \
63 b->flags |= BALLOC_BUDDY_ ## flag_up; \
64 } \
65 static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \
66 { \
67 b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
68 }
69
70/*
71 * int buddy_is_alloced(struct gk20a_buddy *b);
72 * void buddy_set_alloced(struct gk20a_buddy *b);
73 * void buddy_clr_alloced(struct gk20a_buddy *b);
74 *
75 * int buddy_is_split(struct gk20a_buddy *b);
76 * void buddy_set_split(struct gk20a_buddy *b);
77 * void buddy_clr_split(struct gk20a_buddy *b);
78 *
79 * int buddy_is_in_list(struct gk20a_buddy *b);
80 * void buddy_set_in_list(struct gk20a_buddy *b);
81 * void buddy_clr_in_list(struct gk20a_buddy *b);
82 */
83__buddy_flag_ops(alloced, ALLOCED);
84__buddy_flag_ops(split, SPLIT);
85__buddy_flag_ops(in_list, IN_LIST);
86
87/*
88 * Keeps info for a fixed allocation.
89 */
90struct gk20a_fixed_alloc {
91 struct list_head buddies; /* List of buddies. */
92 struct rb_node alloced_entry; /* RB tree of fixed allocations. */
93
94 u64 start; /* Start of fixed block. */
95 u64 end; /* End address. */
96};
97
98/*
99 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
100 * doesn't have to correspond to a byte. In some cases each unit is a more
101 * complex object such as a comp_tag line or the like.
102 *
103 * The max order is computed based on the size of the minimum order and the size
104 * of the address space.
105 *
106 * order_size is the size of an order 0 buddy.
107 */
108struct gk20a_buddy_allocator {
109 struct gk20a_allocator *owner; /* Owner of this buddy allocator. */
110 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
111
112 u64 base; /* Base address of the space. */
113 u64 length; /* Length of the space. */
114 u64 blk_size; /* Size of order 0 allocation. */
115 u64 blk_shift; /* Shift to divide by blk_size. */
116
117 /* Internal stuff. */
118 u64 start; /* Real start (aligned to blk_size). */
119 u64 end; /* Real end, trimmed if needed. */
120 u64 count; /* Count of objects in space. */
121 u64 blks; /* Count of blks in the space. */
122 u64 max_order; /* Specific maximum order. */
123
124 struct rb_root alloced_buddies; /* Outstanding allocations. */
125 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
126
127 struct list_head co_list;
128
129 /*
130 * Impose an upper bound on the maximum order.
131 */
132#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
133
134 struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
135 u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
136 u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
137 u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
138
139 /*
140 * This is for when the allocator is managing a GVA space (the
141 * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires
142 * that we group like sized allocations into PDE blocks.
143 */
144 u64 pte_blk_order;
145
146 int initialized;
147 int alloc_made; /* True after the first alloc. */
148
149 u64 flags;
150
151 u64 bytes_alloced;
152 u64 bytes_alloced_real;
153 u64 bytes_freed;
154};
155
156static inline struct gk20a_buddy_allocator *buddy_allocator(
157 struct gk20a_allocator *a)
158{
159 return (struct gk20a_buddy_allocator *)(a)->priv;
160}
161
162static inline struct list_head *balloc_get_order_list(
163 struct gk20a_buddy_allocator *a, int order)
164{
165 return &a->buddy_list[order];
166}
167
168static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a,
169 int order)
170{
171 return (1 << order) * a->blk_size;
172}
173
174static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a,
175 u64 base)
176{
177 return base - a->start;
178}
179
180static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a,
181 u64 base)
182{
183 return base + a->start;
184}
185
186static inline struct gk20a_allocator *balloc_owner(
187 struct gk20a_buddy_allocator *a)
188{
189 return a->owner;
190}
191
192#endif
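As the comment above notes, the maximum order is derived from the order-0 block size and the size of the space, and balloc_order_to_len() shows that an order-n buddy spans (1 << n) * blk_size bytes. A small self-contained sketch of that sizing, using made-up values for the space and block size:

#include <stdint.h>
#include <stdio.h>

#define MAX_ORDER_CAP 31        /* Same cap as GPU_BALLOC_MAX_ORDER. */

/* An order-n buddy covers (1 << n) order-0 blocks. */
static uint64_t order_to_len(uint64_t blk_size, unsigned int order)
{
        return (1ULL << order) * blk_size;
}

int main(void)
{
        uint64_t length = 4ULL << 30;           /* Hypothetical 4 GB space. */
        uint64_t blk_size = 4096;               /* Hypothetical order-0 block size. */
        uint64_t blks = length / blk_size;      /* 2^20 blocks. */
        unsigned int max_order = 0;

        /* Largest single buddy that fits the whole space, capped like the driver. */
        while ((1ULL << (max_order + 1)) <= blks)
                max_order++;
        if (max_order > MAX_ORDER_CAP)
                max_order = MAX_ORDER_CAP;

        printf("max_order = %u (covers %llu bytes)\n",
               max_order, (unsigned long long)order_to_len(blk_size, max_order));
        return 0;
}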
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 136c28d0..be01e0e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -976,7 +976,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
 
         gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
-        nvgpu_free(ch->gpfifo.pipe);
+        nvgpu_kfree(ch->gpfifo.pipe);
         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1778,7 +1778,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
         }
 
         if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-                c->gpfifo.pipe = nvgpu_alloc(
+                c->gpfifo.pipe = nvgpu_kalloc(
                         gpfifo_size * sizeof(struct nvgpu_gpfifo),
                         false);
                 if (!c->gpfifo.pipe) {
@@ -1850,7 +1850,7 @@ clean_up_sync:
                 c->sync = NULL;
         }
 clean_up_unmap:
-        nvgpu_free(c->gpfifo.pipe);
+        nvgpu_kfree(c->gpfifo.pipe);
         gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -1980,12 +1980,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
         if (!g) {
                 size = count * sizeof(struct nvgpu_gpfifo);
                 if (size) {
-                        g = nvgpu_alloc(size, false);
+                        g = nvgpu_kalloc(size, false);
                         if (!g)
                                 return;
 
                         if (copy_from_user(g, user_gpfifo, size)) {
-                                nvgpu_free(g);
+                                nvgpu_kfree(g);
                                 return;
                         }
                 }
@@ -1997,7 +1997,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
                 trace_write_pushbuffer(c, gp);
 
         if (gpfifo_allocated)
-                nvgpu_free(g);
+                nvgpu_kfree(g);
 }
 
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 0a0d94b7..697d1603 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -143,7 +143,7 @@ struct channel_gk20a {
         struct list_head ch_entry; /* channel's entry in TSG */
 
         struct channel_gk20a_joblist joblist;
-        struct gk20a_allocator fence_allocator;
+        struct nvgpu_allocator fence_allocator;
 
         struct vm_gk20a *vm;
 
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index e5529295..ac96036f 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -815,7 +815,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
                 goto fail_dmabuf_put;
         }
 
-        buffer = nvgpu_alloc(access_limit_size, true);
+        buffer = nvgpu_kalloc(access_limit_size, true);
         if (!buffer) {
                 err = -ENOMEM;
                 goto fail_dmabuf_put;
@@ -861,7 +861,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 fail_idle:
         gk20a_idle(g->dev);
 fail_free_buffer:
-        nvgpu_free(buffer);
+        nvgpu_kfree(buffer);
 fail_dmabuf_put:
         dma_buf_put(dmabuf);
 
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index b84db933..8fa108c2 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -477,7 +477,7 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
         gk20a_railgating_debugfs_init(g->dev);
         gk20a_cde_debugfs_init(g->dev);
         gk20a_ce_debugfs_init(g->dev);
-        gk20a_alloc_debugfs_init(g->dev);
+        nvgpu_alloc_debugfs_init(g->dev);
         gk20a_mm_debugfs_init(g->dev);
         gk20a_fifo_debugfs_init(g->dev);
         gk20a_sched_debugfs_init(g->dev);
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 323caa8f..b8a1dcbc 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -49,8 +49,8 @@ static void gk20a_fence_free(struct kref *ref)
                 gk20a_semaphore_put(f->semaphore);
 
         if (f->allocator) {
-                if (gk20a_alloc_initialized(f->allocator))
-                        gk20a_free(f->allocator, (size_t)f);
+                if (nvgpu_alloc_initialized(f->allocator))
+                        nvgpu_free(f->allocator, (size_t)f);
         } else
                 kfree(f);
 }
@@ -129,7 +129,7 @@ int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
         if (!fence_pool)
                 return -ENOMEM;
 
-        err = gk20a_lockless_allocator_init(c->g, &c->fence_allocator,
+        err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator,
                         "fence_pool", (size_t)fence_pool, size,
                         sizeof(struct gk20a_fence), 0);
         if (err)
@@ -144,11 +144,11 @@ fail:
 
 void gk20a_free_fence_pool(struct channel_gk20a *c)
 {
-        if (gk20a_alloc_initialized(&c->fence_allocator)) {
+        if (nvgpu_alloc_initialized(&c->fence_allocator)) {
                 void *base = (void *)(uintptr_t)
-                        gk20a_alloc_base(&c->fence_allocator);
+                        nvgpu_alloc_base(&c->fence_allocator);
 
-                gk20a_alloc_destroy(&c->fence_allocator);
+                nvgpu_alloc_destroy(&c->fence_allocator);
                 vfree(base);
         }
 }
@@ -158,9 +158,9 @@ struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
         struct gk20a_fence *fence = NULL;
 
         if (channel_gk20a_is_prealloc_enabled(c)) {
-                if (gk20a_alloc_initialized(&c->fence_allocator)) {
+                if (nvgpu_alloc_initialized(&c->fence_allocator)) {
                         fence = (struct gk20a_fence *)(uintptr_t)
-                                gk20a_alloc(&c->fence_allocator,
+                                nvgpu_alloc(&c->fence_allocator,
                                         sizeof(struct gk20a_fence));
 
                         /* clear the node and reset the allocator pointer */
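gk20a_alloc_fence_pool() above shows the intended use of the lockless allocator: hand it a pre-reserved buffer and let it carve fixed-size objects out of it, so the hot path never calls into the kernel allocators. A hedged sketch of the same pattern using the renamed nvgpu_* entry points from this diff; the pool sizing and the vzalloc() choice are assumptions, not taken from the elided parts of the file:

/* Sketch only: assumes the nvgpu allocator declarations introduced by this series. */
static int example_fence_pool_init(struct gk20a *g, struct nvgpu_allocator *a,
                                   unsigned int count)
{
        size_t size = count * sizeof(struct gk20a_fence);
        void *pool = vzalloc(size);     /* Freed later with vfree(), as above. */
        int err;

        if (!pool)
                return -ENOMEM;

        /* The allocator hands back fixed-size slots from [pool, pool + size). */
        err = nvgpu_lockless_allocator_init(g, a, "example_fence_pool",
                                            (size_t)pool, size,
                                            sizeof(struct gk20a_fence), 0);
        if (err)
                vfree(pool);
        return err;
}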
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index beba761a..f38fcbe7 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -47,7 +47,7 @@ struct gk20a_fence {
         u32 syncpt_value;
 
         /* Valid for fences part of a pre-allocated fence pool */
-        struct gk20a_allocator *allocator;
+        struct nvgpu_allocator *allocator;
 };
 
 /* Fences can be created from semaphores or syncpoint (id, value) pairs */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index b1e90bd8..753f031a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -43,6 +43,8 @@
 #include <linux/sched.h>
 #include <linux/version.h>
 
+#include <nvgpu/allocator.h>
+
 #include "gk20a.h"
 #include "nvgpu_common.h"
 #include "debug_gk20a.h"
@@ -60,7 +62,6 @@
 #include "gk20a_scale.h"
 #include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
-#include "gk20a_allocator.h"
 #include "hal.h"
 #include "vgpu/vgpu.h"
 #include "pci.h"
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
deleted file mode 100644
index 3129b07c..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ /dev/null
@@ -1,211 +0,0 @@
1/*
2 * gk20a allocator
3 *
4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/kernel.h>
20#include <linux/slab.h>
21
22#include "gk20a.h"
23#include "mm_gk20a.h"
24#include "platform_gk20a.h"
25#include "gk20a_allocator.h"
26
27u32 gk20a_alloc_tracing_on;
28
29u64 gk20a_alloc_length(struct gk20a_allocator *a)
30{
31 if (a->ops->length)
32 return a->ops->length(a);
33
34 return 0;
35}
36
37u64 gk20a_alloc_base(struct gk20a_allocator *a)
38{
39 if (a->ops->base)
40 return a->ops->base(a);
41
42 return 0;
43}
44
45u64 gk20a_alloc_initialized(struct gk20a_allocator *a)
46{
47 if (!a->ops || !a->ops->inited)
48 return 0;
49
50 return a->ops->inited(a);
51}
52
53u64 gk20a_alloc_end(struct gk20a_allocator *a)
54{
55 if (a->ops->end)
56 return a->ops->end(a);
57
58 return 0;
59}
60
61u64 gk20a_alloc_space(struct gk20a_allocator *a)
62{
63 if (a->ops->space)
64 return a->ops->space(a);
65
66 return 0;
67}
68
69u64 gk20a_alloc(struct gk20a_allocator *a, u64 len)
70{
71 return a->ops->alloc(a, len);
72}
73
74void gk20a_free(struct gk20a_allocator *a, u64 addr)
75{
76 a->ops->free(a, addr);
77}
78
79u64 gk20a_alloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
80{
81 if (a->ops->alloc_fixed)
82 return a->ops->alloc_fixed(a, base, len);
83
84 return 0;
85}
86
87void gk20a_free_fixed(struct gk20a_allocator *a, u64 base, u64 len)
88{
89 /*
90 * If this operation is not defined for the allocator then just do
91 * nothing. The alternative would be to fall back on the regular
92 * free but that may be harmful in unexpected ways.
93 */
94 if (a->ops->free_fixed)
95 a->ops->free_fixed(a, base, len);
96}
97
98int gk20a_alloc_reserve_carveout(struct gk20a_allocator *a,
99 struct gk20a_alloc_carveout *co)
100{
101 if (a->ops->reserve_carveout)
102 return a->ops->reserve_carveout(a, co);
103
104 return -ENODEV;
105}
106
107void gk20a_alloc_release_carveout(struct gk20a_allocator *a,
108 struct gk20a_alloc_carveout *co)
109{
110 if (a->ops->release_carveout)
111 a->ops->release_carveout(a, co);
112}
113
114void gk20a_alloc_destroy(struct gk20a_allocator *a)
115{
116 a->ops->fini(a);
117 memset(a, 0, sizeof(*a));
118}
119
120/*
121 * Handle the common init stuff for a gk20a_allocator.
122 */
123int __gk20a_alloc_common_init(struct gk20a_allocator *a,
124 const char *name, void *priv, bool dbg,
125 const struct gk20a_allocator_ops *ops)
126{
127 if (!ops)
128 return -EINVAL;
129
130 /*
131 * This is the bare minimum operations required for a sensible
132 * allocator.
133 */
134 if (!ops->alloc || !ops->free || !ops->fini)
135 return -EINVAL;
136
137 a->ops = ops;
138 a->priv = priv;
139 a->debug = dbg;
140
141 mutex_init(&a->lock);
142
143 strlcpy(a->name, name, sizeof(a->name));
144
145 return 0;
146}
147
148void gk20a_alloc_print_stats(struct gk20a_allocator *__a,
149 struct seq_file *s, int lock)
150{
151 __a->ops->print_stats(__a, s, lock);
152}
153
154#ifdef CONFIG_DEBUG_FS
155static int __alloc_show(struct seq_file *s, void *unused)
156{
157 struct gk20a_allocator *a = s->private;
158
159 gk20a_alloc_print_stats(a, s, 1);
160
161 return 0;
162}
163
164static int __alloc_open(struct inode *inode, struct file *file)
165{
166 return single_open(file, __alloc_show, inode->i_private);
167}
168
169static const struct file_operations __alloc_fops = {
170 .open = __alloc_open,
171 .read = seq_read,
172 .llseek = seq_lseek,
173 .release = single_release,
174};
175#endif
176
177void gk20a_init_alloc_debug(struct gk20a *g, struct gk20a_allocator *a)
178{
179#ifdef CONFIG_DEBUG_FS
180 if (!g->debugfs_allocators)
181 return;
182
183 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
184 g->debugfs_allocators,
185 a, &__alloc_fops);
186#endif
187}
188
189void gk20a_fini_alloc_debug(struct gk20a_allocator *a)
190{
191#ifdef CONFIG_DEBUG_FS
192 if (!IS_ERR_OR_NULL(a->debugfs_entry))
193 debugfs_remove(a->debugfs_entry);
194#endif
195}
196
197void gk20a_alloc_debugfs_init(struct device *dev)
198{
199#ifdef CONFIG_DEBUG_FS
200 struct gk20a_platform *platform = dev_get_drvdata(dev);
201 struct dentry *gpu_root = platform->debugfs;
202 struct gk20a *g = get_gk20a(dev);
203
204 g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
205 if (IS_ERR_OR_NULL(g->debugfs_allocators))
206 return;
207
208 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
209 &gk20a_alloc_tracing_on);
210#endif
211}
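gk20a_allocator.c above is a thin dispatch layer: the required ops (alloc, free, fini) are validated once in __gk20a_alloc_common_init(), while optional ops are probed before each call and fall back to a default. A reduced sketch of that pattern in plain C, with simplified types and names that are not the driver's own:

#include <errno.h>
#include <stddef.h>

struct allocator;

struct allocator_ops {
        unsigned long long (*alloc)(struct allocator *a, unsigned long long len); /* required */
        void (*free)(struct allocator *a, unsigned long long addr);               /* required */
        void (*fini)(struct allocator *a);                                        /* required */
        unsigned long long (*base)(struct allocator *a);                          /* optional */
};

struct allocator {
        const struct allocator_ops *ops;
        void *priv;     /* Backend-specific state. */
};

/* Required ops are checked up front, as in __gk20a_alloc_common_init(). */
static inline int allocator_init(struct allocator *a, void *priv,
                                 const struct allocator_ops *ops)
{
        if (!ops || !ops->alloc || !ops->free || !ops->fini)
                return -EINVAL;
        a->ops = ops;
        a->priv = priv;
        return 0;
}

/* Optional ops get a probe-and-default wrapper, as in gk20a_alloc_base(). */
static inline unsigned long long allocator_base(struct allocator *a)
{
        return a->ops->base ? a->ops->base(a) : 0;
}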
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
deleted file mode 100644
index b12926b3..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ /dev/null
@@ -1,302 +0,0 @@
1/*
2 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef GK20A_ALLOCATOR_H
18#define GK20A_ALLOCATOR_H
19
20#include <linux/debugfs.h>
21#include <linux/seq_file.h>
22#include <linux/platform_device.h>
23
24/* #define ALLOCATOR_DEBUG */
25
26struct gk20a_allocator;
27struct gk20a_alloc_carveout;
28struct vm_gk20a;
29struct gk20a;
30
31/*
32 * Operations for an allocator to implement.
33 */
34struct gk20a_allocator_ops {
35 u64 (*alloc)(struct gk20a_allocator *allocator, u64 len);
36 void (*free)(struct gk20a_allocator *allocator, u64 addr);
37
38 /*
39 * Special interface to allocate a memory region with a specific
40 * starting address. Yikes. Note: if free() works for freeing both
41 * regular and fixed allocations then free_fixed() does not need to
42 * be implemented. This behavior exists for legacy reasons and should
43 * not be propagated to new allocators.
44 */
45 u64 (*alloc_fixed)(struct gk20a_allocator *allocator,
46 u64 base, u64 len);
47 void (*free_fixed)(struct gk20a_allocator *allocator,
48 u64 base, u64 len);
49
50 /*
51 * Allow allocators to reserve space for carveouts.
52 */
53 int (*reserve_carveout)(struct gk20a_allocator *allocator,
54 struct gk20a_alloc_carveout *co);
55 void (*release_carveout)(struct gk20a_allocator *allocator,
56 struct gk20a_alloc_carveout *co);
57
58 /*
59 * Returns info about the allocator.
60 */
61 u64 (*base)(struct gk20a_allocator *allocator);
62 u64 (*length)(struct gk20a_allocator *allocator);
63 u64 (*end)(struct gk20a_allocator *allocator);
64 int (*inited)(struct gk20a_allocator *allocator);
65 u64 (*space)(struct gk20a_allocator *allocator);
66
67 /* Destructor. */
68 void (*fini)(struct gk20a_allocator *allocator);
69
70 /* Debugging. */
71 void (*print_stats)(struct gk20a_allocator *allocator,
72 struct seq_file *s, int lock);
73};
74
75struct gk20a_allocator {
76 char name[32];
77 struct mutex lock;
78
79 void *priv;
80 const struct gk20a_allocator_ops *ops;
81
82 struct dentry *debugfs_entry;
83 bool debug; /* Control for debug msgs. */
84};
85
86struct gk20a_alloc_carveout {
87 const char *name;
88 u64 base;
89 u64 length;
90
91 struct gk20a_allocator *allocator;
92
93 /*
94 * For usage by the allocator implementation.
95 */
96 struct list_head co_entry;
97};
98
99#define GK20A_CARVEOUT(__name, __base, __length) \
100 { \
101 .name = (__name), \
102 .base = (__base), \
103 .length = (__length) \
104 }
105
106/*
107 * These are the available allocator flags.
108 *
109 * GPU_ALLOC_GVA_SPACE
110 *
111 * This flag makes sense for the buddy allocator only. It specifies that the
112 * allocator will be used for managing a GVA space. When managing GVA spaces
113 * special care has to be taken to ensure that allocations of similar PTE
114 * sizes are placed in the same PDE block. This allows the higher level
115 * code to skip defining both small and large PTE tables for every PDE. That
116 * can save considerable memory for address spaces that have a lot of
117 * allocations.
118 *
119 * GPU_ALLOC_NO_ALLOC_PAGE
120 *
121 * For any allocator that needs to manage a resource in a latency critical
122 * path this flag specifies that the allocator should not use any kmalloc()
123 * or similar functions during normal operation. Initialization routines
124 * may still use kmalloc(). This prevents the possibility of long waits for
125 * pages when using alloc_page(). Currently only the bitmap allocator
126 * implements this functionality.
127 *
128 * Also note that if you accept this flag then you must also define the
129 * free_fixed() function. Since no meta-data is allocated to help free
130 * allocations you need to keep track of the meta-data yourself (in this
131 * case the base and length of the allocation as opposed to just the base
132 * of the allocation).
133 *
134 * GPU_ALLOC_4K_VIDMEM_PAGES
135 *
136 * We manage vidmem pages at a large page granularity for performance
137 * reasons; however, this can lead to wasting memory. For page allocators
138 * setting this flag will tell the allocator to manage pools of 4K pages
139 * inside internally allocated large pages.
140 *
141 * Currently this flag is ignored since the only usage of the page allocator
142 * uses a 4K block size already. However, this flag has been reserved since
143 * it will be necessary in the future.
144 *
145 * GPU_ALLOC_FORCE_CONTIG
146 *
147 * Force allocations to be contiguous. Currently only relevant for page
148 * allocators since all other allocators are naturally contiguous.
149 *
150 * GPU_ALLOC_NO_SCATTER_GATHER
151 *
152 * The page allocator normally returns a scatter gather data structure for
153 * allocations (to handle discontiguous pages). However, at times that can
154 * be annoying so this flag forces the page allocator to return a u64
155 * pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be
156 * set as well).
157 */
158#define GPU_ALLOC_GVA_SPACE 0x1
159#define GPU_ALLOC_NO_ALLOC_PAGE 0x2
160#define GPU_ALLOC_4K_VIDMEM_PAGES 0x4
161#define GPU_ALLOC_FORCE_CONTIG 0x8
162#define GPU_ALLOC_NO_SCATTER_GATHER 0x10
163
164static inline void alloc_lock(struct gk20a_allocator *a)
165{
166 mutex_lock(&a->lock);
167}
168
169static inline void alloc_unlock(struct gk20a_allocator *a)
170{
171 mutex_unlock(&a->lock);
172}
173
174/*
175 * Buddy allocator specific initializers.
176 */
177int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
178 struct vm_gk20a *vm, const char *name,
179 u64 base, u64 size, u64 blk_size,
180 u64 max_order, u64 flags);
181int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
182 const char *name, u64 base, u64 size,
183 u64 blk_size, u64 flags);
184
185/*
186 * Bitmap initializers.
187 */
188int gk20a_bitmap_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
189 const char *name, u64 base, u64 length,
190 u64 blk_size, u64 flags);
191
192/*
193 * Page allocator initializers.
194 */
195int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
196 const char *name, u64 base, u64 length,
197 u64 blk_size, u64 flags);
198
199/*
200 * Lockless allocatior initializers.
201 * Note: This allocator can only allocate fixed-size structures of a
202 * pre-defined size.
203 */
204int gk20a_lockless_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
205 const char *name, u64 base, u64 length,
206 u64 struct_size, u64 flags);
207
208#define GPU_BALLOC_MAX_ORDER 31
209
210/*
211 * Allocator APIs.
212 */
213u64 gk20a_alloc(struct gk20a_allocator *allocator, u64 len);
214void gk20a_free(struct gk20a_allocator *allocator, u64 addr);
215
216u64 gk20a_alloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
217void gk20a_free_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
218
219int gk20a_alloc_reserve_carveout(struct gk20a_allocator *a,
220 struct gk20a_alloc_carveout *co);
221void gk20a_alloc_release_carveout(struct gk20a_allocator *a,
222 struct gk20a_alloc_carveout *co);
223
224u64 gk20a_alloc_base(struct gk20a_allocator *a);
225u64 gk20a_alloc_length(struct gk20a_allocator *a);
226u64 gk20a_alloc_end(struct gk20a_allocator *a);
227u64 gk20a_alloc_initialized(struct gk20a_allocator *a);
228u64 gk20a_alloc_space(struct gk20a_allocator *a);
229
230void gk20a_alloc_destroy(struct gk20a_allocator *allocator);
231
232void gk20a_alloc_print_stats(struct gk20a_allocator *a,
233 struct seq_file *s, int lock);
234
235/*
236 * Common functionality for the internals of the allocators.
237 */
238void gk20a_init_alloc_debug(struct gk20a *g, struct gk20a_allocator *a);
239void gk20a_fini_alloc_debug(struct gk20a_allocator *a);
240
241int __gk20a_alloc_common_init(struct gk20a_allocator *a,
242 const char *name, void *priv, bool dbg,
243 const struct gk20a_allocator_ops *ops);
244
245static inline void gk20a_alloc_enable_dbg(struct gk20a_allocator *a)
246{
247 a->debug = true;
248}
249
250static inline void gk20a_alloc_disable_dbg(struct gk20a_allocator *a)
251{
252 a->debug = false;
253}
254
255/*
256 * Debug stuff.
257 */
258extern u32 gk20a_alloc_tracing_on;
259
260void gk20a_alloc_debugfs_init(struct device *dev);
261
262#define gk20a_alloc_trace_func() \
263 do { \
264 if (gk20a_alloc_tracing_on) \
265 trace_printk("%s\n", __func__); \
266 } while (0)
267
268#define gk20a_alloc_trace_func_done() \
269 do { \
270 if (gk20a_alloc_tracing_on) \
271 trace_printk("%s_done\n", __func__); \
272 } while (0)
273
274#define __alloc_pstat(seq, allocator, fmt, arg...) \
275 do { \
276 if (s) \
277 seq_printf(seq, fmt, ##arg); \
278 else \
279 alloc_dbg(allocator, fmt, ##arg); \
280 } while (0)
281
282#define __alloc_dbg(a, fmt, arg...) \
283 pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg)
284
285#if defined(ALLOCATOR_DEBUG)
286/*
287 * Always print the debug messages...
288 */
289#define alloc_dbg(a, fmt, arg...) __alloc_dbg(a, fmt, ##arg)
290#else
291/*
292 * Only print debug messages if debug is enabled for a given allocator.
293 */
294#define alloc_dbg(a, fmt, arg...) \
295 do { \
296 if ((a)->debug) \
297 __alloc_dbg((a), fmt, ##arg); \
298 } while (0)
299
300#endif
301
302#endif /* GK20A_ALLOCATOR_H */
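The header above pairs one of the *_allocator_init() functions with the generic gk20a_alloc()/gk20a_free() calls and a final gk20a_alloc_destroy(). A hedged usage sketch built only from the declarations shown above (this is the interface the commit replaces with nvgpu_* names; the base, size and flag choices below are invented for illustration):

/* Sketch only: relies on the declarations in gk20a_allocator.h above. */
static int example_gva_setup(struct gk20a *g, struct gk20a_allocator *vma)
{
        u64 gpu_va;
        int err;

        /* Buddy allocator over a made-up 1 GB range with 64 KB order-0 blocks. */
        err = gk20a_buddy_allocator_init(g, vma, "example_gva",
                                         0x40000000ULL, /* base */
                                         0x40000000ULL, /* size */
                                         0x10000ULL,    /* blk_size */
                                         GPU_ALLOC_GVA_SPACE);
        if (err)
                return err;

        gpu_va = gk20a_alloc(vma, 0x20000);     /* Returns 0 on failure. */
        if (!gpu_va) {
                gk20a_alloc_destroy(vma);
                return -ENOMEM;
        }

        gk20a_free(vma, gpu_va);
        gk20a_alloc_destroy(vma);
        return 0;
}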
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c
deleted file mode 100644
index f98e0782..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c
+++ /dev/null
@@ -1,442 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/bitops.h>
20
21#include "gk20a_allocator.h"
22#include "bitmap_allocator_priv.h"
23
24static struct kmem_cache *meta_data_cache; /* slab cache for meta data. */
25static DEFINE_MUTEX(meta_data_cache_lock);
26
27static u64 gk20a_bitmap_alloc_length(struct gk20a_allocator *a)
28{
29 struct gk20a_bitmap_allocator *ba = a->priv;
30
31 return ba->length;
32}
33
34static u64 gk20a_bitmap_alloc_base(struct gk20a_allocator *a)
35{
36 struct gk20a_bitmap_allocator *ba = a->priv;
37
38 return ba->base;
39}
40
41static int gk20a_bitmap_alloc_inited(struct gk20a_allocator *a)
42{
43 struct gk20a_bitmap_allocator *ba = a->priv;
44 int inited = ba->inited;
45
46 rmb();
47 return inited;
48}
49
50static u64 gk20a_bitmap_alloc_end(struct gk20a_allocator *a)
51{
52 struct gk20a_bitmap_allocator *ba = a->priv;
53
54 return ba->base + ba->length;
55}
56
57static u64 gk20a_bitmap_alloc_fixed(struct gk20a_allocator *__a,
58 u64 base, u64 len)
59{
60 struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
61 u64 blks, offs, ret;
62
63 /* Compute the bit offset and make sure it's aligned to a block. */
64 offs = base >> a->blk_shift;
65 if (offs * a->blk_size != base)
66 return 0;
67
68 offs -= a->bit_offs;
69
70 blks = len >> a->blk_shift;
71 if (blks * a->blk_size != len)
72 blks++;
73
74 alloc_lock(__a);
75
76 /* Check if the space requested is already occupied. */
77 ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0);
78 if (ret != offs)
79 goto fail;
80
81 bitmap_set(a->bitmap, offs, blks);
82
83 a->bytes_alloced += blks * a->blk_size;
84 a->nr_fixed_allocs++;
85 alloc_unlock(__a);
86
87 alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
88 base, len, blks, blks);
89 return base;
90
91fail:
92 alloc_unlock(__a);
93 alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base);
94 return 0;
95}
96
97/*
98 * Two possibilities for this function: either we are freeing a fixed allocation
99 * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined.
100 *
101 * Note: this function won't do much error checking. Thus you could really
102 * confuse the allocator if you misuse this function.
103 */
104static void gk20a_bitmap_free_fixed(struct gk20a_allocator *__a,
105 u64 base, u64 len)
106{
107 struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
108 u64 blks, offs;
109
110 offs = base >> a->blk_shift;
111 if (WARN_ON(offs * a->blk_size != base))
112 return;
113
114 offs -= a->bit_offs;
115
116 blks = len >> a->blk_shift;
117 if (blks * a->blk_size != len)
118 blks++;
119
120 alloc_lock(__a);
121 bitmap_clear(a->bitmap, offs, blks);
122 a->bytes_freed += blks * a->blk_size;
123 alloc_unlock(__a);
124
125 alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
126 base, len, blks, blks);
127}
128
129/*
130 * Add the passed alloc to the tree of stored allocations.
131 */
132static void insert_alloc_metadata(struct gk20a_bitmap_allocator *a,
133 struct gk20a_bitmap_alloc *alloc)
134{
135 struct rb_node **new = &a->allocs.rb_node;
136 struct rb_node *parent = NULL;
137 struct gk20a_bitmap_alloc *tmp;
138
139 while (*new) {
140 tmp = container_of(*new, struct gk20a_bitmap_alloc,
141 alloc_entry);
142
143 parent = *new;
144 if (alloc->base < tmp->base)
145 new = &((*new)->rb_left);
146 else if (alloc->base > tmp->base)
147 new = &((*new)->rb_right);
148 else {
149 WARN_ON("Duplicate entries in RB alloc tree!\n");
150 return;
151 }
152 }
153
154 rb_link_node(&alloc->alloc_entry, parent, new);
155 rb_insert_color(&alloc->alloc_entry, &a->allocs);
156}
157
158/*
159 * Find and remove meta-data from the outstanding allocations.
160 */
161static struct gk20a_bitmap_alloc *find_alloc_metadata(
162 struct gk20a_bitmap_allocator *a, u64 addr)
163{
164 struct rb_node *node = a->allocs.rb_node;
165 struct gk20a_bitmap_alloc *alloc;
166
167 while (node) {
168 alloc = container_of(node, struct gk20a_bitmap_alloc,
169 alloc_entry);
170
171 if (addr < alloc->base)
172 node = node->rb_left;
173 else if (addr > alloc->base)
174 node = node->rb_right;
175 else
176 break;
177 }
178
179 if (!node)
180 return NULL;
181
182 rb_erase(node, &a->allocs);
183
184 return alloc;
185}
186
187/*
188 * Tree of alloc meta data stores the address of the alloc not the bit offset.
189 */
190static int __gk20a_bitmap_store_alloc(struct gk20a_bitmap_allocator *a,
191 u64 addr, u64 len)
192{
193 struct gk20a_bitmap_alloc *alloc =
194 kmem_cache_alloc(meta_data_cache, GFP_KERNEL);
195
196 if (!alloc)
197 return -ENOMEM;
198
199 alloc->base = addr;
200 alloc->length = len;
201
202 insert_alloc_metadata(a, alloc);
203
204 return 0;
205}
206
207/*
208 * @len is in bytes. This routine will figure out the right number of bits to
209 * actually allocate. The return is the address in bytes as well.
210 */
211static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len)
212{
213 u64 blks, addr;
214 unsigned long offs, adjusted_offs, limit;
215 struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
216
217 blks = len >> a->blk_shift;
218
219 if (blks * a->blk_size != len)
220 blks++;
221
222 alloc_lock(__a);
223
224 /*
225 * First look from next_blk and onwards...
226 */
227 offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits,
228 a->next_blk, blks, 0);
229 if (offs >= a->num_bits) {
230 /*
231 * If that didn't work try the remaining area. Since there can
232 * be available space that spans across a->next_blk we need to
233 * search up to the first set bit after that.
234 */
235 limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk);
236 offs = bitmap_find_next_zero_area(a->bitmap, limit,
237 0, blks, 0);
238 if (offs >= a->next_blk)
239 goto fail;
240 }
241
242 bitmap_set(a->bitmap, offs, blks);
243 a->next_blk = offs + blks;
244
245 adjusted_offs = offs + a->bit_offs;
246 addr = ((u64)adjusted_offs) * a->blk_size;
247
248 /*
249 * Only do meta-data storage if we are allowed to allocate storage for
250 * that meta-data. The issue with using kmalloc() and friends is that
251 * in latency and success critical paths an alloc_page() call can either
252 * sleep for potentially a long time or, assuming GFP_ATOMIC, fail.
253 * Since we might not want either of these possibilities assume that the
254 * caller will keep what data it needs around to successfully free this
255 * allocation.
256 */
257 if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) &&
258 __gk20a_bitmap_store_alloc(a, addr, blks * a->blk_size))
259 goto fail_reset_bitmap;
260
261 alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
262 addr, len, blks, blks);
263
264 a->nr_allocs++;
265 a->bytes_alloced += (blks * a->blk_size);
266 alloc_unlock(__a);
267
268 return addr;
269
270fail_reset_bitmap:
271 bitmap_clear(a->bitmap, offs, blks);
272fail:
273 a->next_blk = 0;
274 alloc_unlock(__a);
275 alloc_dbg(__a, "Alloc failed!\n");
276 return 0;
277}
278
279static void gk20a_bitmap_free(struct gk20a_allocator *__a, u64 addr)
280{
281 struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
282 struct gk20a_bitmap_alloc *alloc = NULL;
283 u64 offs, adjusted_offs, blks;
284
285 alloc_lock(__a);
286
287 if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) {
288 WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator");
289 goto done;
290 }
291
292 alloc = find_alloc_metadata(a, addr);
293 if (!alloc)
294 goto done;
295
296 /*
297 * Address comes from adjusted offset (i.e the bit offset with
298 * a->bit_offs added. So start with that and then work out the real
299 * offs into the bitmap.
300 */
301 adjusted_offs = addr >> a->blk_shift;
302 offs = adjusted_offs - a->bit_offs;
303 blks = alloc->length >> a->blk_shift;
304
305 bitmap_clear(a->bitmap, offs, blks);
306 alloc_dbg(__a, "Free 0x%-10llx\n", addr);
307
308 a->bytes_freed += alloc->length;
309
310done:
311 kfree(alloc);
312 alloc_unlock(__a);
313}
314
315static void gk20a_bitmap_alloc_destroy(struct gk20a_allocator *__a)
316{
317 struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
318 struct gk20a_bitmap_alloc *alloc;
319 struct rb_node *node;
320
321 /*
322 * Kill any outstanding allocations.
323 */
324 while ((node = rb_first(&a->allocs)) != NULL) {
325 alloc = container_of(node, struct gk20a_bitmap_alloc,
326 alloc_entry);
327
328 rb_erase(node, &a->allocs);
329 kfree(alloc);
330 }
331
332 kfree(a->bitmap);
333 kfree(a);
334}
335
336static void gk20a_bitmap_print_stats(struct gk20a_allocator *__a,
337 struct seq_file *s, int lock)
338{
339 struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
340
341 __alloc_pstat(s, __a, "Bitmap allocator params:\n");
342 __alloc_pstat(s, __a, " start = 0x%llx\n", a->base);
343 __alloc_pstat(s, __a, " end = 0x%llx\n", a->base + a->length);
344 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->num_bits);
345
346 /* Actual stats. */
347 __alloc_pstat(s, __a, "Stats:\n");
348 __alloc_pstat(s, __a, " Number allocs = 0x%llx\n", a->nr_allocs);
349 __alloc_pstat(s, __a, " Number fixed = 0x%llx\n", a->nr_fixed_allocs);
350 __alloc_pstat(s, __a, " Bytes alloced = 0x%llx\n", a->bytes_alloced);
351 __alloc_pstat(s, __a, " Bytes freed = 0x%llx\n", a->bytes_freed);
352 __alloc_pstat(s, __a, " Outstanding = 0x%llx\n",
353 a->bytes_alloced - a->bytes_freed);
354}
355
356static const struct gk20a_allocator_ops bitmap_ops = {
357 .alloc = gk20a_bitmap_alloc,
358 .free = gk20a_bitmap_free,
359
360 .alloc_fixed = gk20a_bitmap_alloc_fixed,
361 .free_fixed = gk20a_bitmap_free_fixed,
362
363 .base = gk20a_bitmap_alloc_base,
364 .length = gk20a_bitmap_alloc_length,
365 .end = gk20a_bitmap_alloc_end,
366 .inited = gk20a_bitmap_alloc_inited,
367
368 .fini = gk20a_bitmap_alloc_destroy,
369
370 .print_stats = gk20a_bitmap_print_stats,
371};
372
373
374int gk20a_bitmap_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
375 const char *name, u64 base, u64 length,
376 u64 blk_size, u64 flags)
377{
378 int err;
379 struct gk20a_bitmap_allocator *a;
380
381 mutex_lock(&meta_data_cache_lock);
382 if (!meta_data_cache)
383 meta_data_cache = KMEM_CACHE(gk20a_bitmap_alloc, 0);
384 mutex_unlock(&meta_data_cache_lock);
385
386 if (!meta_data_cache)
387 return -ENOMEM;
388
389 if (WARN_ON(blk_size & (blk_size - 1)))
390 return -EINVAL;
391
392 /*
393 * blk_size must be a power-of-2; base length also need to be aligned
394 * to blk_size.
395 */
396 if (blk_size & (blk_size - 1) ||
397 base & (blk_size - 1) || length & (blk_size - 1))
398 return -EINVAL;
399
400 if (base == 0) {
401 base = blk_size;
402 length -= blk_size;
403 }
404
405 a = kzalloc(sizeof(struct gk20a_bitmap_allocator), GFP_KERNEL);
406 if (!a)
407 return -ENOMEM;
408
409 err = __gk20a_alloc_common_init(__a, name, a, false, &bitmap_ops);
410 if (err)
411 goto fail;
412
413 a->base = base;
414 a->length = length;
415 a->blk_size = blk_size;
416 a->blk_shift = __ffs(a->blk_size);
417 a->num_bits = length >> a->blk_shift;
418 a->bit_offs = a->base >> a->blk_shift;
419 a->flags = flags;
420
421 a->bitmap = kcalloc(BITS_TO_LONGS(a->num_bits), sizeof(*a->bitmap),
422 GFP_KERNEL);
423 if (!a->bitmap)
424 goto fail;
425
426 wmb();
427 a->inited = true;
428
429 gk20a_init_alloc_debug(g, __a);
430 alloc_dbg(__a, "New allocator: type bitmap\n");
431 alloc_dbg(__a, " base 0x%llx\n", a->base);
432 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
433 alloc_dbg(__a, " size 0x%llx\n", a->length);
434 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
435 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
436
437 return 0;
438
439fail:
440 kfree(a);
441 return err;
442}
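One detail worth pulling out of gk20a_bitmap_alloc() and gk20a_bitmap_alloc_fixed() above: a byte length is always rounded up to whole blocks before the bitmap is searched. A minimal sketch of just that rounding step, with an illustrative example in the trailing comment:

#include <stdint.h>

/* Round a byte length up to whole blocks, as the bitmap paths above do. */
static inline uint64_t len_to_blks(uint64_t len, uint64_t blk_size, uint64_t blk_shift)
{
        uint64_t blks = len >> blk_shift;

        if (blks * blk_size != len)
                blks++;
        return blks;
}

/* Example: len = 10240 (10 KB), blk_size = 4096 -> 2 full blocks + remainder -> 3 blocks. */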
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
deleted file mode 100644
index 3715e9f8..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
+++ /dev/null
@@ -1,1327 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19
20#include "mm_gk20a.h"
21#include "platform_gk20a.h"
22#include "gk20a_allocator.h"
23#include "buddy_allocator_priv.h"
24
25static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
26
27/* Some other buddy allocator functions. */
28static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
29 u64 addr);
30static void balloc_coalesce(struct gk20a_buddy_allocator *a,
31 struct gk20a_buddy *b);
32static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
33 struct gk20a_fixed_alloc *falloc);
34
35/*
36 * This function is not present in older kernel's list.h code.
37 */
38#ifndef list_last_entry
39#define list_last_entry(ptr, type, member) \
40 list_entry((ptr)->prev, type, member)
41#endif
42
43/*
44 * GPU buddy allocator for various address spaces.
45 *
46 * Current limitations:
47 * o A fixed allocation could potentially be made that borders PDEs with
48 * different PTE sizes. This would require that fixed buffer to have
49 * different sized PTEs for different parts of the allocation. Probably
50 * best to just require PDE alignment for fixed address allocs.
51 *
52 * o It is currently possible to make an allocator that has a buddy alignment
53 * out of sync with the PDE block size alignment. A simple example is a
54 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
55 * which means each buddy corresponf to more than one actual GPU page. The
56 * best way to fix this is probably just require PDE blocksize alignment
57 * for the start of the address space. At the moment all allocators are
58 * easily PDE aligned so this hasn't been a problem.
59 */
60
61/*
62 * Pick a suitable maximum order for this allocator.
63 *
64 * Hueristic: Just guessing that the best max order is the largest single
65 * block that will fit in the address space.
66 */
67static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
68{
69 u64 true_max_order = ilog2(a->blks);
70
71 if (a->max_order == 0) {
72 a->max_order = true_max_order;
73 return;
74 }
75
76 if (a->max_order > true_max_order)
77 a->max_order = true_max_order;
78 if (a->max_order > GPU_BALLOC_MAX_ORDER)
79 a->max_order = GPU_BALLOC_MAX_ORDER;
80}
81
82/*
83 * Since we can only allocate in chucks of a->blk_size we need to trim off
84 * any excess data that is not aligned to a->blk_size.
85 */
86static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
87{
88 a->start = ALIGN(a->base, a->blk_size);
89 WARN_ON(a->start != a->base);
90 a->end = (a->base + a->length) & ~(a->blk_size - 1);
91 a->count = a->end - a->start;
92 a->blks = a->count >> a->blk_shift;
93}
94
95/*
96 * Pass NULL for parent if you want a top level buddy.
97 */
98static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
99 struct gk20a_buddy *parent,
100 u64 start, u64 order)
101{
102 struct gk20a_buddy *new_buddy;
103
104 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
105 if (!new_buddy)
106 return NULL;
107
108 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
109
110 new_buddy->parent = parent;
111 new_buddy->start = start;
112 new_buddy->order = order;
113 new_buddy->end = start + (1 << order) * a->blk_size;
114 new_buddy->pte_size = BALLOC_PTE_SIZE_ANY;
115
116 return new_buddy;
117}
118
119static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
120 struct gk20a_buddy *b,
121 struct list_head *list)
122{
123 if (buddy_is_in_list(b)) {
124 alloc_dbg(balloc_owner(a),
125 "Oops: adding added buddy (%llu:0x%llx)\n",
126 b->order, b->start);
127 BUG();
128 }
129
130 /*
131 * Add big PTE blocks to the tail, small to the head for GVA spaces.
132 * This lets the code that checks if there are available blocks check
133 * without cycling through the entire list.
134 */
135 if (a->flags & GPU_ALLOC_GVA_SPACE &&
136 b->pte_size == gmmu_page_size_big)
137 list_add_tail(&b->buddy_entry, list);
138 else
139 list_add(&b->buddy_entry, list);
140
141 buddy_set_in_list(b);
142}
143
144static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
145 struct gk20a_buddy *b)
146{
147 if (!buddy_is_in_list(b)) {
148 alloc_dbg(balloc_owner(a),
149 "Oops: removing removed buddy (%llu:0x%llx)\n",
150 b->order, b->start);
151 BUG();
152 }
153
154 list_del_init(&b->buddy_entry);
155 buddy_clr_in_list(b);
156}
157
158/*
159 * Add a buddy to one of the buddy lists and deal with the necessary
160 * book keeping. Adds the buddy to the list specified by the buddy's order.
161 */
162static void balloc_blist_add(struct gk20a_buddy_allocator *a,
163 struct gk20a_buddy *b)
164{
165 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
166 a->buddy_list_len[b->order]++;
167}
168
169static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
170 struct gk20a_buddy *b)
171{
172 __balloc_buddy_list_rem(a, b);
173 a->buddy_list_len[b->order]--;
174}
175
176static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
177{
178 if (len == 0)
179 return 0;
180
181 len--;
182 len >>= a->blk_shift;
183
184 return fls(len);
185}
186
187static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
188 u64 start, u64 end)
189{
190 u64 size = (end - start) >> a->blk_shift;
191
192 if (size > 0)
193 return min_t(u64, ilog2(size), a->max_order);
194 else
195 return GPU_BALLOC_MAX_ORDER;
196}
197
198/*
199 * Initialize the buddy lists.
200 */
201static int balloc_init_lists(struct gk20a_buddy_allocator *a)
202{
203 int i;
204 u64 bstart, bend, order;
205 struct gk20a_buddy *buddy;
206
207 bstart = a->start;
208 bend = a->end;
209
210 /* First make sure the LLs are valid. */
211 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
212 INIT_LIST_HEAD(balloc_get_order_list(a, i));
213
214 while (bstart < bend) {
215 order = __balloc_max_order_in(a, bstart, bend);
216
217 buddy = balloc_new_buddy(a, NULL, bstart, order);
218 if (!buddy)
219 goto cleanup;
220
221 balloc_blist_add(a, buddy);
222 bstart += balloc_order_to_len(a, order);
223 }
224
225 return 0;
226
227cleanup:
228 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
229 if (!list_empty(balloc_get_order_list(a, i))) {
230 buddy = list_first_entry(balloc_get_order_list(a, i),
231 struct gk20a_buddy, buddy_entry);
232 balloc_blist_rem(a, buddy);
233 kmem_cache_free(buddy_cache, buddy);
234 }
235 }
236
237 return -ENOMEM;
238}
239
240/*
241 * Clean up and destroy the passed allocator.
242 */
243static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
244{
245 int i;
246 struct rb_node *node;
247 struct gk20a_buddy *bud;
248 struct gk20a_fixed_alloc *falloc;
249 struct gk20a_buddy_allocator *a = __a->priv;
250
251 alloc_lock(__a);
252
253 gk20a_fini_alloc_debug(__a);
254
255 /*
256 * Free the fixed allocs first.
257 */
258 while ((node = rb_first(&a->fixed_allocs)) != NULL) {
259 falloc = container_of(node,
260 struct gk20a_fixed_alloc, alloced_entry);
261
262 rb_erase(node, &a->fixed_allocs);
263 __balloc_do_free_fixed(a, falloc);
264 }
265
266 /*
267 * And now free all outstanding allocations.
268 */
269 while ((node = rb_first(&a->alloced_buddies)) != NULL) {
270 bud = container_of(node, struct gk20a_buddy, alloced_entry);
271 balloc_free_buddy(a, bud->start);
272 balloc_blist_add(a, bud);
273 balloc_coalesce(a, bud);
274 }
275
276 /*
277 * Now clean up the unallocated buddies.
278 */
279 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
280 BUG_ON(a->buddy_list_alloced[i] != 0);
281
282 while (!list_empty(balloc_get_order_list(a, i))) {
283 bud = list_first_entry(balloc_get_order_list(a, i),
284 struct gk20a_buddy, buddy_entry);
285 balloc_blist_rem(a, bud);
286 kmem_cache_free(buddy_cache, bud);
287 }
288
289 if (a->buddy_list_len[i] != 0) {
290 pr_info("Excess buddies!!! (%d: %llu)\n",
291 i, a->buddy_list_len[i]);
292 BUG();
293 }
294 if (a->buddy_list_split[i] != 0) {
295 pr_info("Excess split nodes!!! (%d: %llu)\n",
296 i, a->buddy_list_split[i]);
297 BUG();
298 }
299 if (a->buddy_list_alloced[i] != 0) {
300 pr_info("Excess alloced nodes!!! (%d: %llu)\n",
301 i, a->buddy_list_alloced[i]);
302 BUG();
303 }
304 }
305
306 kfree(a);
307
308 alloc_unlock(__a);
309}
310
311/*
312 * Combine the passed buddy if possible. The pointer in @b may not be valid
313 * after this as the buddy may be freed.
314 *
315 * @a must be locked.
316 */
317static void balloc_coalesce(struct gk20a_buddy_allocator *a,
318 struct gk20a_buddy *b)
319{
320 struct gk20a_buddy *parent;
321
322 if (buddy_is_alloced(b) || buddy_is_split(b))
323 return;
324
325 /*
326	 * If neither this buddy nor its buddy is allocated or split, then
327	 * the two can be coalesced into their parent.
328 */
329 if (!b->buddy)
330 return;
331 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
332 return;
333
334 parent = b->parent;
335
336 balloc_blist_rem(a, b);
337 balloc_blist_rem(a, b->buddy);
338
339 buddy_clr_split(parent);
340 a->buddy_list_split[parent->order]--;
341 balloc_blist_add(a, parent);
342
343 /*
344 * Recursively coalesce as far as we can go.
345 */
346 balloc_coalesce(a, parent);
347
348 /* Clean up the remains. */
349 kmem_cache_free(buddy_cache, b->buddy);
350 kmem_cache_free(buddy_cache, b);
351}
352
353/*
354 * Split a buddy into two new buddies that are each half the size of the parent.
355 *
356 * @a must be locked.
357 */
358static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
359 struct gk20a_buddy *b, int pte_size)
360{
361 struct gk20a_buddy *left, *right;
362 u64 half;
363
364 left = balloc_new_buddy(a, b, b->start, b->order - 1);
365 if (!left)
366 return -ENOMEM;
367
368 half = (b->end - b->start) / 2;
369
370 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
371 if (!right) {
372 kmem_cache_free(buddy_cache, left);
373 return -ENOMEM;
374 }
375
376 buddy_set_split(b);
377 a->buddy_list_split[b->order]++;
378
379 b->left = left;
380 b->right = right;
381 left->buddy = right;
382 right->buddy = left;
383 left->parent = b;
384 right->parent = b;
385
386 /* PTE considerations. */
387 if (a->flags & GPU_ALLOC_GVA_SPACE &&
388 left->order <= a->pte_blk_order) {
389 left->pte_size = pte_size;
390 right->pte_size = pte_size;
391 }
392
393 balloc_blist_rem(a, b);
394 balloc_blist_add(a, left);
395 balloc_blist_add(a, right);
396
397 return 0;
398}
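/*
 * Illustrative example, not part of the original source: splitting an
 * order-2 buddy with blk_size = 4K. The parent covers 16K, so "half" is
 * 8K and each child is an order-1 buddy:
 *
 *   parent: order 2, [0x8000, 0x18000)
 *   half   = (0x18000 - 0x8000) / 2 = 0x8000
 *   left   : order 1, [0x8000,  0x10000)
 *   right  : order 1, [0x10000, 0x18000)
 *
 * The parent is marked split and leaves the free lists; both children
 * are added to the order-1 list.
 */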
399
400/*
401 * Place the passed buddy into the RB tree for allocated buddies. Never fails
402 * unless the passed entry is a duplicate, which is a bug.
403 *
404 * @a must be locked.
405 */
406static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
407 struct gk20a_buddy *b)
408{
409 struct rb_node **new = &(a->alloced_buddies.rb_node);
410 struct rb_node *parent = NULL;
411
412 while (*new) {
413 struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
414 alloced_entry);
415
416 parent = *new;
417 if (b->start < bud->start)
418 new = &((*new)->rb_left);
419 else if (b->start > bud->start)
420 new = &((*new)->rb_right);
421 else
422 BUG_ON("Duplicate entries in allocated list!\n");
423 }
424
425 rb_link_node(&b->alloced_entry, parent, new);
426 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
427
428 buddy_set_alloced(b);
429 a->buddy_list_alloced[b->order]++;
430}
431
432/*
433 * Remove the passed buddy from the allocated buddy RB tree. Returns the
434 * deallocated buddy for further processing.
435 *
436 * @a must be locked.
437 */
438static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
439 u64 addr)
440{
441 struct rb_node *node = a->alloced_buddies.rb_node;
442 struct gk20a_buddy *bud;
443
444 while (node) {
445 bud = container_of(node, struct gk20a_buddy, alloced_entry);
446
447 if (addr < bud->start)
448 node = node->rb_left;
449 else if (addr > bud->start)
450 node = node->rb_right;
451 else
452 break;
453 }
454
455 if (!node)
456 return NULL;
457
458 rb_erase(node, &a->alloced_buddies);
459 buddy_clr_alloced(bud);
460 a->buddy_list_alloced[bud->order]--;
461
462 return bud;
463}
464
465/*
466 * Find a suitable buddy for the given order and PTE type (big or little).
467 */
468static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
469 u64 order, int pte_size)
470{
471 struct gk20a_buddy *bud;
472
473 if (order > a->max_order ||
474 list_empty(balloc_get_order_list(a, order)))
475 return NULL;
476
477 if (a->flags & GPU_ALLOC_GVA_SPACE &&
478 pte_size == gmmu_page_size_big)
479 bud = list_last_entry(balloc_get_order_list(a, order),
480 struct gk20a_buddy, buddy_entry);
481 else
482 bud = list_first_entry(balloc_get_order_list(a, order),
483 struct gk20a_buddy, buddy_entry);
484
485 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
486 bud->pte_size != pte_size)
487 return NULL;
488
489 return bud;
490}
491
492/*
493 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
494 * order buddies until we have a suitable buddy to allocate.
495 *
496 * For PDE grouping, add an extra check to see if a buddy is suitable: the
497 * buddy must exist in a PDE whose PTE size is reasonable.
498 *
499 * @a must be locked.
500 */
501static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
502 u64 order, int pte_size)
503{
504 u64 split_order;
505 struct gk20a_buddy *bud = NULL;
506
507 split_order = order;
508 while (split_order <= a->max_order &&
509 !(bud = __balloc_find_buddy(a, split_order, pte_size)))
510 split_order++;
511
512 /* Out of memory! */
513 if (!bud)
514 return 0;
515
516 while (bud->order != order) {
517 if (balloc_split_buddy(a, bud, pte_size))
518 return 0; /* No mem... */
519 bud = bud->left;
520 }
521
522 balloc_blist_rem(a, bud);
523 balloc_alloc_buddy(a, bud);
524
525 return bud->start;
526}
527
528/*
529 * See if the passed range is actually available for allocation. If so, then
530 * return 1, otherwise return 0.
531 *
532 * TODO: Right now this uses the suboptimal approach of going through all
533 * outstanding allocations and checking their base/ends. This could be better.
534 */
535static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
536 u64 base, u64 end)
537{
538 struct rb_node *node;
539 struct gk20a_buddy *bud;
540
541 node = rb_first(&a->alloced_buddies);
542 if (!node)
543 return 1; /* No allocs yet. */
544
545 bud = container_of(node, struct gk20a_buddy, alloced_entry);
546
547 while (bud->start < end) {
548 if ((bud->start > base && bud->start < end) ||
549 (bud->end > base && bud->end < end))
550 return 0;
551
552 node = rb_next(node);
553 if (!node)
554 break;
555 bud = container_of(node, struct gk20a_buddy, alloced_entry);
556 }
557
558 return 1;
559}
560
561static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
562 struct gk20a_fixed_alloc *f)
563{
564 struct rb_node **new = &(a->fixed_allocs.rb_node);
565 struct rb_node *parent = NULL;
566
567 while (*new) {
568 struct gk20a_fixed_alloc *falloc =
569 container_of(*new, struct gk20a_fixed_alloc,
570 alloced_entry);
571
572 BUG_ON(!virt_addr_valid(falloc));
573
574 parent = *new;
575 if (f->start < falloc->start)
576 new = &((*new)->rb_left);
577 else if (f->start > falloc->start)
578 new = &((*new)->rb_right);
579 else
580 BUG_ON("Duplicate entries in allocated list!\n");
581 }
582
583 rb_link_node(&f->alloced_entry, parent, new);
584 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
585}
586
587/*
588 * Remove the fixed allocation containing @addr from the fixed alloc RB tree.
589 * Returns the fixed alloc for further processing.
590 *
591 * @a must be locked.
592 */
593static struct gk20a_fixed_alloc *balloc_free_fixed(
594 struct gk20a_buddy_allocator *a, u64 addr)
595{
596 struct rb_node *node = a->fixed_allocs.rb_node;
597 struct gk20a_fixed_alloc *falloc;
598
599 while (node) {
600 falloc = container_of(node,
601 struct gk20a_fixed_alloc, alloced_entry);
602
603 if (addr < falloc->start)
604 node = node->rb_left;
605 else if (addr > falloc->start)
606 node = node->rb_right;
607 else
608 break;
609 }
610
611 if (!node)
612 return NULL;
613
614 rb_erase(node, &a->fixed_allocs);
615
616 return falloc;
617}
618
619/*
620 * Find the parent range - doesn't necessarily need the parent to actually exist
621 * as a buddy. Finding an existing parent comes later...
622 */
623static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
624 u64 base, u64 order,
625 u64 *pbase, u64 *porder)
626{
627 u64 base_mask;
628 u64 shifted_base = balloc_base_shift(a, base);
629
630 order++;
631 base_mask = ~((a->blk_size << order) - 1);
632
633 shifted_base &= base_mask;
634
635 *pbase = balloc_base_unshift(a, shifted_base);
636 *porder = order;
637}
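/*
 * Illustrative sketch, not part of the original source: the parent of a
 * buddy is found purely arithmetically. With blk_size = 4K, the order-0
 * buddy at shifted base 0x7000 resolves to the order-1 parent spanning
 * [0x6000, 0x8000). The function below restates the math above with the
 * block size hard-coded; its name is made up for this example.
 */
static void example_parent_range_4k(u64 base, u64 order,
				    u64 *pbase, u64 *porder)
{
	u64 blk_size = 0x1000;				/* assumed 4K blocks */

	order++;					/* e.g. 0 -> 1 */
	*pbase = base & ~((blk_size << order) - 1);	/* 0x7000 -> 0x6000 */
	*porder = order;
}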
638
639/*
640 * Make a buddy at the passed address. This will also create all parent
641 * buddies necessary for this buddy to exist.
642 */
643static struct gk20a_buddy *__balloc_make_fixed_buddy(
644 struct gk20a_buddy_allocator *a, u64 base, u64 order)
645{
646 struct gk20a_buddy *bud = NULL;
647 struct list_head *order_list;
648 u64 cur_order = order, cur_base = base;
649
650 /*
651 * Algo:
652 * 1. Keep jumping up a buddy order until we find the real buddy that
653 * this buddy exists in.
654 * 2. Then work our way down through the buddy tree until we hit a dead
655 * end.
656 * 3. Start splitting buddies until we split to the one we need to
657 * make.
658 */
659 while (cur_order <= a->max_order) {
660 int found = 0;
661
662 order_list = balloc_get_order_list(a, cur_order);
663 list_for_each_entry(bud, order_list, buddy_entry) {
664 if (bud->start == cur_base) {
665 found = 1;
666 break;
667 }
668 }
669
670 if (found)
671 break;
672
673 __balloc_get_parent_range(a, cur_base, cur_order,
674 &cur_base, &cur_order);
675 }
676
677 if (cur_order > a->max_order) {
678 alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
679 return NULL;
680 }
681
682 /* Split this buddy as necessary until we get the target buddy. */
683 while (bud->start != base || bud->order != order) {
684 if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
685 balloc_coalesce(a, bud);
686 return NULL;
687 }
688
689 if (base < bud->right->start)
690 bud = bud->left;
691 else
692 bud = bud->right;
693
694 }
695
696 return bud;
697}
698
699static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
700 struct gk20a_fixed_alloc *falloc,
701 u64 base, u64 len)
702{
703 u64 shifted_base, inc_base;
704 u64 align_order;
705
706 shifted_base = balloc_base_shift(a, base);
707 if (shifted_base == 0)
708 align_order = __fls(len >> a->blk_shift);
709 else
710 align_order = min_t(u64,
711 __ffs(shifted_base >> a->blk_shift),
712 __fls(len >> a->blk_shift));
713
714 if (align_order > a->max_order) {
715 alloc_dbg(balloc_owner(a),
716 "Align order too big: %llu > %llu\n",
717 align_order, a->max_order);
718 return 0;
719 }
720
721 /*
722 * Generate a list of buddies that satisfy this allocation.
723 */
724 inc_base = shifted_base;
725 while (inc_base < (shifted_base + len)) {
726 u64 order_len = balloc_order_to_len(a, align_order);
727 u64 remaining;
728 struct gk20a_buddy *bud;
729
730 bud = __balloc_make_fixed_buddy(a,
731 balloc_base_unshift(a, inc_base),
732 align_order);
733 if (!bud) {
734 alloc_dbg(balloc_owner(a),
735 "Fixed buddy failed: {0x%llx, %llu}!\n",
736 balloc_base_unshift(a, inc_base),
737 align_order);
738 goto err_and_cleanup;
739 }
740
741 balloc_blist_rem(a, bud);
742 balloc_alloc_buddy(a, bud);
743 __balloc_buddy_list_add(a, bud, &falloc->buddies);
744
745		/* Bookkeeping. */
746 inc_base += order_len;
747 remaining = (shifted_base + len) - inc_base;
748 align_order = __ffs(inc_base >> a->blk_shift);
749
750 /* If we don't have much left - trim down align_order. */
751 if (balloc_order_to_len(a, align_order) > remaining)
752 align_order = __balloc_max_order_in(a, inc_base,
753 inc_base + remaining);
754 }
755
756 return base;
757
758err_and_cleanup:
759 while (!list_empty(&falloc->buddies)) {
760 struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
761 struct gk20a_buddy,
762 buddy_entry);
763
764 __balloc_buddy_list_rem(a, bud);
765 balloc_free_buddy(a, bud->start);
766 kmem_cache_free(buddy_cache, bud);
767 }
768
769 return 0;
770}
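/*
 * Illustrative walk-through, not part of the original source, assuming
 * blk_size = 4K (blk_shift = 12): a fixed alloc of len = 0x5000 at
 * shifted base 0x3000.
 *
 *   align_order = min(__ffs(0x3), __fls(0x5)) = min(0, 2) = 0
 *     -> order-0 buddy at 0x3000; inc_base = 0x4000, remaining = 0x4000
 *   align_order = __ffs(0x4) = 2; order-2 length (0x4000) == remaining
 *     -> order-2 buddy at 0x4000; inc_base = 0x8000, loop ends
 *
 * The 0x5000 range is covered by one 4K buddy and one 16K buddy, both
 * linked onto falloc->buddies.
 */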
771
772static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
773 struct gk20a_fixed_alloc *falloc)
774{
775 struct gk20a_buddy *bud;
776
777 while (!list_empty(&falloc->buddies)) {
778 bud = list_first_entry(&falloc->buddies,
779 struct gk20a_buddy,
780 buddy_entry);
781 __balloc_buddy_list_rem(a, bud);
782
783 balloc_free_buddy(a, bud->start);
784 balloc_blist_add(a, bud);
785 a->bytes_freed += balloc_order_to_len(a, bud->order);
786
787 /*
788		 * Attempt to defragment the allocation.
789 */
790 balloc_coalesce(a, bud);
791 }
792
793 kfree(falloc);
794}
795
796/*
797 * Allocate memory from the passed allocator.
798 */
799static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len)
800{
801 u64 order, addr;
802 int pte_size;
803 struct gk20a_buddy_allocator *a = __a->priv;
804
805 gk20a_alloc_trace_func();
806
807 alloc_lock(__a);
808
809 order = balloc_get_order(a, len);
810
811 if (order > a->max_order) {
812 alloc_unlock(__a);
813 alloc_dbg(balloc_owner(a), "Alloc fail\n");
814 gk20a_alloc_trace_func_done();
815 return 0;
816 }
817
818 /*
819 * For now pass the base address of the allocator's region to
820 * __get_pte_size(). This ensures we get the right page size for
821 * the alloc but we don't have to know what the real address is
822 * going to be quite yet.
823 *
824 * TODO: once userspace supports a unified address space pass 0 for
825 * the base. This will make only 'len' affect the PTE size.
826 */
827 if (a->flags & GPU_ALLOC_GVA_SPACE)
828 pte_size = __get_pte_size(a->vm, a->base, len);
829 else
830 pte_size = BALLOC_PTE_SIZE_ANY;
831
832 addr = __balloc_do_alloc(a, order, pte_size);
833
834 if (addr) {
835 a->bytes_alloced += len;
836 a->bytes_alloced_real += balloc_order_to_len(a, order);
837 alloc_dbg(balloc_owner(a),
838 "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
839 addr, order, len,
840 pte_size == gmmu_page_size_big ? "big" :
841 pte_size == gmmu_page_size_small ? "small" :
842 "NA/any");
843 } else {
844 alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
845 }
846
847 a->alloc_made = 1;
848
849 alloc_unlock(__a);
850
851 gk20a_alloc_trace_func_done();
852 return addr;
853}
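/*
 * Hypothetical usage sketch, not part of the original source: callers go
 * through the generic gk20a_alloc()/gk20a_free() entry points, which
 * dispatch to gk20a_buddy_balloc()/gk20a_buddy_bfree() via buddy_ops
 * below. A returned address of 0 means the allocation failed (the buddy
 * allocator never hands out address 0). The function name is made up.
 */
static int example_buddy_alloc_free(struct gk20a_allocator *a)
{
	u64 addr = gk20a_alloc(a, SZ_64K);

	if (!addr)
		return -ENOMEM;

	/* ... use [addr, addr + SZ_64K) ... */

	gk20a_free(a, addr);
	return 0;
}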
854
855/*
856 * Requires @__a to be locked.
857 */
858static u64 __gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
859 u64 base, u64 len)
860{
861 u64 ret, real_bytes = 0;
862 struct gk20a_buddy *bud;
863 struct gk20a_fixed_alloc *falloc = NULL;
864 struct gk20a_buddy_allocator *a = __a->priv;
865
866 gk20a_alloc_trace_func();
867
868 /* If base isn't aligned to an order 0 block, fail. */
869 if (base & (a->blk_size - 1))
870 goto fail;
871
872 if (len == 0)
873 goto fail;
874
875 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
876 if (!falloc)
877 goto fail;
878
879 INIT_LIST_HEAD(&falloc->buddies);
880 falloc->start = base;
881 falloc->end = base + len;
882
883 if (!balloc_is_range_free(a, base, base + len)) {
884 alloc_dbg(balloc_owner(a),
885 "Range not free: 0x%llx -> 0x%llx\n",
886 base, base + len);
887 goto fail_unlock;
888 }
889
890 ret = __balloc_do_alloc_fixed(a, falloc, base, len);
891 if (!ret) {
892 alloc_dbg(balloc_owner(a),
893 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
894 base, base + len);
895 goto fail_unlock;
896 }
897
898 balloc_alloc_fixed(a, falloc);
899
900 list_for_each_entry(bud, &falloc->buddies, buddy_entry)
901 real_bytes += (bud->end - bud->start);
902
903 a->bytes_alloced += len;
904 a->bytes_alloced_real += real_bytes;
905
906 alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);
907
908 gk20a_alloc_trace_func_done();
909 return base;
910
911fail_unlock:
912 alloc_unlock(__a);
913fail:
914 kfree(falloc);
915 gk20a_alloc_trace_func_done();
916 return 0;
917}
918
919/*
920 * Allocate a fixed address allocation. The address of the allocation is @base
921 * and the length is @len. This is not a typical buddy allocator operation and
922 * as such has a high possibility of failure if the address space is heavily in
923 * use.
924 *
925 * Please do not use this function unless _absolutely_ necessary.
926 */
927static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
928 u64 base, u64 len)
929{
930 u64 alloc;
931 struct gk20a_buddy_allocator *a = __a->priv;
932
933 alloc_lock(__a);
934 alloc = __gk20a_balloc_fixed_buddy(__a, base, len);
935 a->alloc_made = 1;
936 alloc_unlock(__a);
937
938 return alloc;
939}
940
941/*
942 * Free the passed allocation.
943 */
944static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr)
945{
946 struct gk20a_buddy *bud;
947 struct gk20a_fixed_alloc *falloc;
948 struct gk20a_buddy_allocator *a = __a->priv;
949
950 gk20a_alloc_trace_func();
951
952 if (!addr) {
953 gk20a_alloc_trace_func_done();
954 return;
955 }
956
957 alloc_lock(__a);
958
959 /*
960 * First see if this is a fixed alloc. If not fall back to a regular
961 * buddy.
962 */
963 falloc = balloc_free_fixed(a, addr);
964 if (falloc) {
965 __balloc_do_free_fixed(a, falloc);
966 goto done;
967 }
968
969 bud = balloc_free_buddy(a, addr);
970 if (!bud)
971 goto done;
972
973 balloc_blist_add(a, bud);
974 a->bytes_freed += balloc_order_to_len(a, bud->order);
975
976 /*
977	 * Attempt to defragment the allocation.
978 */
979 balloc_coalesce(a, bud);
980
981done:
982 alloc_unlock(__a);
983 alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
984 gk20a_alloc_trace_func_done();
985 return;
986}
987
988static bool gk20a_buddy_reserve_is_possible(struct gk20a_buddy_allocator *a,
989 struct gk20a_alloc_carveout *co)
990{
991 struct gk20a_alloc_carveout *tmp;
992 u64 co_base, co_end;
993
994 co_base = co->base;
995 co_end = co->base + co->length;
996
997 /*
998 * Not the fastest approach but we should not have that many carveouts
999 * for any reasonable allocator.
1000 */
1001 list_for_each_entry(tmp, &a->co_list, co_entry) {
1002 if ((co_base >= tmp->base &&
1003 co_base < (tmp->base + tmp->length)) ||
1004 (co_end >= tmp->base &&
1005 co_end < (tmp->base + tmp->length)))
1006 return false;
1007 }
1008
1009 return true;
1010}
1011
1012/*
1013 * Carveouts can only be reserved before any regular allocations have been
1014 * made.
1015 */
1016static int gk20a_buddy_reserve_co(struct gk20a_allocator *__a,
1017 struct gk20a_alloc_carveout *co)
1018{
1019 struct gk20a_buddy_allocator *a = __a->priv;
1020 u64 addr;
1021 int err = 0;
1022
1023 if (co->base < a->start || (co->base + co->length) > a->end ||
1024 a->alloc_made)
1025 return -EINVAL;
1026
1027 alloc_lock(__a);
1028
1029 if (!gk20a_buddy_reserve_is_possible(a, co)) {
1030 err = -EBUSY;
1031 goto done;
1032 }
1033
1034 /* Should not be possible to fail... */
1035 addr = __gk20a_balloc_fixed_buddy(__a, co->base, co->length);
1036 if (!addr) {
1037 err = -ENOMEM;
1038 pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
1039 goto done;
1040 }
1041
1042 list_add(&co->co_entry, &a->co_list);
1043
1044done:
1045 alloc_unlock(__a);
1046 return err;
1047}
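/*
 * Hypothetical usage sketch, not part of the original source: carveouts
 * must be reserved before the first regular allocation. The generic
 * gk20a_alloc_reserve_carveout()/gk20a_alloc_release_carveout() wrappers
 * are assumed to dispatch to the reserve/release ops here; the carveout
 * name, range and direct field initialization are made-up example values.
 */
static struct gk20a_alloc_carveout example_co = {
	.name	= "example-co",
	.base	= SZ_1M,
	.length	= SZ_64K,
};

static int example_reserve_carveout(struct gk20a_allocator *a)
{
	int err = gk20a_alloc_reserve_carveout(a, &example_co);

	if (err)
		return err;

	/* ... the range [SZ_1M, SZ_1M + SZ_64K) is now off limits ... */

	gk20a_alloc_release_carveout(a, &example_co);
	return 0;
}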
1048
1049/*
1050 * Carveouts can be released at any time.
1051 */
1052static void gk20a_buddy_release_co(struct gk20a_allocator *__a,
1053 struct gk20a_alloc_carveout *co)
1054{
1055 alloc_lock(__a);
1056
1057 list_del_init(&co->co_entry);
1058 gk20a_free(__a, co->base);
1059
1060 alloc_unlock(__a);
1061}
1062
1063static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
1064{
1065 struct gk20a_buddy_allocator *ba = a->priv;
1066
1067 return ba->length;
1068}
1069
1070static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
1071{
1072 struct gk20a_buddy_allocator *ba = a->priv;
1073
1074 return ba->start;
1075}
1076
1077static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
1078{
1079 struct gk20a_buddy_allocator *ba = a->priv;
1080 int inited = ba->initialized;
1081
1082 rmb();
1083 return inited;
1084}
1085
1086static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
1087{
1088 struct gk20a_buddy_allocator *ba = a->priv;
1089
1090 return ba->end;
1091}
1092
1093static u64 gk20a_buddy_alloc_space(struct gk20a_allocator *a)
1094{
1095 struct gk20a_buddy_allocator *ba = a->priv;
1096 u64 space;
1097
1098 alloc_lock(a);
1099 space = ba->end - ba->start -
1100 (ba->bytes_alloced_real - ba->bytes_freed);
1101 alloc_unlock(a);
1102
1103 return space;
1104}
1105
1106/*
1107 * Print the buddy allocator's top-level stats. If you pass @s as NULL then the
1108 * stats are printed to the kernel log. This lets this code be used for
1109 * debugging purposes internal to the allocator.
1110 */
1111static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
1112 struct seq_file *s, int lock)
1113{
1114 int i = 0;
1115 struct rb_node *node;
1116 struct gk20a_fixed_alloc *falloc;
1117 struct gk20a_alloc_carveout *tmp;
1118 struct gk20a_buddy_allocator *a = __a->priv;
1119
1120 __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
1121 a->base, a->length, a->blk_size);
1122 __alloc_pstat(s, __a, "Internal params:\n");
1123 __alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
1124 __alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
1125 __alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
1126 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
1127 __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);
1128
1129 if (lock)
1130 alloc_lock(__a);
1131
1132 if (!list_empty(&a->co_list)) {
1133 __alloc_pstat(s, __a, "\n");
1134 __alloc_pstat(s, __a, "Carveouts:\n");
1135 list_for_each_entry(tmp, &a->co_list, co_entry)
1136 __alloc_pstat(s, __a,
1137 " CO %2d: %-20s 0x%010llx + 0x%llx\n",
1138 i++, tmp->name, tmp->base, tmp->length);
1139 }
1140
1141 __alloc_pstat(s, __a, "\n");
1142 __alloc_pstat(s, __a, "Buddy blocks:\n");
1143 __alloc_pstat(s, __a, " Order Free Alloced Split\n");
1144 __alloc_pstat(s, __a, " ----- ---- ------- -----\n");
1145
1146 for (i = a->max_order; i >= 0; i--) {
1147 if (a->buddy_list_len[i] == 0 &&
1148 a->buddy_list_alloced[i] == 0 &&
1149 a->buddy_list_split[i] == 0)
1150 continue;
1151
1152 __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
1153 a->buddy_list_len[i],
1154 a->buddy_list_alloced[i],
1155 a->buddy_list_split[i]);
1156 }
1157
1158 __alloc_pstat(s, __a, "\n");
1159
1160 for (node = rb_first(&a->fixed_allocs), i = 1;
1161 node != NULL;
1162 node = rb_next(node)) {
1163 falloc = container_of(node,
1164 struct gk20a_fixed_alloc, alloced_entry);
1165
1166 __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1167 i, falloc->start, falloc->end);
1168 }
1169
1170 __alloc_pstat(s, __a, "\n");
1171 __alloc_pstat(s, __a, "Bytes allocated: %llu\n",
1172 a->bytes_alloced);
1173 __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
1174 a->bytes_alloced_real);
1175 __alloc_pstat(s, __a, "Bytes freed: %llu\n",
1176 a->bytes_freed);
1177
1178 if (lock)
1179 alloc_unlock(__a);
1180}
1181
1182static const struct gk20a_allocator_ops buddy_ops = {
1183 .alloc = gk20a_buddy_balloc,
1184 .free = gk20a_buddy_bfree,
1185
1186 .alloc_fixed = gk20a_balloc_fixed_buddy,
1187 /* .free_fixed not needed. */
1188
1189 .reserve_carveout = gk20a_buddy_reserve_co,
1190 .release_carveout = gk20a_buddy_release_co,
1191
1192 .base = gk20a_buddy_alloc_base,
1193 .length = gk20a_buddy_alloc_length,
1194 .end = gk20a_buddy_alloc_end,
1195 .inited = gk20a_buddy_alloc_inited,
1196 .space = gk20a_buddy_alloc_space,
1197
1198 .fini = gk20a_buddy_allocator_destroy,
1199
1200 .print_stats = gk20a_buddy_print_stats,
1201};
1202
1203/*
1204 * Initialize a buddy allocator. Returns 0 on success. This allocator does
1205 * not necessarily manage bytes. It manages distinct ranges of resources. This
1206 * allows the allocator to work for things like comp_tags, semaphores, etc.
1207 *
1208 * @allocator: Ptr to an allocator struct to init.
1209 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
1210 * get PTE size for GVA spaces.
1211 * @name: Name of the allocator. Doesn't have to be static storage.
1212 * @base: The base address of the resource pool being managed.
1213 * @size: Number of resources in the pool.
1214 * @blk_size: Minimum number of resources to allocate at once. For things like
1215 * semaphores this is 1. For GVA this might be as much as 64k. This
1216 *            corresponds to order 0. Must be a power of 2.
1217 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
1218 *             will try to pick a reasonable max order.
1219 * @flags: Extra flags, if necessary. See GPU_ALLOC_*.
1220 */
1221int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
1222 struct vm_gk20a *vm, const char *name,
1223 u64 base, u64 size, u64 blk_size,
1224 u64 max_order, u64 flags)
1225{
1226 int err;
1227 u64 pde_size;
1228 struct gk20a_buddy_allocator *a;
1229
1230 /* blk_size must be greater than 0 and a power of 2. */
1231 if (blk_size == 0)
1232 return -EINVAL;
1233 if (blk_size & (blk_size - 1))
1234 return -EINVAL;
1235
1236 if (max_order > GPU_BALLOC_MAX_ORDER)
1237 return -EINVAL;
1238
1239 /* If this is to manage a GVA space we need a VM. */
1240 if (flags & GPU_ALLOC_GVA_SPACE && !vm)
1241 return -EINVAL;
1242
1243 a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
1244 if (!a)
1245 return -ENOMEM;
1246
1247 err = __gk20a_alloc_common_init(__a, name, a, false, &buddy_ops);
1248 if (err)
1249 goto fail;
1250
1251 a->base = base;
1252 a->length = size;
1253 a->blk_size = blk_size;
1254 a->blk_shift = __ffs(blk_size);
1255 a->owner = __a;
1256
1257 /*
1258	 * If base is 0 then modify base to be the size of one block so that we
1259 * can return errors by returning addr == 0.
1260 */
1261 if (a->base == 0) {
1262 a->base = a->blk_size;
1263 a->length -= a->blk_size;
1264 }
1265
1266 a->vm = vm;
1267 if (flags & GPU_ALLOC_GVA_SPACE) {
1268 pde_size = ((u64)vm->big_page_size) << 10;
1269 a->pte_blk_order = balloc_get_order(a, pde_size);
1270 }
1271
1272 /*
1273 * When we have a GVA space with big_pages enabled the size and base
1274	 * must be PDE aligned. If big_pages are not enabled then this
1275	 * requirement does not apply.
1276 */
1277 if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages &&
1278 (base & ((vm->big_page_size << 10) - 1) ||
1279 size & ((vm->big_page_size << 10) - 1)))
1280 return -EINVAL;
1281
1282 a->flags = flags;
1283 a->max_order = max_order;
1284
1285 balloc_allocator_align(a);
1286 balloc_compute_max_order(a);
1287
1288 /* Shared buddy kmem_cache for all allocators. */
1289 if (!buddy_cache)
1290 buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
1291 if (!buddy_cache) {
1292 err = -ENOMEM;
1293 goto fail;
1294 }
1295
1296 a->alloced_buddies = RB_ROOT;
1297 a->fixed_allocs = RB_ROOT;
1298 INIT_LIST_HEAD(&a->co_list);
1299 err = balloc_init_lists(a);
1300 if (err)
1301 goto fail;
1302
1303 wmb();
1304 a->initialized = 1;
1305
1306 gk20a_init_alloc_debug(g, __a);
1307 alloc_dbg(__a, "New allocator: type buddy\n");
1308 alloc_dbg(__a, " base 0x%llx\n", a->base);
1309 alloc_dbg(__a, " size 0x%llx\n", a->length);
1310 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
1311 alloc_dbg(__a, " max_order %llu\n", a->max_order);
1312 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
1313
1314 return 0;
1315
1316fail:
1317 kfree(a);
1318 return err;
1319}
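/*
 * Hypothetical init sketch, not part of the original source: a GVA-space
 * buddy allocator tied to a VM so PTE sizes can be tracked per buddy.
 * The base, size and block size are made-up, PDE-aligned example values;
 * max_order = 0 lets the allocator pick a sane maximum itself.
 */
static int example_init_gva_allocator(struct gk20a *g, struct vm_gk20a *vm,
				      struct gk20a_allocator *a)
{
	return __gk20a_buddy_allocator_init(g, a, vm, "example-gva",
					    SZ_1G,	/* base */
					    SZ_1G,	/* size */
					    SZ_64K,	/* blk_size */
					    0,		/* max_order */
					    GPU_ALLOC_GVA_SPACE);
}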
1320
1321int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
1322 const char *name, u64 base, u64 size,
1323 u64 blk_size, u64 flags)
1324{
1325 return __gk20a_buddy_allocator_init(g, a, NULL, name,
1326					base, size, blk_size, 0, flags);
1327}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
deleted file mode 100644
index 5b011d8c..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
+++ /dev/null
@@ -1,206 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
20#include <linux/atomic.h>
21
22#include "gk20a_allocator.h"
23#include "lockless_allocator_priv.h"
24
25static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a)
26{
27 struct gk20a_lockless_allocator *pa = a->priv;
28
29 return pa->length;
30}
31
32static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a)
33{
34 struct gk20a_lockless_allocator *pa = a->priv;
35
36 return pa->base;
37}
38
39static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a)
40{
41 struct gk20a_lockless_allocator *pa = a->priv;
42 int inited = pa->inited;
43
44 rmb();
45 return inited;
46}
47
48static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a)
49{
50 struct gk20a_lockless_allocator *pa = a->priv;
51
52 return pa->base + pa->length;
53}
54
55static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len)
56{
57 struct gk20a_lockless_allocator *pa = a->priv;
58 int head, new_head, ret;
59 u64 addr = 0;
60
61 if (len != pa->blk_size)
62 return 0;
63
64 head = ACCESS_ONCE(pa->head);
65 while (head >= 0) {
66 new_head = ACCESS_ONCE(pa->next[head]);
67 ret = cmpxchg(&pa->head, head, new_head);
68 if (ret == head) {
69 addr = pa->base + head * pa->blk_size;
70 atomic_inc(&pa->nr_allocs);
71 alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
72 addr);
73 break;
74 }
75 head = ACCESS_ONCE(pa->head);
76 }
77 return addr;
78}
79
80static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr)
81{
82 struct gk20a_lockless_allocator *pa = a->priv;
83 int head, ret;
84 u64 cur_idx, rem;
85
86 cur_idx = addr - pa->base;
87 rem = do_div(cur_idx, pa->blk_size);
88
89 while (1) {
90 head = ACCESS_ONCE(pa->head);
91 ACCESS_ONCE(pa->next[cur_idx]) = head;
92 ret = cmpxchg(&pa->head, head, cur_idx);
93 if (ret == head) {
94 atomic_dec(&pa->nr_allocs);
95 alloc_dbg(a, "Free node # %llu\n", cur_idx);
96 break;
97 }
98 }
99}
100
101static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a)
102{
103 struct gk20a_lockless_allocator *pa = a->priv;
104
105 gk20a_fini_alloc_debug(a);
106
107 vfree(pa->next);
108 kfree(pa);
109}
110
111static void gk20a_lockless_print_stats(struct gk20a_allocator *a,
112 struct seq_file *s, int lock)
113{
114 struct gk20a_lockless_allocator *pa = a->priv;
115
116 __alloc_pstat(s, a, "Lockless allocator params:\n");
117 __alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
118 __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
119
120 /* Actual stats. */
121 __alloc_pstat(s, a, "Stats:\n");
122 __alloc_pstat(s, a, " Number allocs = %d\n",
123 atomic_read(&pa->nr_allocs));
124 __alloc_pstat(s, a, " Number free = %d\n",
125 pa->nr_nodes - atomic_read(&pa->nr_allocs));
126}
127
128static const struct gk20a_allocator_ops pool_ops = {
129 .alloc = gk20a_lockless_alloc,
130 .free = gk20a_lockless_free,
131
132 .base = gk20a_lockless_alloc_base,
133 .length = gk20a_lockless_alloc_length,
134 .end = gk20a_lockless_alloc_end,
135 .inited = gk20a_lockless_alloc_inited,
136
137 .fini = gk20a_lockless_alloc_destroy,
138
139 .print_stats = gk20a_lockless_print_stats,
140};
141
142int gk20a_lockless_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
143 const char *name, u64 base, u64 length,
144 u64 blk_size, u64 flags)
145{
146 int i;
147 int err;
148 int nr_nodes;
149 u64 count, rem;
150 struct gk20a_lockless_allocator *a;
151
152 if (!blk_size)
153 return -EINVAL;
154
155 /*
156	 * Ensure we have space for at least one node and that there's no overflow.
157	 * To control memory footprint, we require count < INT_MAX.
158 */
159 count = length;
160 rem = do_div(count, blk_size);
161 if (!base || !count || count > INT_MAX)
162 return -EINVAL;
163
164 a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL);
165 if (!a)
166 return -ENOMEM;
167
168 err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops);
169 if (err)
170 goto fail;
171
172 a->next = vzalloc(sizeof(*a->next) * count);
173 if (!a->next) {
174 err = -ENOMEM;
175 goto fail;
176 }
177
178 /* chain the elements together to form the initial free list */
179 nr_nodes = (int)count;
180 for (i = 0; i < nr_nodes; i++)
181 a->next[i] = i + 1;
182 a->next[nr_nodes - 1] = -1;
183
184 a->base = base;
185 a->length = length;
186 a->blk_size = blk_size;
187 a->nr_nodes = nr_nodes;
188 a->flags = flags;
189 atomic_set(&a->nr_allocs, 0);
190
191 wmb();
192 a->inited = true;
193
194 gk20a_init_alloc_debug(g, __a);
195 alloc_dbg(__a, "New allocator: type lockless\n");
196 alloc_dbg(__a, " base 0x%llx\n", a->base);
197 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
198 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
199 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
200
201 return 0;
202
203fail:
204 kfree(a);
205 return err;
206}
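/*
 * Hypothetical usage sketch, not part of the original source: the
 * lockless allocator only hands out fixed-size blocks, so every alloc
 * must request exactly blk_size bytes. The base, length and block size
 * below are made-up example values; gk20a_alloc()/gk20a_free() are the
 * generic entry points that dispatch through pool_ops above.
 */
static int example_lockless_usage(struct gk20a *g)
{
	struct gk20a_allocator a;
	u64 addr;
	int err;

	err = gk20a_lockless_allocator_init(g, &a, "example-lockless",
					    SZ_4K, SZ_1M, SZ_4K, 0);
	if (err)
		return err;

	addr = gk20a_alloc(&a, SZ_4K);	/* any other length returns 0 */
	if (addr)
		gk20a_free(&a, addr);

	/* Teardown through the .fini op is omitted in this sketch. */
	return 0;
}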
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
deleted file mode 100644
index 9717a726..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
+++ /dev/null
@@ -1,936 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/bitops.h>
20#include <linux/mm.h>
21
22#include "gk20a_allocator.h"
23#include "buddy_allocator_priv.h"
24#include "page_allocator_priv.h"
25
26#define palloc_dbg(a, fmt, arg...) \
27 alloc_dbg(palloc_owner(a), fmt, ##arg)
28
29static struct kmem_cache *page_alloc_cache;
30static struct kmem_cache *page_alloc_chunk_cache;
31static struct kmem_cache *page_alloc_slab_page_cache;
32static DEFINE_MUTEX(meta_data_cache_lock);
33
34/*
35 * Handle the book-keeping for these operations.
36 */
37static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
38 struct page_alloc_slab_page *page)
39{
40 BUG_ON(page->state != SP_NONE);
41 list_add(&page->list_entry, &slab->empty);
42 slab->nr_empty++;
43 page->state = SP_EMPTY;
44}
45static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
46 struct page_alloc_slab_page *page)
47{
48 BUG_ON(page->state != SP_NONE);
49 list_add(&page->list_entry, &slab->partial);
50 slab->nr_partial++;
51 page->state = SP_PARTIAL;
52}
53static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
54 struct page_alloc_slab_page *page)
55{
56 BUG_ON(page->state != SP_NONE);
57 list_add(&page->list_entry, &slab->full);
58 slab->nr_full++;
59 page->state = SP_FULL;
60}
61
62static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
63 struct page_alloc_slab_page *page)
64{
65 list_del_init(&page->list_entry);
66 slab->nr_empty--;
67 page->state = SP_NONE;
68}
69static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
70 struct page_alloc_slab_page *page)
71{
72 list_del_init(&page->list_entry);
73 slab->nr_partial--;
74 page->state = SP_NONE;
75}
76static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
77 struct page_alloc_slab_page *page)
78{
79 list_del_init(&page->list_entry);
80 slab->nr_full--;
81 page->state = SP_NONE;
82}
83
84static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
85{
86 struct gk20a_page_allocator *va = a->priv;
87
88 return gk20a_alloc_length(&va->source_allocator);
89}
90
91static u64 gk20a_page_alloc_base(struct gk20a_allocator *a)
92{
93 struct gk20a_page_allocator *va = a->priv;
94
95 return gk20a_alloc_base(&va->source_allocator);
96}
97
98static int gk20a_page_alloc_inited(struct gk20a_allocator *a)
99{
100 struct gk20a_page_allocator *va = a->priv;
101
102 return gk20a_alloc_initialized(&va->source_allocator);
103}
104
105static u64 gk20a_page_alloc_end(struct gk20a_allocator *a)
106{
107 struct gk20a_page_allocator *va = a->priv;
108
109 return gk20a_alloc_end(&va->source_allocator);
110}
111
112static u64 gk20a_page_alloc_space(struct gk20a_allocator *a)
113{
114 struct gk20a_page_allocator *va = a->priv;
115
116 return gk20a_alloc_space(&va->source_allocator);
117}
118
119static int gk20a_page_reserve_co(struct gk20a_allocator *a,
120 struct gk20a_alloc_carveout *co)
121{
122 struct gk20a_page_allocator *va = a->priv;
123
124 return gk20a_alloc_reserve_carveout(&va->source_allocator, co);
125}
126
127static void gk20a_page_release_co(struct gk20a_allocator *a,
128 struct gk20a_alloc_carveout *co)
129{
130 struct gk20a_page_allocator *va = a->priv;
131
132 gk20a_alloc_release_carveout(&va->source_allocator, co);
133}
134
135static void __gk20a_free_pages(struct gk20a_page_allocator *a,
136 struct gk20a_page_alloc *alloc,
137 bool free_buddy_alloc)
138{
139 struct page_alloc_chunk *chunk;
140
141 while (!list_empty(&alloc->alloc_chunks)) {
142 chunk = list_first_entry(&alloc->alloc_chunks,
143 struct page_alloc_chunk,
144 list_entry);
145 list_del(&chunk->list_entry);
146
147 if (free_buddy_alloc)
148 gk20a_free(&a->source_allocator, chunk->base);
149 kfree(chunk);
150 }
151
152 kfree(alloc);
153}
154
155static int __insert_page_alloc(struct gk20a_page_allocator *a,
156 struct gk20a_page_alloc *alloc)
157{
158 struct rb_node **new = &a->allocs.rb_node;
159 struct rb_node *parent = NULL;
160
161 while (*new) {
162 struct gk20a_page_alloc *tmp =
163 container_of(*new, struct gk20a_page_alloc,
164 tree_entry);
165
166 parent = *new;
167 if (alloc->base < tmp->base) {
168 new = &((*new)->rb_left);
169 } else if (alloc->base > tmp->base) {
170 new = &((*new)->rb_right);
171 } else {
172 WARN(1, "Duplicate entries in allocated list!\n");
173 return 0;
174 }
175 }
176
177 rb_link_node(&alloc->tree_entry, parent, new);
178 rb_insert_color(&alloc->tree_entry, &a->allocs);
179
180 return 0;
181}
182
183static struct gk20a_page_alloc *__find_page_alloc(
184 struct gk20a_page_allocator *a,
185 u64 addr)
186{
187 struct rb_node *node = a->allocs.rb_node;
188 struct gk20a_page_alloc *alloc;
189
190 while (node) {
191 alloc = container_of(node, struct gk20a_page_alloc, tree_entry);
192
193 if (addr < alloc->base)
194 node = node->rb_left;
195 else if (addr > alloc->base)
196 node = node->rb_right;
197 else
198 break;
199 }
200
201 if (!node)
202 return NULL;
203
204 rb_erase(node, &a->allocs);
205
206 return alloc;
207}
208
209static struct page_alloc_slab_page *alloc_slab_page(
210 struct gk20a_page_allocator *a,
211 struct page_alloc_slab *slab)
212{
213 struct page_alloc_slab_page *slab_page;
214
215 slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
216 if (!slab_page) {
217 palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
218 return ERR_PTR(-ENOMEM);
219 }
220
221 memset(slab_page, 0, sizeof(*slab_page));
222
223 slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size);
224 if (!slab_page->page_addr) {
225 kfree(slab_page);
226 palloc_dbg(a, "OOM: vidmem is full!\n");
227 return ERR_PTR(-ENOMEM);
228 }
229
230 INIT_LIST_HEAD(&slab_page->list_entry);
231 slab_page->slab_size = slab->slab_size;
232 slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
233 slab_page->nr_objects_alloced = 0;
234 slab_page->owner = slab;
235 slab_page->state = SP_NONE;
236
237 a->pages_alloced++;
238
239 palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
240 slab_page->page_addr, slab_page->slab_size);
241
242 return slab_page;
243}
244
245static void free_slab_page(struct gk20a_page_allocator *a,
246 struct page_alloc_slab_page *slab_page)
247{
248 palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
249
250 BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
251 slab_page->nr_objects_alloced != 0 ||
252 slab_page->bitmap != 0);
253
254 gk20a_free(&a->source_allocator, slab_page->page_addr);
255 a->pages_freed++;
256
257 kmem_cache_free(page_alloc_slab_page_cache, slab_page);
258}
259
260/*
261 * This expects @alloc to have 1 empty page_alloc_chunk already added to the
262 * alloc_chunks list.
263 */
264static int __do_slab_alloc(struct gk20a_page_allocator *a,
265 struct page_alloc_slab *slab,
266 struct gk20a_page_alloc *alloc)
267{
268 struct page_alloc_slab_page *slab_page = NULL;
269 struct page_alloc_chunk *chunk;
270 unsigned long offs;
271
272 /*
273 * Check the partial and empty lists to see if we have some space
274 * readily available. Take the slab_page out of what ever list it
275 * was in since it may be put back into a different list later.
276 */
277 if (!list_empty(&slab->partial)) {
278 slab_page = list_first_entry(&slab->partial,
279 struct page_alloc_slab_page,
280 list_entry);
281 del_slab_page_from_partial(slab, slab_page);
282 } else if (!list_empty(&slab->empty)) {
283 slab_page = list_first_entry(&slab->empty,
284 struct page_alloc_slab_page,
285 list_entry);
286 del_slab_page_from_empty(slab, slab_page);
287 }
288
289 if (!slab_page) {
290 slab_page = alloc_slab_page(a, slab);
291 if (IS_ERR(slab_page))
292 return PTR_ERR(slab_page);
293 }
294
295 /*
296 * We now have a slab_page. Do the alloc.
297 */
298 offs = bitmap_find_next_zero_area(&slab_page->bitmap,
299 slab_page->nr_objects,
300 0, 1, 0);
301 if (offs >= slab_page->nr_objects) {
302 WARN(1, "Empty/partial slab with no free objects?");
303
304 /* Add the buggy page to the full list... This isn't ideal. */
305 add_slab_page_to_full(slab, slab_page);
306 return -ENOMEM;
307 }
308
309 bitmap_set(&slab_page->bitmap, offs, 1);
310 slab_page->nr_objects_alloced++;
311
312 if (slab_page->nr_objects_alloced < slab_page->nr_objects)
313 add_slab_page_to_partial(slab, slab_page);
314 else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
315 add_slab_page_to_full(slab, slab_page);
316 else
317 BUG(); /* Should be impossible to hit this. */
318
319 /*
320 * Handle building the gk20a_page_alloc struct. We expect one
321 * page_alloc_chunk to be present.
322 */
323 alloc->slab_page = slab_page;
324 alloc->nr_chunks = 1;
325 alloc->length = slab_page->slab_size;
326 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
327
328 chunk = list_first_entry(&alloc->alloc_chunks,
329 struct page_alloc_chunk, list_entry);
330 chunk->base = alloc->base;
331 chunk->length = alloc->length;
332
333 return 0;
334}
335
336/*
337 * Allocate from a slab instead of directly from the page allocator.
338 */
339static struct gk20a_page_alloc *__gk20a_alloc_slab(
340 struct gk20a_page_allocator *a, u64 len)
341{
342 int err, slab_nr;
343 struct page_alloc_slab *slab;
344 struct gk20a_page_alloc *alloc = NULL;
345 struct page_alloc_chunk *chunk = NULL;
346
347 /*
348 * Align the length to a page and then divide by the page size (4k for
349 * this code). ilog2() of that then gets us the correct slab to use.
350 */
351 slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
352 slab = &a->slabs[slab_nr];
353
354 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
355 if (!alloc) {
356 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
357 goto fail;
358 }
359 chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
360 if (!chunk) {
361 palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
362 goto fail;
363 }
364
365 INIT_LIST_HEAD(&alloc->alloc_chunks);
366 list_add(&chunk->list_entry, &alloc->alloc_chunks);
367
368 err = __do_slab_alloc(a, slab, alloc);
369 if (err)
370 goto fail;
371
372 palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
373 len, slab_nr, alloc->base);
374 a->nr_slab_allocs++;
375
376 return alloc;
377
378fail:
379 kfree(alloc);
380 kfree(chunk);
381 return NULL;
382}
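/*
 * Illustrative mapping, not part of the original source, of request
 * length to slab index, assuming 4K system pages and a 64K allocator
 * page_size (slabs of 4K, 8K, 16K and 32K):
 *
 *   len =  1K: PAGE_ALIGN(1K)  >> 12 = 1, ilog2(1) = 0 ->  4K slab
 *   len =  6K: PAGE_ALIGN(6K)  >> 12 = 2, ilog2(2) = 1 ->  8K slab
 *   len = 16K: PAGE_ALIGN(16K) >> 12 = 4, ilog2(4) = 2 -> 16K slab
 */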
383
384static void __gk20a_free_slab(struct gk20a_page_allocator *a,
385 struct gk20a_page_alloc *alloc)
386{
387 struct page_alloc_slab_page *slab_page = alloc->slab_page;
388 struct page_alloc_slab *slab = slab_page->owner;
389 enum slab_page_state new_state;
390 int offs;
391
392 offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
393 bitmap_clear(&slab_page->bitmap, offs, 1);
394
395 slab_page->nr_objects_alloced--;
396
397 if (slab_page->nr_objects_alloced == 0)
398 new_state = SP_EMPTY;
399 else
400 new_state = SP_PARTIAL;
401
402 /*
403 * Need to migrate the page to a different list.
404 */
405 if (new_state != slab_page->state) {
406 /* Delete - can't be in empty. */
407 if (slab_page->state == SP_PARTIAL)
408 del_slab_page_from_partial(slab, slab_page);
409 else
410 del_slab_page_from_full(slab, slab_page);
411
412 /* And add. */
413 if (new_state == SP_EMPTY) {
414 if (list_empty(&slab->empty))
415 add_slab_page_to_empty(slab, slab_page);
416 else
417 free_slab_page(a, slab_page);
418 } else {
419 add_slab_page_to_partial(slab, slab_page);
420 }
421 }
422
423 /*
424 * Now handle the page_alloc.
425 */
426 __gk20a_free_pages(a, alloc, false);
427 a->nr_slab_frees++;
428
429 return;
430}
431
432/*
433 * Allocate physical pages. Since the underlying allocator is a buddy
434 * allocator, each individual allocation it returns is contiguous. However,
435 * since the space can be fragmented, this allocator will collate multiple
436 * smaller, non-contiguous chunks into one allocation if necessary.
437 */
438static struct gk20a_page_alloc *__do_gk20a_alloc_pages(
439 struct gk20a_page_allocator *a, u64 pages)
440{
441 struct gk20a_page_alloc *alloc;
442 struct page_alloc_chunk *c;
443 u64 max_chunk_len = pages << a->page_shift;
444 int i = 0;
445
446 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
447 if (!alloc)
448 goto fail;
449
450 memset(alloc, 0, sizeof(*alloc));
451
452 INIT_LIST_HEAD(&alloc->alloc_chunks);
453 alloc->length = pages << a->page_shift;
454
455 while (pages) {
456 u64 chunk_addr = 0;
457 u64 chunk_pages = (u64)1 << __fls(pages);
458 u64 chunk_len = chunk_pages << a->page_shift;
459
460 /*
461 * Take care of the possibility that the allocation must be
462 * contiguous. If this is not the first iteration then that
463 * means the first iteration failed to alloc the entire
464 * requested size. The buddy allocator guarantees any given
465 * single alloc is contiguous.
466 */
467 if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
468 goto fail_cleanup;
469
470 if (chunk_len > max_chunk_len)
471 chunk_len = max_chunk_len;
472
473 /*
474 * Keep attempting to allocate in smaller chunks until the alloc
475 * either succeeds or is smaller than the page_size of the
476 * allocator (i.e the allocator is OOM).
477 */
478 do {
479 chunk_addr = gk20a_alloc(&a->source_allocator,
480 chunk_len);
481
482 /* Divide by 2 and try again */
483 if (!chunk_addr) {
484 palloc_dbg(a, "balloc failed: 0x%llx\n",
485 chunk_len);
486 chunk_len >>= 1;
487 max_chunk_len = chunk_len;
488 }
489 } while (!chunk_addr && chunk_len >= a->page_size);
490
491 chunk_pages = chunk_len >> a->page_shift;
492
493 if (!chunk_addr) {
494 palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
495 goto fail_cleanup;
496 }
497
498 c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
499 if (!c) {
500 gk20a_free(&a->source_allocator, chunk_addr);
501 goto fail_cleanup;
502 }
503
504 pages -= chunk_pages;
505
506 c->base = chunk_addr;
507 c->length = chunk_len;
508 list_add(&c->list_entry, &alloc->alloc_chunks);
509
510 i++;
511 }
512
513 alloc->nr_chunks = i;
514 c = list_first_entry(&alloc->alloc_chunks,
515 struct page_alloc_chunk, list_entry);
516 alloc->base = c->base;
517
518 return alloc;
519
520fail_cleanup:
521 while (!list_empty(&alloc->alloc_chunks)) {
522 c = list_first_entry(&alloc->alloc_chunks,
523 struct page_alloc_chunk, list_entry);
524 list_del(&c->list_entry);
525 gk20a_free(&a->source_allocator, c->base);
526 kfree(c);
527 }
528 kfree(alloc);
529fail:
530 return ERR_PTR(-ENOMEM);
531}
532
533static struct gk20a_page_alloc *__gk20a_alloc_pages(
534 struct gk20a_page_allocator *a, u64 len)
535{
536 struct gk20a_page_alloc *alloc = NULL;
537 struct page_alloc_chunk *c;
538 u64 pages;
539 int i = 0;
540
541 pages = ALIGN(len, a->page_size) >> a->page_shift;
542
543 alloc = __do_gk20a_alloc_pages(a, pages);
544 if (IS_ERR(alloc)) {
545 palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
546 pages << a->page_shift, pages);
547 return NULL;
548 }
549
550 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
551 pages << a->page_shift, pages, alloc->base);
552 list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
553 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
554 i++, c->base, c->length);
555 }
556
557 return alloc;
558}
559
560/*
561 * Allocate enough pages to satisfy @len. Page size is determined at
562 * initialization of the allocator.
563 *
564 * The return value is actually a pointer to a struct gk20a_page_alloc. This
565 * is because it doesn't make much sense to return the address of the first
566 * page in the list of pages (since they could be discontiguous). This has
567 * precedent in the dma_alloc APIs, though it's really just an artifact of
568 * the fact that the gk20a_alloc() API requires a u64 return type.
569 */
570static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
571{
572 struct gk20a_page_allocator *a = page_allocator(__a);
573 struct gk20a_page_alloc *alloc = NULL;
574 u64 real_len;
575
576 /*
577 * If we want contig pages we have to round up to a power of two. It's
578 * easier to do that here than in the buddy allocator.
579 */
580 real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
581 roundup_pow_of_two(len) : len;
582
583 alloc_lock(__a);
584 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
585 real_len <= (a->page_size / 2))
586 alloc = __gk20a_alloc_slab(a, real_len);
587 else
588 alloc = __gk20a_alloc_pages(a, real_len);
589
590 if (!alloc) {
591 alloc_unlock(__a);
592 return 0;
593 }
594
595 __insert_page_alloc(a, alloc);
596
597 a->nr_allocs++;
598 if (real_len > a->page_size / 2)
599 a->pages_alloced += alloc->length >> a->page_shift;
600 alloc_unlock(__a);
601
602 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
603 return alloc->base;
604 else
605 return (u64) (uintptr_t) alloc;
606}
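/*
 * Hypothetical usage sketch, not part of the original source: when the
 * allocator was created without GPU_ALLOC_NO_SCATTER_GATHER, the u64
 * returned by gk20a_alloc() on a page allocator is really a pointer to a
 * struct gk20a_page_alloc describing the chunks, not a plain address.
 * The function name is made up for this example.
 */
static void example_walk_page_alloc(struct gk20a_allocator *pa, u64 len)
{
	u64 handle = gk20a_alloc(pa, len);
	struct gk20a_page_alloc *alloc;
	struct page_alloc_chunk *c;

	if (!handle)
		return;

	alloc = (struct gk20a_page_alloc *)(uintptr_t)handle;
	list_for_each_entry(c, &alloc->alloc_chunks, list_entry)
		pr_info("chunk: 0x%llx + 0x%llx\n", c->base, c->length);

	gk20a_free(pa, handle);
}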
607
608/*
609 * Note: this will remove the gk20a_page_alloc struct from the RB tree
610 * if it's found.
611 */
612static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
613{
614 struct gk20a_page_allocator *a = page_allocator(__a);
615 struct gk20a_page_alloc *alloc;
616
617 alloc_lock(__a);
618
619 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
620 alloc = __find_page_alloc(a, base);
621 else
622 alloc = __find_page_alloc(a,
623 ((struct gk20a_page_alloc *)(uintptr_t)base)->base);
624
625 if (!alloc) {
626 palloc_dbg(a, "Hrm, found no alloc?\n");
627 goto done;
628 }
629
630 a->nr_frees++;
631
632 palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
633 alloc->length, alloc->base);
634
635 /*
636 * Frees *alloc.
637 */
638 if (alloc->slab_page) {
639 __gk20a_free_slab(a, alloc);
640 } else {
641 a->pages_freed += (alloc->length >> a->page_shift);
642 __gk20a_free_pages(a, alloc, true);
643 }
644
645done:
646 alloc_unlock(__a);
647}
648
649static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed(
650 struct gk20a_page_allocator *a, u64 base, u64 length)
651{
652 struct gk20a_page_alloc *alloc;
653 struct page_alloc_chunk *c;
654
655 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
656 c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
657 if (!alloc || !c)
658 goto fail;
659
660 alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length);
661 if (!alloc->base) {
662 WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base);
663 goto fail;
664 }
665
666 alloc->nr_chunks = 1;
667 alloc->length = length;
668 INIT_LIST_HEAD(&alloc->alloc_chunks);
669
670 c->base = alloc->base;
671 c->length = length;
672 list_add(&c->list_entry, &alloc->alloc_chunks);
673
674 return alloc;
675
676fail:
677 kfree(c);
678 kfree(alloc);
679 return ERR_PTR(-ENOMEM);
680}
681
682static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a,
683 u64 base, u64 len)
684{
685 struct gk20a_page_allocator *a = page_allocator(__a);
686 struct gk20a_page_alloc *alloc = NULL;
687 struct page_alloc_chunk *c;
688 u64 aligned_len, pages;
689 int i = 0;
690
691 aligned_len = ALIGN(len, a->page_size);
692 pages = aligned_len >> a->page_shift;
693
694 alloc_lock(__a);
695
696 alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len);
697 if (IS_ERR(alloc)) {
698 alloc_unlock(__a);
699 return 0;
700 }
701
702 __insert_page_alloc(a, alloc);
703 alloc_unlock(__a);
704
705 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
706 alloc->base, aligned_len, pages);
707 list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
708 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
709 i++, c->base, c->length);
710 }
711
712 a->nr_fixed_allocs++;
713 a->pages_alloced += pages;
714
715 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
716 return alloc->base;
717 else
718 return (u64) (uintptr_t) alloc;
719}
720
721static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
722 u64 base, u64 len)
723{
724 struct gk20a_page_allocator *a = page_allocator(__a);
725 struct gk20a_page_alloc *alloc;
726
727 alloc_lock(__a);
728
729 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
730 alloc = __find_page_alloc(a, base);
731 if (!alloc)
732 goto done;
733 } else {
734 alloc = (struct gk20a_page_alloc *) (uintptr_t) base;
735 }
736
737 palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
738 alloc->base, alloc->length);
739
740 a->nr_fixed_frees++;
741 a->pages_freed += (alloc->length >> a->page_shift);
742
743 /*
744 * This works for the time being since the buddy allocator
745 * uses the same free function for both fixed and regular
746 * allocs. This would have to be updated if the underlying
747 * allocator were to change.
748 */
749 __gk20a_free_pages(a, alloc, true);
750
751done:
752 alloc_unlock(__a);
753}
754
755static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a)
756{
757 struct gk20a_page_allocator *a = page_allocator(__a);
758
759 alloc_lock(__a);
760 kfree(a);
761 __a->priv = NULL;
762 alloc_unlock(__a);
763}
764
765static void gk20a_page_print_stats(struct gk20a_allocator *__a,
766 struct seq_file *s, int lock)
767{
768 struct gk20a_page_allocator *a = page_allocator(__a);
769 int i;
770
771 if (lock)
772 alloc_lock(__a);
773
774 __alloc_pstat(s, __a, "Page allocator:\n");
775 __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs);
776 __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
777 __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
778 __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
779 __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
780 __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
781 __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
782 __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
783 __alloc_pstat(s, __a, "\n");
784
785 /*
786 * Slab info.
787 */
788 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
789 __alloc_pstat(s, __a, "Slabs:\n");
790 __alloc_pstat(s, __a, " size empty partial full\n");
791 __alloc_pstat(s, __a, " ---- ----- ------- ----\n");
792
793 for (i = 0; i < a->nr_slabs; i++) {
794 struct page_alloc_slab *slab = &a->slabs[i];
795
796 __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
797 slab->slab_size,
798 slab->nr_empty, slab->nr_partial,
799 slab->nr_full);
800 }
801 __alloc_pstat(s, __a, "\n");
802 }
803
804 __alloc_pstat(s, __a, "Source alloc: %s\n",
805 a->source_allocator.name);
806 gk20a_alloc_print_stats(&a->source_allocator, s, lock);
807
808 if (lock)
809 alloc_unlock(__a);
810}
811
812static const struct gk20a_allocator_ops page_ops = {
813 .alloc = gk20a_page_alloc,
814 .free = gk20a_page_free,
815
816 .alloc_fixed = gk20a_page_alloc_fixed,
817 .free_fixed = gk20a_page_free_fixed,
818
819 .reserve_carveout = gk20a_page_reserve_co,
820 .release_carveout = gk20a_page_release_co,
821
822 .base = gk20a_page_alloc_base,
823 .length = gk20a_page_alloc_length,
824 .end = gk20a_page_alloc_end,
825 .inited = gk20a_page_alloc_inited,
826 .space = gk20a_page_alloc_space,
827
828 .fini = gk20a_page_allocator_destroy,
829
830 .print_stats = gk20a_page_print_stats,
831};
832
833/*
834 * nr_slabs is computed as follows: divide page_size by 4096 to get number of
835 * 4k pages in page_size. Then take the base 2 log of that to get number of
836 * slabs. For a 64k page_size that works out like:
837 *
838 * 1024*64 / 1024*4 = 16
839 * ilog2(16) = 4
840 *
841 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
842 */
843static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a)
844{
845 size_t nr_slabs = ilog2(a->page_size >> 12);
846 unsigned int i;
847
848 a->slabs = kcalloc(nr_slabs,
849 sizeof(struct page_alloc_slab),
850 GFP_KERNEL);
851 if (!a->slabs)
852 return -ENOMEM;
853 a->nr_slabs = nr_slabs;
854
855 for (i = 0; i < nr_slabs; i++) {
856 struct page_alloc_slab *slab = &a->slabs[i];
857
858 slab->slab_size = SZ_4K * (1 << i);
859 INIT_LIST_HEAD(&slab->empty);
860 INIT_LIST_HEAD(&slab->partial);
861 INIT_LIST_HEAD(&slab->full);
862 slab->nr_empty = 0;
863 slab->nr_partial = 0;
864 slab->nr_full = 0;
865 }
866
867 return 0;
868}
869
870int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
871 const char *name, u64 base, u64 length,
872 u64 blk_size, u64 flags)
873{
874 struct gk20a_page_allocator *a;
875 char buddy_name[sizeof(__a->name)];
876 int err;
877
878 mutex_lock(&meta_data_cache_lock);
879 if (!page_alloc_cache)
880 page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
881 if (!page_alloc_chunk_cache)
882 page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
883 if (!page_alloc_slab_page_cache)
884 page_alloc_slab_page_cache =
885 KMEM_CACHE(page_alloc_slab_page, 0);
886 mutex_unlock(&meta_data_cache_lock);
887
888 if (!page_alloc_cache || !page_alloc_chunk_cache)
889 return -ENOMEM;
890
891 if (blk_size < SZ_4K)
892 return -EINVAL;
893
894 a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
895 if (!a)
896 return -ENOMEM;
897
898 err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops);
899 if (err)
900 goto fail;
901
902 a->base = base;
903 a->length = length;
904 a->page_size = blk_size;
905 a->page_shift = __ffs(blk_size);
906 a->allocs = RB_ROOT;
907 a->owner = __a;
908 a->flags = flags;
909
910 if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
911 err = gk20a_page_alloc_init_slabs(a);
912 if (err)
913 goto fail;
914 }
915
916 snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
917
918 err = gk20a_buddy_allocator_init(g, &a->source_allocator, buddy_name,
919 base, length, blk_size, 0);
920 if (err)
921 goto fail;
922
923 gk20a_init_alloc_debug(g, __a);
924 palloc_dbg(a, "New allocator: type page\n");
925 palloc_dbg(a, " base 0x%llx\n", a->base);
926 palloc_dbg(a, " size 0x%llx\n", a->length);
927 palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
928 palloc_dbg(a, " flags 0x%llx\n", a->flags);
929 palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
930
931 return 0;
932
933fail:
934 kfree(a);
935 return err;
936}
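
For context on the file removed above: a minimal sketch of how a caller drove this page allocator through the generic gk20a_* API before the rename. The region sizes, flags, and the wrapper function name are illustrative assumptions; only the init signature and the alloc/free/destroy calls come from the deleted file and the call sites elsewhere in this patch.

/*
 * Illustrative only: initialize a page allocator over a made-up 64MB
 * range, make one regular allocation, free it, and tear down.
 */
static int example_page_allocator_usage(struct gk20a *g)
{
	struct gk20a_allocator pa;
	u64 addr;
	int err;

	err = gk20a_page_allocator_init(g, &pa, "example-pages",
					SZ_1M, 64 * SZ_1M, SZ_64K,
					GPU_ALLOC_NO_SCATTER_GATHER);
	if (err)
		return err;

	/* With GPU_ALLOC_NO_SCATTER_GATHER the return value is a base address. */
	addr = gk20a_alloc(&pa, SZ_128K);
	if (!addr) {
		err = -ENOMEM;
		goto out;
	}

	gk20a_free(&pa, addr);
out:
	gk20a_alloc_destroy(&pa);
	return err;
}
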
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 39562ec1..2ee2dd43 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3400,7 +3400,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3400 gr->ctx_vars.local_golden_image = NULL; 3400 gr->ctx_vars.local_golden_image = NULL;
3401 3401
3402 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) 3402 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
3403 nvgpu_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); 3403 nvgpu_kfree(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3404 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3404 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3405 3405
3406 gk20a_comptag_allocator_destroy(&gr->comp_tags); 3406 gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -7998,7 +7998,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7998 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; 7998 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
7999 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); 7999 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
8000 8000
8001 map = nvgpu_alloc(map_size, true); 8001 map = nvgpu_kalloc(map_size, true);
8002 if (!map) 8002 if (!map)
8003 return -ENOMEM; 8003 return -ENOMEM;
8004 8004
@@ -8088,7 +8088,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8088 return 0; 8088 return 0;
8089cleanup: 8089cleanup:
8090 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); 8090 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
8091 nvgpu_free(map); 8091 nvgpu_kfree(map);
8092 return -EINVAL; 8092 return -EINVAL;
8093} 8093}
8094 8094
diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
deleted file mode 100644
index f9b03e0e..00000000
--- a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
+++ /dev/null
@@ -1,121 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17/*
18 * Basics:
19 *
20 * - Lockless memory allocator for fixed-size structures, whose
21 * size is defined up front at init time.
22 * - Memory footprint scales linearly w/ the number of structures in
23 * the pool. It is ~= sizeof(int) * N.
24 * - Memory is pre-allocated by the client. The allocator itself
25 * only computes the addresses for allocations.
26 * - Limit of INT_MAX nodes that the allocator can be responsible for.
27 *
28 * Implementation details:
29 *
30 * The allocator maintains a single list of free nodes. We allocate &
31 * free nodes from the head of the list. We rely on the cmpxchg() operator
32 * to maintain atomicity on the head.
33 *
34 * So, both allocs & frees are O(1)!!
35 *
36 * -- Definitions --
37 * Block Size - size of a single structure that this allocator will
38 * allocate.
39 * Node - one of the elements of size blk_size in the
40 * client-allocated buffer.
41 * Node Index - zero-based index of a node in the client-allocated
42 * contiguous buffer.
43 *
44 * -- Initial State --
45 * We maintain the following to track the state of the free list:
46 *
47 * 1) A "head" index to track the index of the first free node in the list
48 * 2) A "next" array to track the index of the next free node in the list
49 * for every node. So next[head] will give the index of the 2nd free
50 * element in the list.
51 *
52 * So, to begin with, the free list consists of all node indices, and each
53 * position in the next array contains index N + 1:
54 *
55 * head = 0
56 * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
57 * free_list = 0->1->2->3->4->-1
58 *
59 * -- Allocations --
60 * 1) Read the current head (aka acq_head)
61 * 2) Read next[acq_head], to get the 2nd free element (aka new_head)
62 * 3) cmp_xchg(&head, acq_head, new_head)
63 * 4) If it succeeds, compute the address of the node, based on
64 * base address, blk_size, & acq_head.
65 *
66 * head = 1;
67 * next = [1, 2, 3, 4, -1] : Example after allocating Node #0
68 * free_list = 1->2->3->4->-1
69 *
70 * head = 2;
71 * next = [1, 2, 3, 4, -1] : Example after allocating Node #1
72 * free_list = 2->3->4->-1
73 *
74 * -- Frees --
75 * 1) Based on the address to be freed, calculate the index of the node
76 * being freed (cur_idx)
77 * 2) Read the current head (old_head)
78 * 3) So the freed node is going to go at the head of the list, and we
79 * want to put the old_head after it. So next[cur_idx] = old_head
80 * 4) cmpxchg(head, old_head, cur_idx)
81 *
82 * head = 0
83 * next = [2, 2, 3, 4, -1]
84 * free_list = 0->2->3->4->-1 : Example after freeing Node #0
85 *
86 * head = 1
87 * next = [2, 0, 3, 4, -1]
88 * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
89 */
90
91#ifndef LOCKLESS_ALLOCATOR_PRIV_H
92#define LOCKLESS_ALLOCATOR_PRIV_H
93
94struct gk20a_allocator;
95
96struct gk20a_lockless_allocator {
97 struct gk20a_allocator *owner;
98
99 u64 base; /* Base address of the space. */
100 u64 length; /* Length of the space. */
101 u64 blk_size; /* Size of the structure being allocated */
102 int nr_nodes; /* Number of nodes available for allocation */
103
104 int *next; /* An array holding the next indices per node */
105 int head; /* Current node at the top of the stack */
106
107 u64 flags;
108
109 bool inited;
110
111 /* Statistics */
112 atomic_t nr_allocs;
113};
114
115static inline struct gk20a_lockless_allocator *lockless_allocator(
116 struct gk20a_allocator *a)
117{
118 return (struct gk20a_lockless_allocator *)(a)->priv;
119}
120
121#endif
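
The long comment in the header removed above documents the algorithm; as a standalone illustration, here is a toy user-space model of the same index free-list using C11 atomics. All names and the atomics API are assumptions of the sketch, not the driver's (the kernel code relies on cmpxchg(), as the comment says), and like that description the sketch ignores ABA hazards.

#include <stdatomic.h>
#include <stddef.h>

/* Toy model of the index free-list described above. */
struct toy_lockless {
	char *base;        /* client-provided buffer of nr_nodes * blk_size   */
	size_t blk_size;   /* size of one fixed-size structure                */
	int nr_nodes;
	int *next;         /* next free index for every node, -1 terminates   */
	_Atomic int head;  /* index of the first free node, -1 when exhausted */
};

static void *toy_alloc(struct toy_lockless *a)
{
	int acq_head, new_head;

	do {
		acq_head = atomic_load(&a->head);
		if (acq_head < 0)
			return NULL;                 /* pool exhausted */
		new_head = a->next[acq_head];
	} while (!atomic_compare_exchange_weak(&a->head, &acq_head, new_head));

	return a->base + (size_t)acq_head * a->blk_size;
}

static void toy_free(struct toy_lockless *a, void *p)
{
	int cur_idx = (int)(((char *)p - a->base) / a->blk_size);
	int old_head;

	do {
		old_head = atomic_load(&a->head);
		a->next[cur_idx] = old_head;         /* freed node becomes new head */
	} while (!atomic_compare_exchange_weak(&a->head, &old_head, cur_idx));
}

Initialization would set next = [1, 2, ..., -1] and head = 0, exactly as in the "Initial State" section of the comment above.
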
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 2e338fef..d594a5a4 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -31,9 +31,9 @@
31#include <uapi/linux/nvgpu.h> 31#include <uapi/linux/nvgpu.h>
32#include <trace/events/gk20a.h> 32#include <trace/events/gk20a.h>
33 33
34#include <gk20a/page_allocator_priv.h>
35
36#include <nvgpu/timers.h> 34#include <nvgpu/timers.h>
35#include <nvgpu/allocator.h>
36#include <nvgpu/page_allocator.h>
37 37
38#include "gk20a.h" 38#include "gk20a.h"
39#include "mm_gk20a.h" 39#include "mm_gk20a.h"
@@ -74,7 +74,7 @@ is_vidmem_page_alloc(u64 addr)
74 return !!(addr & 1ULL); 74 return !!(addr & 1ULL);
75} 75}
76 76
77static inline struct gk20a_page_alloc * 77static inline struct nvgpu_page_alloc *
78get_vidmem_page_alloc(struct scatterlist *sgl) 78get_vidmem_page_alloc(struct scatterlist *sgl)
79{ 79{
80 u64 addr; 80 u64 addr;
@@ -86,7 +86,7 @@ get_vidmem_page_alloc(struct scatterlist *sgl)
86 else 86 else
87 WARN_ON(1); 87 WARN_ON(1);
88 88
89 return (struct gk20a_page_alloc *)(uintptr_t)addr; 89 return (struct nvgpu_page_alloc *)(uintptr_t)addr;
90} 90}
91 91
92int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) 92int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
@@ -176,7 +176,7 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
176static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, 176static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
177 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) 177 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
178{ 178{
179 struct gk20a_page_alloc *alloc = NULL; 179 struct nvgpu_page_alloc *alloc = NULL;
180 struct page_alloc_chunk *chunk = NULL; 180 struct page_alloc_chunk *chunk = NULL;
181 u32 byteoff, start_reg, until_end, n; 181 u32 byteoff, start_reg, until_end, n;
182 182
@@ -797,8 +797,8 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block)
797static void gk20a_vidmem_destroy(struct gk20a *g) 797static void gk20a_vidmem_destroy(struct gk20a *g)
798{ 798{
799#if defined(CONFIG_GK20A_VIDMEM) 799#if defined(CONFIG_GK20A_VIDMEM)
800 if (gk20a_alloc_initialized(&g->mm.vidmem.allocator)) 800 if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
801 gk20a_alloc_destroy(&g->mm.vidmem.allocator); 801 nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
802#endif 802#endif
803} 803}
804 804
@@ -928,8 +928,8 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
928 u64 default_page_size = SZ_64K; 928 u64 default_page_size = SZ_64K;
929 int err; 929 int err;
930 930
931 static struct gk20a_alloc_carveout wpr_co = 931 static struct nvgpu_alloc_carveout wpr_co =
932 GK20A_CARVEOUT("wpr-region", 0, SZ_16M); 932 NVGPU_CARVEOUT("wpr-region", 0, SZ_16M);
933 933
934 if (!size) 934 if (!size)
935 return 0; 935 return 0;
@@ -944,12 +944,12 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
944 * initialization requires vidmem but we want to use the CE to zero 944 * initialization requires vidmem but we want to use the CE to zero
945 * out vidmem before allocating it... 945 * out vidmem before allocating it...
946 */ 946 */
947 err = gk20a_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, 947 err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator,
948 "vidmem-bootstrap", 948 "vidmem-bootstrap",
949 bootstrap_base, bootstrap_size, 949 bootstrap_base, bootstrap_size,
950 SZ_4K, 0); 950 SZ_4K, 0);
951 951
952 err = gk20a_page_allocator_init(g, &g->mm.vidmem.allocator, 952 err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator,
953 "vidmem", 953 "vidmem",
954 base, size - base, 954 base, size - base,
955 default_page_size, 955 default_page_size,
@@ -961,7 +961,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
961 } 961 }
962 962
963 /* Reserve bootstrap region in vidmem allocator */ 963 /* Reserve bootstrap region in vidmem allocator */
964 gk20a_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); 964 nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co);
965 965
966 mm->vidmem.base = base; 966 mm->vidmem.base = base;
967 mm->vidmem.size = size - base; 967 mm->vidmem.size = size - base;
@@ -1482,7 +1482,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1482 1482
1483 mutex_lock(&vm->update_gmmu_lock); 1483 mutex_lock(&vm->update_gmmu_lock);
1484 1484
1485 buffer_list = nvgpu_alloc(sizeof(*buffer_list) * 1485 buffer_list = nvgpu_kalloc(sizeof(*buffer_list) *
1486 vm->num_user_mapped_buffers, true); 1486 vm->num_user_mapped_buffers, true);
1487 if (!buffer_list) { 1487 if (!buffer_list) {
1488 mutex_unlock(&vm->update_gmmu_lock); 1488 mutex_unlock(&vm->update_gmmu_lock);
@@ -1567,7 +1567,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1567 gk20a_vm_mapping_batch_finish_locked(vm, &batch); 1567 gk20a_vm_mapping_batch_finish_locked(vm, &batch);
1568 mutex_unlock(&vm->update_gmmu_lock); 1568 mutex_unlock(&vm->update_gmmu_lock);
1569 1569
1570 nvgpu_free(mapped_buffers); 1570 nvgpu_kfree(mapped_buffers);
1571} 1571}
1572 1572
1573static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, 1573static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
@@ -1623,7 +1623,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
1623 enum gmmu_pgsz_gk20a gmmu_pgsz_idx) 1623 enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
1624 1624
1625{ 1625{
1626 struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; 1626 struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx];
1627 u64 offset; 1627 u64 offset;
1628 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; 1628 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
1629 1629
@@ -1645,7 +1645,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
1645 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, 1645 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
1646 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); 1646 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
1647 1647
1648 offset = gk20a_alloc(vma, size); 1648 offset = nvgpu_alloc(vma, size);
1649 if (!offset) { 1649 if (!offset) {
1650 gk20a_err(dev_from_vm(vm), 1650 gk20a_err(dev_from_vm(vm),
1651 "%s oom: sz=0x%llx", vma->name, size); 1651 "%s oom: sz=0x%llx", vma->name, size);
@@ -1660,11 +1660,11 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
1660 u64 offset, u64 size, 1660 u64 offset, u64 size,
1661 enum gmmu_pgsz_gk20a pgsz_idx) 1661 enum gmmu_pgsz_gk20a pgsz_idx)
1662{ 1662{
1663 struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; 1663 struct nvgpu_allocator *vma = &vm->vma[pgsz_idx];
1664 1664
1665 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", 1665 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
1666 vma->name, offset, size); 1666 vma->name, offset, size);
1667 gk20a_free(vma, offset); 1667 nvgpu_free(vma, offset);
1668 1668
1669 return 0; 1669 return 0;
1670} 1670}
@@ -2302,15 +2302,15 @@ err_kfree:
2302int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) 2302int gk20a_vidmem_get_space(struct gk20a *g, u64 *space)
2303{ 2303{
2304#if defined(CONFIG_GK20A_VIDMEM) 2304#if defined(CONFIG_GK20A_VIDMEM)
2305 struct gk20a_allocator *allocator = &g->mm.vidmem.allocator; 2305 struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator;
2306 2306
2307 gk20a_dbg_fn(""); 2307 gk20a_dbg_fn("");
2308 2308
2309 if (!gk20a_alloc_initialized(allocator)) 2309 if (!nvgpu_alloc_initialized(allocator))
2310 return -ENOSYS; 2310 return -ENOSYS;
2311 2311
2312 mutex_lock(&g->mm.vidmem.clear_list_mutex); 2312 mutex_lock(&g->mm.vidmem.clear_list_mutex);
2313 *space = gk20a_alloc_space(allocator) + 2313 *space = nvgpu_alloc_space(allocator) +
2314 atomic64_read(&g->mm.vidmem.bytes_pending); 2314 atomic64_read(&g->mm.vidmem.bytes_pending);
2315 mutex_unlock(&g->mm.vidmem.clear_list_mutex); 2315 mutex_unlock(&g->mm.vidmem.clear_list_mutex);
2316 return 0; 2316 return 0;
@@ -2359,7 +2359,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl,
2359 u64 buf_addr; 2359 u64 buf_addr;
2360 2360
2361 if (aperture == APERTURE_VIDMEM) { 2361 if (aperture == APERTURE_VIDMEM) {
2362 struct gk20a_page_alloc *alloc = get_vidmem_page_alloc(sgl); 2362 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
2363 struct page_alloc_chunk *chunk = NULL; 2363 struct page_alloc_chunk *chunk = NULL;
2364 2364
2365 list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { 2365 list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
@@ -3068,7 +3068,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem)
3068{ 3068{
3069 struct gk20a_fence *gk20a_fence_out = NULL; 3069 struct gk20a_fence *gk20a_fence_out = NULL;
3070 struct gk20a_fence *gk20a_last_fence = NULL; 3070 struct gk20a_fence *gk20a_last_fence = NULL;
3071 struct gk20a_page_alloc *alloc = NULL; 3071 struct nvgpu_page_alloc *alloc = NULL;
3072 struct page_alloc_chunk *chunk = NULL; 3072 struct page_alloc_chunk *chunk = NULL;
3073 int err = 0; 3073 int err = 0;
3074 3074
@@ -3134,15 +3134,15 @@ int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr,
3134} 3134}
3135 3135
3136#if defined(CONFIG_GK20A_VIDMEM) 3136#if defined(CONFIG_GK20A_VIDMEM)
3137static u64 __gk20a_gmmu_alloc(struct gk20a_allocator *allocator, dma_addr_t at, 3137static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
3138 size_t size) 3138 size_t size)
3139{ 3139{
3140 u64 addr = 0; 3140 u64 addr = 0;
3141 3141
3142 if (at) 3142 if (at)
3143 addr = gk20a_alloc_fixed(allocator, at, size); 3143 addr = nvgpu_alloc_fixed(allocator, at, size);
3144 else 3144 else
3145 addr = gk20a_alloc(allocator, size); 3145 addr = nvgpu_alloc(allocator, size);
3146 3146
3147 return addr; 3147 return addr;
3148} 3148}
@@ -3154,14 +3154,14 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
3154#if defined(CONFIG_GK20A_VIDMEM) 3154#if defined(CONFIG_GK20A_VIDMEM)
3155 u64 addr; 3155 u64 addr;
3156 int err; 3156 int err;
3157 struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ? 3157 struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
3158 &g->mm.vidmem.allocator : 3158 &g->mm.vidmem.allocator :
3159 &g->mm.vidmem.bootstrap_allocator; 3159 &g->mm.vidmem.bootstrap_allocator;
3160 int before_pending; 3160 int before_pending;
3161 3161
3162 gk20a_dbg_fn(""); 3162 gk20a_dbg_fn("");
3163 3163
3164 if (!gk20a_alloc_initialized(&g->mm.vidmem.allocator)) 3164 if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
3165 return -ENOSYS; 3165 return -ENOSYS;
3166 3166
3167 /* we don't support dma attributes here, except that kernel mappings 3167 /* we don't support dma attributes here, except that kernel mappings
@@ -3214,7 +3214,7 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
3214fail_kfree: 3214fail_kfree:
3215 kfree(mem->sgt); 3215 kfree(mem->sgt);
3216fail_physfree: 3216fail_physfree:
3217 gk20a_free(&g->mm.vidmem.allocator, addr); 3217 nvgpu_free(&g->mm.vidmem.allocator, addr);
3218 return err; 3218 return err;
3219#else 3219#else
3220 return -ENOSYS; 3220 return -ENOSYS;
@@ -3241,7 +3241,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
3241 } 3241 }
3242 } else { 3242 } else {
3243 gk20a_memset(g, mem, 0, 0, mem->size); 3243 gk20a_memset(g, mem, 0, 0, mem->size);
3244 gk20a_free(mem->allocator, 3244 nvgpu_free(mem->allocator,
3245 (u64)get_vidmem_page_alloc(mem->sgt->sgl)); 3245 (u64)get_vidmem_page_alloc(mem->sgt->sgl));
3246 gk20a_free_sgtable(&mem->sgt); 3246 gk20a_free_sgtable(&mem->sgt);
3247 3247
@@ -3276,7 +3276,7 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
3276u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem, 3276u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem,
3277 u32 flags) 3277 u32 flags)
3278{ 3278{
3279 struct gk20a_page_alloc *alloc; 3279 struct nvgpu_page_alloc *alloc;
3280 u64 addr; 3280 u64 addr;
3281 3281
3282 if (mem->aperture == APERTURE_VIDMEM) { 3282 if (mem->aperture == APERTURE_VIDMEM) {
@@ -3317,7 +3317,7 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
3317 3317
3318 while ((mem = get_pending_mem_desc(mm)) != NULL) { 3318 while ((mem = get_pending_mem_desc(mm)) != NULL) {
3319 gk20a_gmmu_clear_vidmem_mem(g, mem); 3319 gk20a_gmmu_clear_vidmem_mem(g, mem);
3320 gk20a_free(mem->allocator, 3320 nvgpu_free(mem->allocator,
3321 (u64)get_vidmem_page_alloc(mem->sgt->sgl)); 3321 (u64)get_vidmem_page_alloc(mem->sgt->sgl));
3322 gk20a_free_sgtable(&mem->sgt); 3322 gk20a_free_sgtable(&mem->sgt);
3323 3323
@@ -3905,7 +3905,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
3905 u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; 3905 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
3906 int err; 3906 int err;
3907 struct scatterlist *sgl = NULL; 3907 struct scatterlist *sgl = NULL;
3908 struct gk20a_page_alloc *alloc = NULL; 3908 struct nvgpu_page_alloc *alloc = NULL;
3909 struct page_alloc_chunk *chunk = NULL; 3909 struct page_alloc_chunk *chunk = NULL;
3910 u64 length; 3910 u64 length;
3911 3911
@@ -4251,12 +4251,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
4251 * 4251 *
4252 * !!! TODO: cleanup. 4252 * !!! TODO: cleanup.
4253 */ 4253 */
4254 sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel], 4254 sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
4255 vm->va_limit - 4255 vm->va_limit -
4256 mm->channel.kernel_size, 4256 mm->channel.kernel_size,
4257 512 * PAGE_SIZE); 4257 512 * PAGE_SIZE);
4258 if (!sema_sea->gpu_va) { 4258 if (!sema_sea->gpu_va) {
4259 gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); 4259 nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
4260 gk20a_vm_put(vm); 4260 gk20a_vm_put(vm);
4261 return -ENOMEM; 4261 return -ENOMEM;
4262 } 4262 }
@@ -4264,7 +4264,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
4264 err = gk20a_semaphore_pool_map(vm->sema_pool, vm); 4264 err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
4265 if (err) { 4265 if (err) {
4266 gk20a_semaphore_pool_unmap(vm->sema_pool, vm); 4266 gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
4267 gk20a_free(&vm->vma[gmmu_page_size_small], 4267 nvgpu_free(&vm->vma[gmmu_page_size_small],
4268 vm->sema_pool->gpu_va); 4268 vm->sema_pool->gpu_va);
4269 gk20a_vm_put(vm); 4269 gk20a_vm_put(vm);
4270 } 4270 }
@@ -4387,7 +4387,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4387 snprintf(alloc_name, sizeof(alloc_name), 4387 snprintf(alloc_name, sizeof(alloc_name),
4388 "gk20a_%s-fixed", name); 4388 "gk20a_%s-fixed", name);
4389 4389
4390 err = __gk20a_buddy_allocator_init(g, &vm->fixed, 4390 err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
4391 vm, alloc_name, 4391 vm, alloc_name,
4392 small_vma_start, 4392 small_vma_start,
4393 g->separate_fixed_allocs, 4393 g->separate_fixed_allocs,
@@ -4404,7 +4404,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4404 if (small_vma_start < small_vma_limit) { 4404 if (small_vma_start < small_vma_limit) {
4405 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, 4405 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
4406 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); 4406 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
4407 err = __gk20a_buddy_allocator_init( 4407 err = __nvgpu_buddy_allocator_init(
4408 g, 4408 g,
4409 &vm->vma[gmmu_page_size_small], 4409 &vm->vma[gmmu_page_size_small],
4410 vm, alloc_name, 4410 vm, alloc_name,
@@ -4420,7 +4420,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4420 if (large_vma_start < large_vma_limit) { 4420 if (large_vma_start < large_vma_limit) {
4421 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", 4421 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
4422 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); 4422 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
4423 err = __gk20a_buddy_allocator_init( 4423 err = __nvgpu_buddy_allocator_init(
4424 g, 4424 g,
4425 &vm->vma[gmmu_page_size_big], 4425 &vm->vma[gmmu_page_size_big],
4426 vm, alloc_name, 4426 vm, alloc_name,
@@ -4438,7 +4438,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4438 /* 4438 /*
4439 * kernel reserved VMA is at the end of the aperture 4439 * kernel reserved VMA is at the end of the aperture
4440 */ 4440 */
4441 err = __gk20a_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], 4441 err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel],
4442 vm, alloc_name, 4442 vm, alloc_name,
4443 kernel_vma_start, 4443 kernel_vma_start,
4444 kernel_vma_limit - kernel_vma_start, 4444 kernel_vma_limit - kernel_vma_start,
@@ -4469,10 +4469,10 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4469 4469
4470clean_up_big_allocator: 4470clean_up_big_allocator:
4471 if (large_vma_start < large_vma_limit) 4471 if (large_vma_start < large_vma_limit)
4472 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); 4472 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
4473clean_up_small_allocator: 4473clean_up_small_allocator:
4474 if (small_vma_start < small_vma_limit) 4474 if (small_vma_start < small_vma_limit)
4475 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); 4475 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
4476clean_up_ptes: 4476clean_up_ptes:
4477 free_gmmu_pages(vm, &vm->pdb); 4477 free_gmmu_pages(vm, &vm->pdb);
4478clean_up_pdes: 4478clean_up_pdes:
@@ -4547,7 +4547,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4547{ 4547{
4548 int err = -ENOMEM; 4548 int err = -ENOMEM;
4549 int pgsz_idx = gmmu_page_size_small; 4549 int pgsz_idx = gmmu_page_size_small;
4550 struct gk20a_allocator *vma; 4550 struct nvgpu_allocator *vma;
4551 struct vm_gk20a *vm = as_share->vm; 4551 struct vm_gk20a *vm = as_share->vm;
4552 struct gk20a *g = vm->mm->g; 4552 struct gk20a *g = vm->mm->g;
4553 struct vm_reserved_va_node *va_node; 4553 struct vm_reserved_va_node *va_node;
@@ -4579,13 +4579,13 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4579 4579
4580 vma = &vm->vma[pgsz_idx]; 4580 vma = &vm->vma[pgsz_idx];
4581 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { 4581 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
4582 if (gk20a_alloc_initialized(&vm->fixed)) 4582 if (nvgpu_alloc_initialized(&vm->fixed))
4583 vma = &vm->fixed; 4583 vma = &vm->fixed;
4584 vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset, 4584 vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
4585 (u64)args->pages * 4585 (u64)args->pages *
4586 (u64)args->page_size); 4586 (u64)args->page_size);
4587 } else { 4587 } else {
4588 vaddr_start = gk20a_alloc(vma, 4588 vaddr_start = nvgpu_alloc(vma,
4589 (u64)args->pages * 4589 (u64)args->pages *
4590 (u64)args->page_size); 4590 (u64)args->page_size);
4591 } 4591 }
@@ -4621,7 +4621,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4621 APERTURE_INVALID); 4621 APERTURE_INVALID);
4622 if (!map_offset) { 4622 if (!map_offset) {
4623 mutex_unlock(&vm->update_gmmu_lock); 4623 mutex_unlock(&vm->update_gmmu_lock);
4624 gk20a_free(vma, vaddr_start); 4624 nvgpu_free(vma, vaddr_start);
4625 kfree(va_node); 4625 kfree(va_node);
4626 goto clean_up; 4626 goto clean_up;
4627 } 4627 }
@@ -4644,7 +4644,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
4644{ 4644{
4645 int err = -ENOMEM; 4645 int err = -ENOMEM;
4646 int pgsz_idx; 4646 int pgsz_idx;
4647 struct gk20a_allocator *vma; 4647 struct nvgpu_allocator *vma;
4648 struct vm_gk20a *vm = as_share->vm; 4648 struct vm_gk20a *vm = as_share->vm;
4649 struct vm_reserved_va_node *va_node; 4649 struct vm_reserved_va_node *va_node;
4650 struct gk20a *g = gk20a_from_vm(vm); 4650 struct gk20a *g = gk20a_from_vm(vm);
@@ -4656,11 +4656,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
4656 pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? 4656 pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
4657 gmmu_page_size_big : gmmu_page_size_small; 4657 gmmu_page_size_big : gmmu_page_size_small;
4658 4658
4659 if (gk20a_alloc_initialized(&vm->fixed)) 4659 if (nvgpu_alloc_initialized(&vm->fixed))
4660 vma = &vm->fixed; 4660 vma = &vm->fixed;
4661 else 4661 else
4662 vma = &vm->vma[pgsz_idx]; 4662 vma = &vm->vma[pgsz_idx];
4663 gk20a_free(vma, args->offset); 4663 nvgpu_free(vma, args->offset);
4664 4664
4665 mutex_lock(&vm->update_gmmu_lock); 4665 mutex_lock(&vm->update_gmmu_lock);
4666 va_node = addr_to_reservation(vm, args->offset); 4666 va_node = addr_to_reservation(vm, args->offset);
@@ -4844,13 +4844,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
4844 4844
4845void gk20a_deinit_vm(struct vm_gk20a *vm) 4845void gk20a_deinit_vm(struct vm_gk20a *vm)
4846{ 4846{
4847 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); 4847 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
4848 if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) 4848 if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
4849 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); 4849 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
4850 if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) 4850 if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
4851 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); 4851 nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
4852 if (gk20a_alloc_initialized(&vm->fixed)) 4852 if (nvgpu_alloc_initialized(&vm->fixed))
4853 gk20a_alloc_destroy(&vm->fixed); 4853 nvgpu_alloc_destroy(&vm->fixed);
4854 4854
4855 gk20a_vm_free_entries(vm, &vm->pdb, 0); 4855 gk20a_vm_free_entries(vm, &vm->pdb, 0);
4856} 4856}
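
The mm_gk20a.c hunks above amount to a mechanical rename of the allocator interface; condensed into a minimal post-patch sketch of the VA allocation round trip (illustrative only, wrapper name is not from the source):

/*
 * Post-patch spelling of the VA allocator calls touched above; the
 * pre-patch code made the same calls under the gk20a_ prefix.
 */
static int example_va_roundtrip(struct vm_gk20a *vm, u64 size)
{
	struct nvgpu_allocator *vma = &vm->vma[gmmu_page_size_small];
	u64 offset;

	if (!nvgpu_alloc_initialized(vma))	/* was gk20a_alloc_initialized() */
		return -ENOSYS;

	offset = nvgpu_alloc(vma, size);	/* was gk20a_alloc()             */
	if (!offset)
		return -ENOMEM;

	nvgpu_free(vma, offset);		/* was gk20a_free()              */
	return 0;
}
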
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d32e121a..f58b5df5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -27,7 +27,8 @@
27#include <linux/version.h> 27#include <linux/version.h>
28#include <asm/dma-iommu.h> 28#include <asm/dma-iommu.h>
29#include <asm/cacheflush.h> 29#include <asm/cacheflush.h>
30#include "gk20a_allocator.h" 30
31#include <nvgpu/allocator.h>
31 32
32#ifdef CONFIG_ARM64 33#ifdef CONFIG_ARM64
33#define outer_flush_range(a, b) 34#define outer_flush_range(a, b)
@@ -70,7 +71,7 @@ struct mem_desc {
70 u64 gpu_va; 71 u64 gpu_va;
71 bool fixed; /* vidmem only */ 72 bool fixed; /* vidmem only */
72 bool user_mem; /* vidmem only */ 73 bool user_mem; /* vidmem only */
73 struct gk20a_allocator *allocator; /* vidmem only */ 74 struct nvgpu_allocator *allocator; /* vidmem only */
74 struct list_head clear_list_entry; /* vidmem only */ 75 struct list_head clear_list_entry; /* vidmem only */
75 bool skip_wmb; 76 bool skip_wmb;
76}; 77};
@@ -295,10 +296,10 @@ struct vm_gk20a {
295 296
296 struct gk20a_mm_entry pdb; 297 struct gk20a_mm_entry pdb;
297 298
298 struct gk20a_allocator vma[gmmu_nr_page_sizes]; 299 struct nvgpu_allocator vma[gmmu_nr_page_sizes];
299 300
300 /* If necessary, split fixed from non-fixed. */ 301 /* If necessary, split fixed from non-fixed. */
301 struct gk20a_allocator fixed; 302 struct nvgpu_allocator fixed;
302 303
303 struct rb_root mapped_buffers; 304 struct rb_root mapped_buffers;
304 305
@@ -421,8 +422,8 @@ struct mm_gk20a {
421 size_t bootstrap_size; 422 size_t bootstrap_size;
422 u64 bootstrap_base; 423 u64 bootstrap_base;
423 424
424 struct gk20a_allocator allocator; 425 struct nvgpu_allocator allocator;
425 struct gk20a_allocator bootstrap_allocator; 426 struct nvgpu_allocator bootstrap_allocator;
426 427
427 u32 ce_ctx_id; 428 u32 ce_ctx_id;
428 volatile bool cleared; 429 volatile bool cleared;
@@ -470,13 +471,13 @@ static inline u64 __nv_gmmu_va_small_page_limit(void)
470 471
471static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) 472static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
472{ 473{
473 struct gk20a_allocator *a = &vm->vma[gmmu_page_size_big]; 474 struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big];
474 475
475 if (!vm->big_pages) 476 if (!vm->big_pages)
476 return 0; 477 return 0;
477 478
478 return addr >= gk20a_alloc_base(a) && 479 return addr >= nvgpu_alloc_base(a) &&
479 addr < gk20a_alloc_base(a) + gk20a_alloc_length(a); 480 addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a);
480} 481}
481 482
482/* 483/*
@@ -825,7 +826,7 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
825extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; 826extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
826extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; 827extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
827 828
828static inline void *nvgpu_alloc(size_t size, bool clear) 829static inline void *nvgpu_kalloc(size_t size, bool clear)
829{ 830{
830 void *p; 831 void *p;
831 832
@@ -844,7 +845,7 @@ static inline void *nvgpu_alloc(size_t size, bool clear)
844 return p; 845 return p;
845} 846}
846 847
847static inline void nvgpu_free(void *p) 848static inline void nvgpu_kfree(void *p)
848{ 849{
849 if (virt_addr_valid(p)) 850 if (virt_addr_valid(p))
850 kfree(p); 851 kfree(p);
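
The last hunk renames the kernel-memory helpers to nvgpu_kalloc()/nvgpu_kfree(), freeing the nvgpu_alloc()/nvgpu_free() names for the allocator API above. Their bodies are elided in the hunk; the following is only a sketch of the usual kmalloc-or-vmalloc pattern that the visible virt_addr_valid() check implies, with an assumed size threshold and assumed branches, under hypothetical names.

/* Sketch only -- not the elided body from the hunk above. */
static inline void *sketch_kalloc(size_t size, bool clear)
{
	if (size > PAGE_SIZE)	/* assumed cutover to vmalloc for large buffers */
		return clear ? vzalloc(size) : vmalloc(size);

	return clear ? kzalloc(size, GFP_KERNEL) : kmalloc(size, GFP_KERNEL);
}

static inline void sketch_kfree(void *p)
{
	if (virt_addr_valid(p))	/* lowmem address => it came from kmalloc */
		kfree(p);
	else
		vfree(p);
}
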
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
deleted file mode 100644
index 7d7f43c2..00000000
--- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
+++ /dev/null
@@ -1,164 +0,0 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef PAGE_ALLOCATOR_PRIV_H
18#define PAGE_ALLOCATOR_PRIV_H
19
20#include <linux/list.h>
21#include <linux/rbtree.h>
22
23#include "gk20a_allocator.h"
24
25struct gk20a_allocator;
26
27/*
28 * This allocator implements the ability to do SLAB style allocation since the
29 * GPU has two page sizes available - 4k and 64k/128k. When the default
30 * granularity is the large page size (64k/128k) small allocations become very
31 * space inefficient. This is most notable in PDE and PTE blocks which are 4k
32 * in size.
33 *
34 * Thus we need the ability to suballocate in 64k pages. The way we do this for
35 * the GPU is as follows. We have several buckets for sub-64K allocations:
36 *
37 * B0 - 4k
38 * B1 - 8k
39 * B2 - 16k
40 * B3 - 32k
41 * B4 - 64k (for when large pages are 128k)
42 *
43 * When an allocation comes in for less than the large page size (from now on
44 * assumed to be 64k) the allocation is satisfied by one of the buckets.
45 */
46struct page_alloc_slab {
47 struct list_head empty;
48 struct list_head partial;
49 struct list_head full;
50
51 int nr_empty;
52 int nr_partial;
53 int nr_full;
54
55 u32 slab_size;
56};
57
58enum slab_page_state {
59 SP_EMPTY,
60 SP_PARTIAL,
61 SP_FULL,
62 SP_NONE
63};
64
65struct page_alloc_slab_page {
66 unsigned long bitmap;
67 u64 page_addr;
68 u32 slab_size;
69
70 u32 nr_objects;
71 u32 nr_objects_alloced;
72
73 enum slab_page_state state;
74
75 struct page_alloc_slab *owner;
76 struct list_head list_entry;
77};
78
79struct page_alloc_chunk {
80 struct list_head list_entry;
81
82 u64 base;
83 u64 length;
84};
85
86/*
87 * Struct to handle internal management of page allocation. It holds a list
88 * of the chunks of pages that make up the overall allocation - much like a
89 * scatter gather table.
90 */
91struct gk20a_page_alloc {
92 struct list_head alloc_chunks;
93
94 int nr_chunks;
95 u64 length;
96
97 /*
98 * Only useful for the RB tree - since the alloc may have discontiguous
99 * pages the base is essentially irrelevant except for the fact that it
100 * is guaranteed to be unique.
101 */
102 u64 base;
103
104 struct rb_node tree_entry;
105
106 /*
107 * Set if this is a slab alloc. Points back to the slab page that owns
108 * this particular allocation. nr_chunks will always be 1 if this is
109 * set.
110 */
111 struct page_alloc_slab_page *slab_page;
112};
113
114struct gk20a_page_allocator {
115 struct gk20a_allocator *owner; /* Owner of this allocator. */
116
117 /*
118 * Use a buddy allocator to manage the allocation of the underlying
119 * pages. This lets us abstract the discontiguous allocation handling
120 * out of the annoyingly complicated buddy allocator.
121 */
122 struct gk20a_allocator source_allocator;
123
124 /*
125 * Page params.
126 */
127 u64 base;
128 u64 length;
129 u64 page_size;
130 u32 page_shift;
131
132 struct rb_root allocs; /* Outstanding allocations. */
133
134 struct page_alloc_slab *slabs;
135 int nr_slabs;
136
137 u64 flags;
138
139 /*
140 * Stat tracking.
141 */
142 u64 nr_allocs;
143 u64 nr_frees;
144 u64 nr_fixed_allocs;
145 u64 nr_fixed_frees;
146 u64 nr_slab_allocs;
147 u64 nr_slab_frees;
148 u64 pages_alloced;
149 u64 pages_freed;
150};
151
152static inline struct gk20a_page_allocator *page_allocator(
153 struct gk20a_allocator *a)
154{
155 return (struct gk20a_page_allocator *)(a)->priv;
156}
157
158static inline struct gk20a_allocator *palloc_owner(
159 struct gk20a_page_allocator *a)
160{
161 return a->owner;
162}
163
164#endif
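
The bucket table in the header comment removed above maps a sub-large-page request to the smallest power-of-two slab that holds it; a small sketch of that mapping, using the kernel's ilog2() and roundup_pow_of_two() (the helper name is an assumption):

/*
 * B0 = 4k, B1 = 8k, B2 = 16k, ... so the bucket index is the base-2 log
 * of the request rounded up to a power of two, in units of 4k pages.
 */
static inline int example_slab_bucket(unsigned long len)
{
	if (len < SZ_4K)
		len = SZ_4K;	/* smallest slab is a single 4k page */

	return (int)ilog2(roundup_pow_of_two(len) >> 12);
}

For example, a 10k request rounds up to 16k and lands in B2, which matches slab->slab_size = SZ_4K * (1 << i) in gk20a_page_alloc_init_slabs() earlier in this patch.
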
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index e221be11..56ebda1a 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2896,8 +2896,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
2896{ 2896{
2897 gk20a_dbg_fn(""); 2897 gk20a_dbg_fn("");
2898 2898
2899 if (gk20a_alloc_initialized(&pmu->dmem)) 2899 if (nvgpu_alloc_initialized(&pmu->dmem))
2900 gk20a_alloc_destroy(&pmu->dmem); 2900 nvgpu_alloc_destroy(&pmu->dmem);
2901 2901
2902 release_firmware(pmu->fw); 2902 release_firmware(pmu->fw);
2903} 2903}
@@ -3607,7 +3607,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
3607 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); 3607 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
3608 3608
3609 if (!pmu->sample_buffer) 3609 if (!pmu->sample_buffer)
3610 pmu->sample_buffer = gk20a_alloc(&pmu->dmem, 3610 pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
3611 2 * sizeof(u16)); 3611 2 * sizeof(u16));
3612 if (!pmu->sample_buffer) { 3612 if (!pmu->sample_buffer) {
3613 gk20a_err(dev_from_gk20a(g), 3613 gk20a_err(dev_from_gk20a(g),
@@ -3708,7 +3708,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
3708 for (i = 0; i < PMU_QUEUE_COUNT; i++) 3708 for (i = 0; i < PMU_QUEUE_COUNT; i++)
3709 pmu_queue_init(pmu, i, init); 3709 pmu_queue_init(pmu, i, init);
3710 3710
3711 if (!gk20a_alloc_initialized(&pmu->dmem)) { 3711 if (!nvgpu_alloc_initialized(&pmu->dmem)) {
3712 /* Align start and end addresses */ 3712 /* Align start and end addresses */
3713 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), 3713 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init),
3714 PMU_DMEM_ALLOC_ALIGNMENT); 3714 PMU_DMEM_ALLOC_ALIGNMENT);
@@ -3716,9 +3716,9 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
3716 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & 3716 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
3717 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); 3717 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1);
3718 u32 size = end - start; 3718 u32 size = end - start;
3719 gk20a_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem", 3719 nvgpu_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem",
3720 start, size, 3720 start, size,
3721 PMU_DMEM_ALLOC_ALIGNMENT, 0); 3721 PMU_DMEM_ALLOC_ALIGNMENT, 0);
3722 } 3722 }
3723 3723
3724 pmu->pmu_ready = true; 3724 pmu->pmu_ready = true;
@@ -3855,12 +3855,12 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
3855 seq->callback = NULL; 3855 seq->callback = NULL;
3856 if (pv->pmu_allocation_get_dmem_size(pmu, 3856 if (pv->pmu_allocation_get_dmem_size(pmu,
3857 pv->get_pmu_seq_in_a_ptr(seq)) != 0) 3857 pv->get_pmu_seq_in_a_ptr(seq)) != 0)
3858 gk20a_free(&pmu->dmem, 3858 nvgpu_free(&pmu->dmem,
3859 pv->pmu_allocation_get_dmem_offset(pmu, 3859 pv->pmu_allocation_get_dmem_offset(pmu,
3860 pv->get_pmu_seq_in_a_ptr(seq))); 3860 pv->get_pmu_seq_in_a_ptr(seq)));
3861 if (pv->pmu_allocation_get_dmem_size(pmu, 3861 if (pv->pmu_allocation_get_dmem_size(pmu,
3862 pv->get_pmu_seq_out_a_ptr(seq)) != 0) 3862 pv->get_pmu_seq_out_a_ptr(seq)) != 0)
3863 gk20a_free(&pmu->dmem, 3863 nvgpu_free(&pmu->dmem,
3864 pv->pmu_allocation_get_dmem_offset(pmu, 3864 pv->pmu_allocation_get_dmem_offset(pmu,
3865 pv->get_pmu_seq_out_a_ptr(seq))); 3865 pv->get_pmu_seq_out_a_ptr(seq)));
3866 3866
@@ -4601,7 +4601,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
4601 (u16)max(payload->in.size, payload->out.size)); 4601 (u16)max(payload->in.size, payload->out.size));
4602 4602
4603 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = 4603 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) =
4604 gk20a_alloc(&pmu->dmem, 4604 nvgpu_alloc(&pmu->dmem,
4605 pv->pmu_allocation_get_dmem_size(pmu, in)); 4605 pv->pmu_allocation_get_dmem_size(pmu, in));
4606 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) 4606 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)))
4607 goto clean_up; 4607 goto clean_up;
@@ -4644,7 +4644,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
4644 4644
4645 if (payload->in.buf != payload->out.buf) { 4645 if (payload->in.buf != payload->out.buf) {
4646 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = 4646 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) =
4647 gk20a_alloc(&pmu->dmem, 4647 nvgpu_alloc(&pmu->dmem,
4648 pv->pmu_allocation_get_dmem_size(pmu, out)); 4648 pv->pmu_allocation_get_dmem_size(pmu, out));
4649 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, 4649 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu,
4650 out))) 4650 out)))
@@ -4694,10 +4694,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
4694clean_up: 4694clean_up:
4695 gk20a_dbg_fn("fail"); 4695 gk20a_dbg_fn("fail");
4696 if (in) 4696 if (in)
4697 gk20a_free(&pmu->dmem, 4697 nvgpu_free(&pmu->dmem,
4698 pv->pmu_allocation_get_dmem_offset(pmu, in)); 4698 pv->pmu_allocation_get_dmem_offset(pmu, in));
4699 if (out) 4699 if (out)
4700 gk20a_free(&pmu->dmem, 4700 nvgpu_free(&pmu->dmem,
4701 pv->pmu_allocation_get_dmem_offset(pmu, out)); 4701 pv->pmu_allocation_get_dmem_offset(pmu, out));
4702 4702
4703 pmu_seq_release(pmu, seq); 4703 pmu_seq_release(pmu, seq);
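
The PMU hunks apply the same rename to the DMEM bitmap allocator; a condensed sketch of the post-patch sequence follows. Error handling and sizes are illustrative, and it assumes nvgpu_bitmap_allocator_init() returns an errno like its buddy/page counterparts.

/* Condensed from the hunks above; illustrative only. */
static int example_pmu_dmem_roundtrip(struct gk20a *g, struct pmu_gk20a *pmu,
				      u32 start, u32 size)
{
	u64 off;
	int err;

	err = nvgpu_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem",
					  start, size,
					  PMU_DMEM_ALLOC_ALIGNMENT, 0);
	if (err)
		return err;

	off = nvgpu_alloc(&pmu->dmem, 2 * sizeof(u16));	/* as for sample_buffer */
	if (!off)
		return -ENOMEM;

	nvgpu_free(&pmu->dmem, off);
	return 0;
}
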
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index cf4f3b52..32e2ef54 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -709,7 +709,7 @@ struct pmu_gk20a {
709 struct mutex pmu_copy_lock; 709 struct mutex pmu_copy_lock;
710 struct mutex pmu_seq_lock; 710 struct mutex pmu_seq_lock;
711 711
712 struct gk20a_allocator dmem; 712 struct nvgpu_allocator dmem;
713 713
714 u32 *ucode_image; 714 u32 *ucode_image;
715 bool pmu_ready; 715 bool pmu_ready;
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
index cf724fdb..8e09fcfc 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -18,10 +18,11 @@
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/delay.h> 19#include <linux/delay.h>
20 20
21#include <nvgpu/allocator.h>
22
21#include "gk20a.h" 23#include "gk20a.h"
22#include "mm_gk20a.h" 24#include "mm_gk20a.h"
23#include "channel_gk20a.h" 25#include "channel_gk20a.h"
24#include "gk20a_allocator.h"
25 26
26#define gpu_sema_dbg(fmt, args...) \ 27#define gpu_sema_dbg(fmt, args...) \
27 gk20a_dbg(gpu_dbg_sema, fmt, ##args) 28 gk20a_dbg(gpu_dbg_sema, fmt, ##args)