author     Alex Waterman <alexw@nvidia.com>                     2016-12-20 16:55:48 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-01-09 15:33:16 -0500
commit     6df3992b60959d32c7113cb77e131a2547174f3a (patch)
tree       efbdc9e6ccd2330d5c469ca0783ecb0137da8fc4 /drivers/gpu/nvgpu/gk20a
parent     e229514bece5a109cdbfe263f6329efe987e5939 (diff)
gpu: nvgpu: Move allocators to common/mm/
Move the GPU allocators to common/mm/ since the allocators are common
code across all GPUs. Also rename the allocator code to move away from
gk20a_ prefixed structs and functions.
The rename turned up one naming conflict: nvgpu_alloc() and nvgpu_free()
already existed as helpers that allocate with either kmalloc() or
vmalloc(), depending on the size of the allocation. Those helpers have
been renamed to nvgpu_kalloc() and nvgpu_kfree().
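A minimal sketch of what the renamed helpers do; the PAGE_SIZE cutoff
and GFP flags here are illustrative assumptions, not copied from the
common code:

    /* Small requests come from the slab; large ones from vmalloc space. */
    static inline void *nvgpu_kalloc(size_t size, bool clear)
    {
            if (size > PAGE_SIZE)
                    return clear ? vzalloc(size) : vmalloc(size);

            return clear ? kzalloc(size, GFP_KERNEL) :
                           kmalloc(size, GFP_KERNEL);
    }

    /* kvfree() dispatches to kfree() or vfree() based on the address. */
    static inline void nvgpu_kfree(void *p)
    {
            kvfree(p);
    }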
Bug 1799159
Change-Id: Iddda92c013612bcb209847084ec85b8953002fa5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1274400
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
24 files changed, 114 insertions, 4082 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 0b90090a..07601d42 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -279,17 +279,17 @@ static int gk20a_as_ioctl_get_va_regions(
 
 	for (i = 0; i < write_entries; ++i) {
 		struct nvgpu_as_va_region region;
-		struct gk20a_allocator *vma =
-			gk20a_alloc_initialized(&vm->fixed) ?
+		struct nvgpu_allocator *vma =
+			nvgpu_alloc_initialized(&vm->fixed) ?
 			&vm->fixed : &vm->vma[i];
 
 		memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
 		region.page_size = vm->gmmu_page_sizes[i];
-		region.offset = gk20a_alloc_base(vma);
+		region.offset = nvgpu_alloc_base(vma);
 		/* No __aeabi_uldivmod() on some platforms... */
-		region.pages = (gk20a_alloc_end(vma) -
-			gk20a_alloc_base(vma)) >> ilog2(region.page_size);
+		region.pages = (nvgpu_alloc_end(vma) -
+			nvgpu_alloc_base(vma)) >> ilog2(region.page_size);
 
 		if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
 			return -EFAULT;
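The hunk above keeps an old trick worth noting: region.pages is computed
with a shift by ilog2(page_size) instead of a 64-bit division, because
32-bit ARM has no native 64-bit divide and a plain u64 division would
pull in libgcc's __aeabi_uldivmod. A sketch of the idiom; span_to_pages()
is a hypothetical helper, not driver code, and is valid only for
power-of-two page sizes:

    static inline u64 span_to_pages(u64 start, u64 end, u32 page_size)
    {
            /* Equivalent to (end - start) / page_size for 2^n sizes. */
            return (end - start) >> ilog2(page_size);
    }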
diff --git a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h
deleted file mode 100644
index a686b704..00000000
--- a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef BITMAP_ALLOCATOR_PRIV_H
-#define BITMAP_ALLOCATOR_PRIV_H
-
-#include <linux/rbtree.h>
-
-struct gk20a_allocator;
-
-struct gk20a_bitmap_allocator {
-        struct gk20a_allocator *owner;
-
-        u64 base;               /* Base address of the space. */
-        u64 length;             /* Length of the space. */
-        u64 blk_size;           /* Size that corresponds to 1 bit. */
-        u64 blk_shift;          /* Bit shift to divide by blk_size. */
-        u64 num_bits;           /* Number of allocatable bits. */
-        u64 bit_offs;           /* Offset of bitmap. */
-
-        /*
-         * Optimization for making repeated allocations faster. Keep track of
-         * the next bit after the most recent allocation. This is where the next
-         * search will start from. This should make allocation faster in cases
-         * where lots of allocations get made one after another. It shouldn't
-         * have a negative impact on the case where the allocator is fragmented.
-         */
-        u64 next_blk;
-
-        unsigned long *bitmap;  /* The actual bitmap! */
-        struct rb_root allocs;  /* Tree of outstanding allocations. */
-
-        u64 flags;
-
-        bool inited;
-
-        /* Statistics */
-        u64 nr_allocs;
-        u64 nr_fixed_allocs;
-        u64 bytes_alloced;
-        u64 bytes_freed;
-};
-
-struct gk20a_bitmap_alloc {
-        u64 base;
-        u64 length;
-        struct rb_node alloc_entry;     /* RB tree of allocations. */
-};
-
-static inline struct gk20a_bitmap_allocator *bitmap_allocator(
-        struct gk20a_allocator *a)
-{
-        return (struct gk20a_bitmap_allocator *)(a)->priv;
-}
-
-
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h
deleted file mode 100644
index bb8b307b..00000000
--- a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef BUDDY_ALLOCATOR_PRIV_H
-#define BUDDY_ALLOCATOR_PRIV_H
-
-#include <linux/list.h>
-#include <linux/rbtree.h>
-
-struct gk20a_allocator;
-struct vm_gk20a;
-
-/*
- * Each buddy is an element in a binary tree.
- */
-struct gk20a_buddy {
-        struct gk20a_buddy *parent;     /* Parent node. */
-        struct gk20a_buddy *buddy;      /* This node's buddy. */
-        struct gk20a_buddy *left;       /* Lower address sub-node. */
-        struct gk20a_buddy *right;      /* Higher address sub-node. */
-
-        struct list_head buddy_entry;   /* List entry for various lists. */
-        struct rb_node alloced_entry;   /* RB tree of allocations. */
-
-        u64 start;                      /* Start address of this buddy. */
-        u64 end;                        /* End address of this buddy. */
-        u64 order;                      /* Buddy order. */
-
-#define BALLOC_BUDDY_ALLOCED    0x1
-#define BALLOC_BUDDY_SPLIT      0x2
-#define BALLOC_BUDDY_IN_LIST    0x4
-        int flags;                      /* List of associated flags. */
-
-        /*
-         * Size of the PDE this buddy is using. This allows for grouping like
-         * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a
-         * enum except for the BALLOC_PTE_SIZE_ANY specifier.
-         */
-#define BALLOC_PTE_SIZE_ANY     -1
-        int pte_size;
-};
-
-#define __buddy_flag_ops(flag, flag_up)                         \
-        static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \
-        {                                                       \
-                return b->flags & BALLOC_BUDDY_ ## flag_up;     \
-        }                                                       \
-        static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \
-        {                                                       \
-                b->flags |= BALLOC_BUDDY_ ## flag_up;           \
-        }                                                       \
-        static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \
-        {                                                       \
-                b->flags &= ~BALLOC_BUDDY_ ## flag_up;          \
-        }
-
-/*
- * int  buddy_is_alloced(struct gk20a_buddy *b);
- * void buddy_set_alloced(struct gk20a_buddy *b);
- * void buddy_clr_alloced(struct gk20a_buddy *b);
- *
- * int  buddy_is_split(struct gk20a_buddy *b);
- * void buddy_set_split(struct gk20a_buddy *b);
- * void buddy_clr_split(struct gk20a_buddy *b);
- *
- * int  buddy_is_in_list(struct gk20a_buddy *b);
- * void buddy_set_in_list(struct gk20a_buddy *b);
- * void buddy_clr_in_list(struct gk20a_buddy *b);
- */
-__buddy_flag_ops(alloced, ALLOCED);
-__buddy_flag_ops(split, SPLIT);
-__buddy_flag_ops(in_list, IN_LIST);
-
-/*
- * Keeps info for a fixed allocation.
- */
-struct gk20a_fixed_alloc {
-        struct list_head buddies;       /* List of buddies. */
-        struct rb_node alloced_entry;   /* RB tree of fixed allocations. */
-
-        u64 start;                      /* Start of fixed block. */
-        u64 end;                        /* End address. */
-};
-
-/*
- * GPU buddy allocator for the various GPU address spaces. Each addressable unit
- * doesn't have to correspond to a byte. In some cases each unit is a more
- * complex object such as a comp_tag line or the like.
- *
- * The max order is computed based on the size of the minimum order and the size
- * of the address space.
- *
- * order_size is the size of an order 0 buddy.
- */
-struct gk20a_buddy_allocator {
-        struct gk20a_allocator *owner;  /* Owner of this buddy allocator. */
-        struct vm_gk20a *vm;            /* Parent VM - can be NULL. */
-
-        u64 base;                       /* Base address of the space. */
-        u64 length;                     /* Length of the space. */
-        u64 blk_size;                   /* Size of order 0 allocation. */
-        u64 blk_shift;                  /* Shift to divide by blk_size. */
-
-        /* Internal stuff. */
-        u64 start;                      /* Real start (aligned to blk_size). */
-        u64 end;                        /* Real end, trimmed if needed. */
-        u64 count;                      /* Count of objects in space. */
-        u64 blks;                       /* Count of blks in the space. */
-        u64 max_order;                  /* Specific maximum order. */
-
-        struct rb_root alloced_buddies; /* Outstanding allocations. */
-        struct rb_root fixed_allocs;    /* Outstanding fixed allocations. */
-
-        struct list_head co_list;
-
-        /*
-         * Impose an upper bound on the maximum order.
-         */
-#define GPU_BALLOC_ORDER_LIST_LEN       (GPU_BALLOC_MAX_ORDER + 1)
-
-        struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
-        u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
-        u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
-        u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
-
-        /*
-         * This is for when the allocator is managing a GVA space (the
-         * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires
-         * that we group like sized allocations into PDE blocks.
-         */
-        u64 pte_blk_order;
-
-        int initialized;
-        int alloc_made;                 /* True after the first alloc. */
-
-        u64 flags;
-
-        u64 bytes_alloced;
-        u64 bytes_alloced_real;
-        u64 bytes_freed;
-};
-
-static inline struct gk20a_buddy_allocator *buddy_allocator(
-        struct gk20a_allocator *a)
-{
-        return (struct gk20a_buddy_allocator *)(a)->priv;
-}
-
-static inline struct list_head *balloc_get_order_list(
-        struct gk20a_buddy_allocator *a, int order)
-{
-        return &a->buddy_list[order];
-}
-
-static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a,
-                                      int order)
-{
-        return (1 << order) * a->blk_size;
-}
-
-static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a,
-                                    u64 base)
-{
-        return base - a->start;
-}
-
-static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a,
-                                      u64 base)
-{
-        return base + a->start;
-}
-
-static inline struct gk20a_allocator *balloc_owner(
-        struct gk20a_buddy_allocator *a)
-{
-        return a->owner;
-}
-
-#endif
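As the comment in the deleted header notes, the maximum order follows from
the order-0 block size and the size of the space: an order-n buddy spans
(1 << n) * blk_size bytes, so the largest order that still fits is
ilog2(length / blk_size), capped by GPU_BALLOC_MAX_ORDER. A sketch of that
relationship; compute_max_order() is illustrative, not the driver's actual
helper:

    static inline u64 compute_max_order(u64 length, u64 blk_size)
    {
            u64 true_max_order = ilog2(length / blk_size);

            return min_t(u64, true_max_order, GPU_BALLOC_MAX_ORDER);
    }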
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 136c28d0..be01e0e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -976,7 +976,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
 
 	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
-	nvgpu_free(ch->gpfifo.pipe);
+	nvgpu_kfree(ch->gpfifo.pipe);
 	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1778,7 +1778,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	}
 
 	if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-		c->gpfifo.pipe = nvgpu_alloc(
+		c->gpfifo.pipe = nvgpu_kalloc(
 			gpfifo_size * sizeof(struct nvgpu_gpfifo),
 			false);
 		if (!c->gpfifo.pipe) {
@@ -1850,7 +1850,7 @@ clean_up_sync:
 		c->sync = NULL;
 	}
 clean_up_unmap:
-	nvgpu_free(c->gpfifo.pipe);
+	nvgpu_kfree(c->gpfifo.pipe);
 	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -1980,12 +1980,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	if (!g) {
 		size = count * sizeof(struct nvgpu_gpfifo);
 		if (size) {
-			g = nvgpu_alloc(size, false);
+			g = nvgpu_kalloc(size, false);
 			if (!g)
 				return;
 
 			if (copy_from_user(g, user_gpfifo, size)) {
-				nvgpu_free(g);
+				nvgpu_kfree(g);
 				return;
 			}
 		}
@@ -1997,7 +1997,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	trace_write_pushbuffer(c, gp);
 
 	if (gpfifo_allocated)
-		nvgpu_free(g);
+		nvgpu_kfree(g);
 }
 
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 0a0d94b7..697d1603 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -143,7 +143,7 @@ struct channel_gk20a {
 	struct list_head ch_entry; /* channel's entry in TSG */
 
 	struct channel_gk20a_joblist joblist;
-	struct gk20a_allocator fence_allocator;
+	struct nvgpu_allocator fence_allocator;
 
 	struct vm_gk20a *vm;
 
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index e5529295..ac96036f 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -815,7 +815,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 		goto fail_dmabuf_put;
 	}
 
-	buffer = nvgpu_alloc(access_limit_size, true);
+	buffer = nvgpu_kalloc(access_limit_size, true);
 	if (!buffer) {
 		err = -ENOMEM;
 		goto fail_dmabuf_put;
@@ -861,7 +861,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 fail_idle:
 	gk20a_idle(g->dev);
 fail_free_buffer:
-	nvgpu_free(buffer);
+	nvgpu_kfree(buffer);
 fail_dmabuf_put:
 	dma_buf_put(dmabuf);
 
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index b84db933..8fa108c2 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -477,7 +477,7 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
 	gk20a_railgating_debugfs_init(g->dev);
 	gk20a_cde_debugfs_init(g->dev);
 	gk20a_ce_debugfs_init(g->dev);
-	gk20a_alloc_debugfs_init(g->dev);
+	nvgpu_alloc_debugfs_init(g->dev);
 	gk20a_mm_debugfs_init(g->dev);
 	gk20a_fifo_debugfs_init(g->dev);
 	gk20a_sched_debugfs_init(g->dev);
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 323caa8f..b8a1dcbc 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -49,8 +49,8 @@ static void gk20a_fence_free(struct kref *ref)
 		gk20a_semaphore_put(f->semaphore);
 
 	if (f->allocator) {
-		if (gk20a_alloc_initialized(f->allocator))
-			gk20a_free(f->allocator, (size_t)f);
+		if (nvgpu_alloc_initialized(f->allocator))
+			nvgpu_free(f->allocator, (size_t)f);
 	} else
 		kfree(f);
 }
@@ -129,7 +129,7 @@ int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
 	if (!fence_pool)
 		return -ENOMEM;
 
-	err = gk20a_lockless_allocator_init(c->g, &c->fence_allocator,
+	err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator,
 				"fence_pool", (size_t)fence_pool, size,
 				sizeof(struct gk20a_fence), 0);
 	if (err)
@@ -144,11 +144,11 @@ fail:
 
 void gk20a_free_fence_pool(struct channel_gk20a *c)
 {
-	if (gk20a_alloc_initialized(&c->fence_allocator)) {
+	if (nvgpu_alloc_initialized(&c->fence_allocator)) {
 		void *base = (void *)(uintptr_t)
-			gk20a_alloc_base(&c->fence_allocator);
+			nvgpu_alloc_base(&c->fence_allocator);
 
-		gk20a_alloc_destroy(&c->fence_allocator);
+		nvgpu_alloc_destroy(&c->fence_allocator);
 		vfree(base);
 	}
 }
@@ -158,9 +158,9 @@ struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 	struct gk20a_fence *fence = NULL;
 
 	if (channel_gk20a_is_prealloc_enabled(c)) {
-		if (gk20a_alloc_initialized(&c->fence_allocator)) {
+		if (nvgpu_alloc_initialized(&c->fence_allocator)) {
 			fence = (struct gk20a_fence *)(uintptr_t)
-				gk20a_alloc(&c->fence_allocator,
+				nvgpu_alloc(&c->fence_allocator,
 					sizeof(struct gk20a_fence));
 
 			/* clear the node and reset the allocator pointer */
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index beba761a..f38fcbe7 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -47,7 +47,7 @@ struct gk20a_fence {
 	u32 syncpt_value;
 
 	/* Valid for fences part of a pre-allocated fence pool */
-	struct gk20a_allocator *allocator;
+	struct nvgpu_allocator *allocator;
 };
 
 /* Fences can be created from semaphores or syncpoint (id, value) pairs */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index b1e90bd8..753f031a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -43,6 +43,8 @@
 #include <linux/sched.h>
 #include <linux/version.h>
 
+#include <nvgpu/allocator.h>
+
 #include "gk20a.h"
 #include "nvgpu_common.h"
 #include "debug_gk20a.h"
@@ -60,7 +62,6 @@
 #include "gk20a_scale.h"
 #include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
-#include "gk20a_allocator.h"
 #include "hal.h"
 #include "vgpu/vgpu.h"
 #include "pci.h"
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
deleted file mode 100644
index 3129b07c..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * gk20a allocator
- *
- * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-
-#include "gk20a.h"
-#include "mm_gk20a.h"
-#include "platform_gk20a.h"
-#include "gk20a_allocator.h"
-
-u32 gk20a_alloc_tracing_on;
-
-u64 gk20a_alloc_length(struct gk20a_allocator *a)
-{
-        if (a->ops->length)
-                return a->ops->length(a);
-
-        return 0;
-}
-
-u64 gk20a_alloc_base(struct gk20a_allocator *a)
-{
-        if (a->ops->base)
-                return a->ops->base(a);
-
-        return 0;
-}
-
-u64 gk20a_alloc_initialized(struct gk20a_allocator *a)
-{
-        if (!a->ops || !a->ops->inited)
-                return 0;
-
-        return a->ops->inited(a);
-}
-
-u64 gk20a_alloc_end(struct gk20a_allocator *a)
-{
-        if (a->ops->end)
-                return a->ops->end(a);
-
-        return 0;
-}
-
-u64 gk20a_alloc_space(struct gk20a_allocator *a)
-{
-        if (a->ops->space)
-                return a->ops->space(a);
-
-        return 0;
-}
-
-u64 gk20a_alloc(struct gk20a_allocator *a, u64 len)
-{
-        return a->ops->alloc(a, len);
-}
-
-void gk20a_free(struct gk20a_allocator *a, u64 addr)
-{
-        a->ops->free(a, addr);
-}
-
-u64 gk20a_alloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
-{
-        if (a->ops->alloc_fixed)
-                return a->ops->alloc_fixed(a, base, len);
-
-        return 0;
-}
-
-void gk20a_free_fixed(struct gk20a_allocator *a, u64 base, u64 len)
-{
-        /*
-         * If this operation is not defined for the allocator then just do
-         * nothing. The alternative would be to fall back on the regular
-         * free but that may be harmful in unexpected ways.
-         */
-        if (a->ops->free_fixed)
-                a->ops->free_fixed(a, base, len);
-}
-
-int gk20a_alloc_reserve_carveout(struct gk20a_allocator *a,
-                                 struct gk20a_alloc_carveout *co)
-{
-        if (a->ops->reserve_carveout)
-                return a->ops->reserve_carveout(a, co);
-
-        return -ENODEV;
-}
-
-void gk20a_alloc_release_carveout(struct gk20a_allocator *a,
-                                  struct gk20a_alloc_carveout *co)
-{
-        if (a->ops->release_carveout)
-                a->ops->release_carveout(a, co);
-}
-
-void gk20a_alloc_destroy(struct gk20a_allocator *a)
-{
-        a->ops->fini(a);
-        memset(a, 0, sizeof(*a));
-}
-
-/*
- * Handle the common init stuff for a gk20a_allocator.
- */
-int __gk20a_alloc_common_init(struct gk20a_allocator *a,
-                              const char *name, void *priv, bool dbg,
-                              const struct gk20a_allocator_ops *ops)
-{
-        if (!ops)
-                return -EINVAL;
-
-        /*
-         * This is the bare minimum set of operations required for a
-         * sensible allocator.
-         */
-        if (!ops->alloc || !ops->free || !ops->fini)
-                return -EINVAL;
-
-        a->ops = ops;
-        a->priv = priv;
-        a->debug = dbg;
-
-        mutex_init(&a->lock);
-
-        strlcpy(a->name, name, sizeof(a->name));
-
-        return 0;
-}
-
-void gk20a_alloc_print_stats(struct gk20a_allocator *__a,
-                             struct seq_file *s, int lock)
-{
-        __a->ops->print_stats(__a, s, lock);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int __alloc_show(struct seq_file *s, void *unused)
-{
-        struct gk20a_allocator *a = s->private;
-
-        gk20a_alloc_print_stats(a, s, 1);
-
-        return 0;
-}
-
-static int __alloc_open(struct inode *inode, struct file *file)
-{
-        return single_open(file, __alloc_show, inode->i_private);
-}
-
-static const struct file_operations __alloc_fops = {
-        .open = __alloc_open,
-        .read = seq_read,
-        .llseek = seq_lseek,
-        .release = single_release,
-};
-#endif
-
-void gk20a_init_alloc_debug(struct gk20a *g, struct gk20a_allocator *a)
-{
-#ifdef CONFIG_DEBUG_FS
-        if (!g->debugfs_allocators)
-                return;
-
-        a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
-                                               g->debugfs_allocators,
-                                               a, &__alloc_fops);
-#endif
-}
-
-void gk20a_fini_alloc_debug(struct gk20a_allocator *a)
-{
-#ifdef CONFIG_DEBUG_FS
-        if (!IS_ERR_OR_NULL(a->debugfs_entry))
-                debugfs_remove(a->debugfs_entry);
-#endif
-}
-
-void gk20a_alloc_debugfs_init(struct device *dev)
-{
-#ifdef CONFIG_DEBUG_FS
-        struct gk20a_platform *platform = dev_get_drvdata(dev);
-        struct dentry *gpu_root = platform->debugfs;
-        struct gk20a *g = get_gk20a(dev);
-
-        g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
-        if (IS_ERR_OR_NULL(g->debugfs_allocators))
-                return;
-
-        debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
-                           &gk20a_alloc_tracing_on);
-#endif
-}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
deleted file mode 100644
index b12926b3..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef GK20A_ALLOCATOR_H
-#define GK20A_ALLOCATOR_H
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/platform_device.h>
-
-/* #define ALLOCATOR_DEBUG */
-
-struct gk20a_allocator;
-struct gk20a_alloc_carveout;
-struct vm_gk20a;
-struct gk20a;
-
-/*
- * Operations for an allocator to implement.
- */
-struct gk20a_allocator_ops {
-        u64  (*alloc)(struct gk20a_allocator *allocator, u64 len);
-        void (*free)(struct gk20a_allocator *allocator, u64 addr);
-
-        /*
-         * Special interface to allocate a memory region with a specific
-         * starting address. Yikes. Note: if free() works for freeing both
-         * regular and fixed allocations then free_fixed() does not need to
-         * be implemented. This behavior exists for legacy reasons and should
-         * not be propagated to new allocators.
-         */
-        u64  (*alloc_fixed)(struct gk20a_allocator *allocator,
-                            u64 base, u64 len);
-        void (*free_fixed)(struct gk20a_allocator *allocator,
-                           u64 base, u64 len);
-
-        /*
-         * Allow allocators to reserve space for carveouts.
-         */
-        int  (*reserve_carveout)(struct gk20a_allocator *allocator,
-                                 struct gk20a_alloc_carveout *co);
-        void (*release_carveout)(struct gk20a_allocator *allocator,
-                                 struct gk20a_alloc_carveout *co);
-
-        /*
-         * Returns info about the allocator.
-         */
-        u64  (*base)(struct gk20a_allocator *allocator);
-        u64  (*length)(struct gk20a_allocator *allocator);
-        u64  (*end)(struct gk20a_allocator *allocator);
-        int  (*inited)(struct gk20a_allocator *allocator);
-        u64  (*space)(struct gk20a_allocator *allocator);
-
-        /* Destructor. */
-        void (*fini)(struct gk20a_allocator *allocator);
-
-        /* Debugging. */
-        void (*print_stats)(struct gk20a_allocator *allocator,
-                            struct seq_file *s, int lock);
-};
-
-struct gk20a_allocator {
-        char name[32];
-        struct mutex lock;
-
-        void *priv;
-        const struct gk20a_allocator_ops *ops;
-
-        struct dentry *debugfs_entry;
-        bool debug;                     /* Control for debug msgs. */
-};
-
-struct gk20a_alloc_carveout {
-        const char *name;
-        u64 base;
-        u64 length;
-
-        struct gk20a_allocator *allocator;
-
-        /*
-         * For usage by the allocator implementation.
-         */
-        struct list_head co_entry;
-};
-
-#define GK20A_CARVEOUT(__name, __base, __length)        \
-        {                                               \
-                .name = (__name),                       \
-                .base = (__base),                       \
-                .length = (__length)                    \
-        }
-
-/*
- * These are the available allocator flags.
- *
- * GPU_ALLOC_GVA_SPACE
- *
- *   This flag makes sense for the buddy allocator only. It specifies that the
- *   allocator will be used for managing a GVA space. When managing GVA spaces
- *   special care has to be taken to ensure that allocations of similar PTE
- *   sizes are placed in the same PDE block. This allows the higher level
- *   code to skip defining both small and large PTE tables for every PDE. That
- *   can save considerable memory for address spaces that have a lot of
- *   allocations.
- *
- * GPU_ALLOC_NO_ALLOC_PAGE
- *
- *   For any allocator that needs to manage a resource in a latency critical
- *   path this flag specifies that the allocator should not use any kmalloc()
- *   or similar functions during normal operation. Initialization routines
- *   may still use kmalloc(). This prevents the possibility of long waits for
- *   pages when using alloc_page(). Currently only the bitmap allocator
- *   implements this functionality.
- *
- *   Also note that if you accept this flag then you must also define the
- *   free_fixed() function. Since no meta-data is allocated to help free
- *   allocations you need to keep track of the meta-data yourself (in this
- *   case the base and length of the allocation as opposed to just the base
- *   of the allocation).
- *
- * GPU_ALLOC_4K_VIDMEM_PAGES
- *
- *   We manage vidmem pages at a large page granularity for performance
- *   reasons; however, this can lead to wasting memory. For page allocators
- *   setting this flag will tell the allocator to manage pools of 4K pages
- *   inside internally allocated large pages.
- *
- *   Currently this flag is ignored since the only usage of the page allocator
- *   uses a 4K block size already. However, this flag has been reserved since
- *   it will be necessary in the future.
- *
- * GPU_ALLOC_FORCE_CONTIG
- *
- *   Force allocations to be contiguous. Currently only relevant for page
- *   allocators since all other allocators are naturally contiguous.
- *
- * GPU_ALLOC_NO_SCATTER_GATHER
- *
- *   The page allocator normally returns a scatter gather data structure for
- *   allocations (to handle discontiguous pages). However, at times that can
- *   be annoying so this flag forces the page allocator to return a u64
- *   pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be
- *   set as well).
- */
-#define GPU_ALLOC_GVA_SPACE             0x1
-#define GPU_ALLOC_NO_ALLOC_PAGE         0x2
-#define GPU_ALLOC_4K_VIDMEM_PAGES       0x4
-#define GPU_ALLOC_FORCE_CONTIG          0x8
-#define GPU_ALLOC_NO_SCATTER_GATHER     0x10
-
-static inline void alloc_lock(struct gk20a_allocator *a)
-{
-        mutex_lock(&a->lock);
-}
-
-static inline void alloc_unlock(struct gk20a_allocator *a)
-{
-        mutex_unlock(&a->lock);
-}
-
-/*
- * Buddy allocator specific initializers.
- */
-int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
-                                 struct vm_gk20a *vm, const char *name,
-                                 u64 base, u64 size, u64 blk_size,
-                                 u64 max_order, u64 flags);
-int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
-                               const char *name, u64 base, u64 size,
-                               u64 blk_size, u64 flags);
-
-/*
- * Bitmap initializers.
- */
-int gk20a_bitmap_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
-                                const char *name, u64 base, u64 length,
-                                u64 blk_size, u64 flags);
-
-/*
- * Page allocator initializers.
- */
-int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
-                              const char *name, u64 base, u64 length,
-                              u64 blk_size, u64 flags);
-
-/*
- * Lockless allocator initializers.
- * Note: This allocator can only allocate fixed-size structures of a
- * pre-defined size.
- */
-int gk20a_lockless_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
-                                  const char *name, u64 base, u64 length,
-                                  u64 struct_size, u64 flags);
-
-#define GPU_BALLOC_MAX_ORDER    31
-
-/*
- * Allocator APIs.
- */
-u64  gk20a_alloc(struct gk20a_allocator *allocator, u64 len);
-void gk20a_free(struct gk20a_allocator *allocator, u64 addr);
-
-u64  gk20a_alloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
-void gk20a_free_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
-
-int  gk20a_alloc_reserve_carveout(struct gk20a_allocator *a,
-                                  struct gk20a_alloc_carveout *co);
-void gk20a_alloc_release_carveout(struct gk20a_allocator *a,
-                                  struct gk20a_alloc_carveout *co);
-
-u64  gk20a_alloc_base(struct gk20a_allocator *a);
-u64  gk20a_alloc_length(struct gk20a_allocator *a);
-u64  gk20a_alloc_end(struct gk20a_allocator *a);
-u64  gk20a_alloc_initialized(struct gk20a_allocator *a);
-u64  gk20a_alloc_space(struct gk20a_allocator *a);
-
-void gk20a_alloc_destroy(struct gk20a_allocator *allocator);
-
-void gk20a_alloc_print_stats(struct gk20a_allocator *a,
-                             struct seq_file *s, int lock);
-
-/*
- * Common functionality for the internals of the allocators.
- */
-void gk20a_init_alloc_debug(struct gk20a *g, struct gk20a_allocator *a);
-void gk20a_fini_alloc_debug(struct gk20a_allocator *a);
-
-int  __gk20a_alloc_common_init(struct gk20a_allocator *a,
-                               const char *name, void *priv, bool dbg,
-                               const struct gk20a_allocator_ops *ops);
-
-static inline void gk20a_alloc_enable_dbg(struct gk20a_allocator *a)
-{
-        a->debug = true;
-}
-
-static inline void gk20a_alloc_disable_dbg(struct gk20a_allocator *a)
-{
-        a->debug = false;
-}
-
-/*
- * Debug stuff.
- */
-extern u32 gk20a_alloc_tracing_on;
-
-void gk20a_alloc_debugfs_init(struct device *dev);
-
-#define gk20a_alloc_trace_func()                        \
-        do {                                            \
-                if (gk20a_alloc_tracing_on)             \
-                        trace_printk("%s\n", __func__); \
-        } while (0)
-
-#define gk20a_alloc_trace_func_done()                           \
-        do {                                                    \
-                if (gk20a_alloc_tracing_on)                     \
-                        trace_printk("%s_done\n", __func__);    \
-        } while (0)
-
-#define __alloc_pstat(seq, allocator, fmt, arg...)              \
-        do {                                                    \
-                if (s)                                          \
-                        seq_printf(seq, fmt, ##arg);            \
-                else                                            \
-                        alloc_dbg(allocator, fmt, ##arg);       \
-        } while (0)
-
-#define __alloc_dbg(a, fmt, arg...)                             \
-        pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg)
-
-#if defined(ALLOCATOR_DEBUG)
-/*
- * Always print the debug messages...
- */
-#define alloc_dbg(a, fmt, arg...) __alloc_dbg(a, fmt, ##arg)
-#else
-/*
- * Only print debug messages if debug is enabled for a given allocator.
- */
-#define alloc_dbg(a, fmt, arg...)                       \
-        do {                                            \
-                if ((a)->debug)                         \
-                        __alloc_dbg((a), fmt, ##arg);   \
-        } while (0)
-
-#endif
-
-#endif /* GK20A_ALLOCATOR_H */
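The header above is a classic ops-table interface: each backend (buddy,
bitmap, page, lockless) fills in a gk20a_allocator_ops and consumers only
call the common wrappers. A sketch of typical consumer usage with the
pre-rename names (example_use() and its sizes are illustrative only; after
this commit the same calls carry the nvgpu_ prefix):

    static int example_use(struct gk20a *g)
    {
            struct gk20a_allocator a;
            u64 addr;
            int err;

            /* Manage a 64K space of 4K blocks starting at 4K. */
            err = gk20a_buddy_allocator_init(g, &a, "example",
                                             SZ_4K, SZ_64K, SZ_4K, 0);
            if (err)
                    return err;

            addr = gk20a_alloc(&a, SZ_8K);  /* Returns 0 on failure. */
            if (addr)
                    gk20a_free(&a, addr);

            gk20a_alloc_destroy(&a);
            return 0;
    }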
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c
deleted file mode 100644
index f98e0782..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/bitops.h>
-
-#include "gk20a_allocator.h"
-#include "bitmap_allocator_priv.h"
-
-static struct kmem_cache *meta_data_cache;      /* slab cache for meta data. */
-static DEFINE_MUTEX(meta_data_cache_lock);
-
-static u64 gk20a_bitmap_alloc_length(struct gk20a_allocator *a)
-{
-        struct gk20a_bitmap_allocator *ba = a->priv;
-
-        return ba->length;
-}
-
-static u64 gk20a_bitmap_alloc_base(struct gk20a_allocator *a)
-{
-        struct gk20a_bitmap_allocator *ba = a->priv;
-
-        return ba->base;
-}
-
-static int gk20a_bitmap_alloc_inited(struct gk20a_allocator *a)
-{
-        struct gk20a_bitmap_allocator *ba = a->priv;
-        int inited = ba->inited;
-
-        rmb();
-        return inited;
-}
-
-static u64 gk20a_bitmap_alloc_end(struct gk20a_allocator *a)
-{
-        struct gk20a_bitmap_allocator *ba = a->priv;
-
-        return ba->base + ba->length;
-}
-
-static u64 gk20a_bitmap_alloc_fixed(struct gk20a_allocator *__a,
-                                    u64 base, u64 len)
-{
-        struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
-        u64 blks, offs, ret;
-
-        /* Compute the bit offset and make sure it's aligned to a block. */
-        offs = base >> a->blk_shift;
-        if (offs * a->blk_size != base)
-                return 0;
-
-        offs -= a->bit_offs;
-
-        blks = len >> a->blk_shift;
-        if (blks * a->blk_size != len)
-                blks++;
-
-        alloc_lock(__a);
-
-        /* Check if the space requested is already occupied. */
-        ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0);
-        if (ret != offs)
-                goto fail;
-
-        bitmap_set(a->bitmap, offs, blks);
-
-        a->bytes_alloced += blks * a->blk_size;
-        a->nr_fixed_allocs++;
-        alloc_unlock(__a);
-
-        alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
-                  base, len, blks, blks);
-        return base;
-
-fail:
-        alloc_unlock(__a);
-        alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base);
-        return 0;
-}
-
-/*
- * Two possibilities for this function: either we are freeing a fixed allocation
- * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined.
- *
- * Note: this function won't do much error checking. Thus you could really
- * confuse the allocator if you misuse this function.
- */
-static void gk20a_bitmap_free_fixed(struct gk20a_allocator *__a,
-                                    u64 base, u64 len)
-{
-        struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
-        u64 blks, offs;
-
-        offs = base >> a->blk_shift;
-        if (WARN_ON(offs * a->blk_size != base))
-                return;
-
-        offs -= a->bit_offs;
-
-        blks = len >> a->blk_shift;
-        if (blks * a->blk_size != len)
-                blks++;
-
-        alloc_lock(__a);
-        bitmap_clear(a->bitmap, offs, blks);
-        a->bytes_freed += blks * a->blk_size;
-        alloc_unlock(__a);
-
-        alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
-                  base, len, blks, blks);
-}
-
-/*
- * Add the passed alloc to the tree of stored allocations.
- */
-static void insert_alloc_metadata(struct gk20a_bitmap_allocator *a,
-                                  struct gk20a_bitmap_alloc *alloc)
-{
-        struct rb_node **new = &a->allocs.rb_node;
-        struct rb_node *parent = NULL;
-        struct gk20a_bitmap_alloc *tmp;
-
-        while (*new) {
-                tmp = container_of(*new, struct gk20a_bitmap_alloc,
-                                   alloc_entry);
-
-                parent = *new;
-                if (alloc->base < tmp->base)
-                        new = &((*new)->rb_left);
-                else if (alloc->base > tmp->base)
-                        new = &((*new)->rb_right);
-                else {
-                        WARN_ON("Duplicate entries in RB alloc tree!\n");
-                        return;
-                }
-        }
-
-        rb_link_node(&alloc->alloc_entry, parent, new);
-        rb_insert_color(&alloc->alloc_entry, &a->allocs);
-}
-
-/*
- * Find and remove meta-data from the outstanding allocations.
- */
-static struct gk20a_bitmap_alloc *find_alloc_metadata(
-        struct gk20a_bitmap_allocator *a, u64 addr)
-{
-        struct rb_node *node = a->allocs.rb_node;
-        struct gk20a_bitmap_alloc *alloc;
-
-        while (node) {
-                alloc = container_of(node, struct gk20a_bitmap_alloc,
-                                     alloc_entry);
-
-                if (addr < alloc->base)
-                        node = node->rb_left;
-                else if (addr > alloc->base)
-                        node = node->rb_right;
-                else
-                        break;
-        }
-
-        if (!node)
-                return NULL;
-
-        rb_erase(node, &a->allocs);
-
-        return alloc;
-}
-
-/*
- * Tree of alloc meta data stores the address of the alloc not the bit offset.
- */
-static int __gk20a_bitmap_store_alloc(struct gk20a_bitmap_allocator *a,
-                                      u64 addr, u64 len)
-{
-        struct gk20a_bitmap_alloc *alloc =
-                kmem_cache_alloc(meta_data_cache, GFP_KERNEL);
-
-        if (!alloc)
-                return -ENOMEM;
-
-        alloc->base = addr;
-        alloc->length = len;
-
-        insert_alloc_metadata(a, alloc);
-
-        return 0;
-}
-
-/*
- * @len is in bytes. This routine will figure out the right number of bits to
- * actually allocate. The return is the address in bytes as well.
- */
-static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len)
-{
-        u64 blks, addr;
-        unsigned long offs, adjusted_offs, limit;
-        struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
-
-        blks = len >> a->blk_shift;
-
-        if (blks * a->blk_size != len)
-                blks++;
-
-        alloc_lock(__a);
-
-        /*
-         * First look from next_blk and onwards...
-         */
-        offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits,
-                                          a->next_blk, blks, 0);
-        if (offs >= a->num_bits) {
-                /*
-                 * If that didn't work try the remaining area. Since there can
-                 * be available space that spans across a->next_blk we need to
-                 * search up to the first set bit after that.
-                 */
-                limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk);
-                offs = bitmap_find_next_zero_area(a->bitmap, limit,
-                                                  0, blks, 0);
-                if (offs >= a->next_blk)
-                        goto fail;
-        }
-
-        bitmap_set(a->bitmap, offs, blks);
-        a->next_blk = offs + blks;
-
-        adjusted_offs = offs + a->bit_offs;
-        addr = ((u64)adjusted_offs) * a->blk_size;
-
-        /*
-         * Only do meta-data storage if we are allowed to allocate storage for
-         * that meta-data. The issue with using kmalloc() and friends is that
-         * in latency and success critical paths an alloc_page() call can either
-         * sleep for potentially a long time or, assuming GFP_ATOMIC, fail.
-         * Since we might not want either of these possibilities assume that the
-         * caller will keep what data it needs around to successfully free this
-         * allocation.
-         */
-        if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) &&
-            __gk20a_bitmap_store_alloc(a, addr, blks * a->blk_size))
-                goto fail_reset_bitmap;
-
-        alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
-                  addr, len, blks, blks);
-
-        a->nr_allocs++;
-        a->bytes_alloced += (blks * a->blk_size);
-        alloc_unlock(__a);
-
-        return addr;
-
-fail_reset_bitmap:
-        bitmap_clear(a->bitmap, offs, blks);
-fail:
-        a->next_blk = 0;
-        alloc_unlock(__a);
-        alloc_dbg(__a, "Alloc failed!\n");
-        return 0;
-}
-
-static void gk20a_bitmap_free(struct gk20a_allocator *__a, u64 addr)
-{
-        struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
-        struct gk20a_bitmap_alloc *alloc = NULL;
-        u64 offs, adjusted_offs, blks;
-
-        alloc_lock(__a);
-
-        if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) {
-                WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator");
-                goto done;
-        }
-
-        alloc = find_alloc_metadata(a, addr);
-        if (!alloc)
-                goto done;
-
-        /*
-         * Address comes from adjusted offset (i.e the bit offset with
-         * a->bit_offs added. So start with that and then work out the real
-         * offs into the bitmap.
-         */
-        adjusted_offs = addr >> a->blk_shift;
-        offs = adjusted_offs - a->bit_offs;
-        blks = alloc->length >> a->blk_shift;
-
-        bitmap_clear(a->bitmap, offs, blks);
-        alloc_dbg(__a, "Free 0x%-10llx\n", addr);
-
-        a->bytes_freed += alloc->length;
-
-done:
-        kfree(alloc);
-        alloc_unlock(__a);
-}
-
-static void gk20a_bitmap_alloc_destroy(struct gk20a_allocator *__a)
-{
-        struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
-        struct gk20a_bitmap_alloc *alloc;
-        struct rb_node *node;
-
-        /*
-         * Kill any outstanding allocations.
-         */
-        while ((node = rb_first(&a->allocs)) != NULL) {
-                alloc = container_of(node, struct gk20a_bitmap_alloc,
-                                     alloc_entry);
-
-                rb_erase(node, &a->allocs);
-                kfree(alloc);
-        }
-
-        kfree(a->bitmap);
-        kfree(a);
-}
-
-static void gk20a_bitmap_print_stats(struct gk20a_allocator *__a,
-                                     struct seq_file *s, int lock)
-{
-        struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
-
-        __alloc_pstat(s, __a, "Bitmap allocator params:\n");
-        __alloc_pstat(s, __a, "  start = 0x%llx\n", a->base);
-        __alloc_pstat(s, __a, "  end   = 0x%llx\n", a->base + a->length);
-        __alloc_pstat(s, __a, "  blks  = 0x%llx\n", a->num_bits);
-
-        /* Actual stats. */
-        __alloc_pstat(s, __a, "Stats:\n");
-        __alloc_pstat(s, __a, "  Number allocs = 0x%llx\n", a->nr_allocs);
-        __alloc_pstat(s, __a, "  Number fixed  = 0x%llx\n", a->nr_fixed_allocs);
-        __alloc_pstat(s, __a, "  Bytes alloced = 0x%llx\n", a->bytes_alloced);
-        __alloc_pstat(s, __a, "  Bytes freed   = 0x%llx\n", a->bytes_freed);
-        __alloc_pstat(s, __a, "  Outstanding   = 0x%llx\n",
-                      a->bytes_alloced - a->bytes_freed);
-}
-
-static const struct gk20a_allocator_ops bitmap_ops = {
-        .alloc = gk20a_bitmap_alloc,
-        .free = gk20a_bitmap_free,
-
-        .alloc_fixed = gk20a_bitmap_alloc_fixed,
-        .free_fixed = gk20a_bitmap_free_fixed,
-
-        .base = gk20a_bitmap_alloc_base,
-        .length = gk20a_bitmap_alloc_length,
-        .end = gk20a_bitmap_alloc_end,
-        .inited = gk20a_bitmap_alloc_inited,
-
-        .fini = gk20a_bitmap_alloc_destroy,
-
-        .print_stats = gk20a_bitmap_print_stats,
-};
-
-
-int gk20a_bitmap_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
-                                const char *name, u64 base, u64 length,
-                                u64 blk_size, u64 flags)
-{
-        int err;
-        struct gk20a_bitmap_allocator *a;
-
-        mutex_lock(&meta_data_cache_lock);
-        if (!meta_data_cache)
-                meta_data_cache = KMEM_CACHE(gk20a_bitmap_alloc, 0);
-        mutex_unlock(&meta_data_cache_lock);
-
-        if (!meta_data_cache)
-                return -ENOMEM;
-
-        if (WARN_ON(blk_size & (blk_size - 1)))
-                return -EINVAL;
-
-        /*
-         * blk_size must be a power-of-2; base and length also need to be
-         * aligned to blk_size.
-         */
396 | if (blk_size & (blk_size - 1) || | ||
397 | base & (blk_size - 1) || length & (blk_size - 1)) | ||
398 | return -EINVAL; | ||
399 | |||
400 | if (base == 0) { | ||
401 | base = blk_size; | ||
402 | length -= blk_size; | ||
403 | } | ||
404 | |||
405 | a = kzalloc(sizeof(struct gk20a_bitmap_allocator), GFP_KERNEL); | ||
406 | if (!a) | ||
407 | return -ENOMEM; | ||
408 | |||
409 | err = __gk20a_alloc_common_init(__a, name, a, false, &bitmap_ops); | ||
410 | if (err) | ||
411 | goto fail; | ||
412 | |||
413 | a->base = base; | ||
414 | a->length = length; | ||
415 | a->blk_size = blk_size; | ||
416 | a->blk_shift = __ffs(a->blk_size); | ||
417 | a->num_bits = length >> a->blk_shift; | ||
418 | a->bit_offs = a->base >> a->blk_shift; | ||
419 | a->flags = flags; | ||
420 | |||
421 | a->bitmap = kcalloc(BITS_TO_LONGS(a->num_bits), sizeof(*a->bitmap), | ||
422 | GFP_KERNEL); | ||
423 | if (!a->bitmap) | ||
424 | 	{ err = -ENOMEM; goto fail; } | ||
425 | |||
426 | wmb(); | ||
427 | a->inited = true; | ||
428 | |||
429 | gk20a_init_alloc_debug(g, __a); | ||
430 | alloc_dbg(__a, "New allocator: type bitmap\n"); | ||
431 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
432 | alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); | ||
433 | alloc_dbg(__a, " size 0x%llx\n", a->length); | ||
434 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
435 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
436 | |||
437 | return 0; | ||
438 | |||
439 | fail: | ||
440 | kfree(a); | ||
441 | return err; | ||
442 | } | ||
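The init path above reduces to a few shifts once blk_size is known to be a power of two: one bitmap bit tracks one blk_size block, and bit_offs anchors bit 0 at the base address. A minimal userspace sketch of the same arithmetic (the sizes are made up, and __builtin_ctzll() stands in for the kernel's __ffs()):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t base = 0x100000, length = 0x400000, blk_size = 0x1000;
		/* __ffs() equivalent; valid because blk_size is a power of 2. */
		unsigned blk_shift = __builtin_ctzll(blk_size);
		uint64_t num_bits = length >> blk_shift;  /* one bit per block   */
		uint64_t bit_offs = base >> blk_shift;    /* bit 0 == base block */

		printf("blk_shift=%u num_bits=%llu bit_offs=%llu\n", blk_shift,
		       (unsigned long long)num_bits, (unsigned long long)bit_offs);
		return 0;
	}

For these values: blk_shift = 12, num_bits = 1024, bit_offs = 256.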
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c deleted file mode 100644 index 3715e9f8..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c +++ /dev/null | |||
@@ -1,1327 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | |||
20 | #include "mm_gk20a.h" | ||
21 | #include "platform_gk20a.h" | ||
22 | #include "gk20a_allocator.h" | ||
23 | #include "buddy_allocator_priv.h" | ||
24 | |||
25 | static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ | ||
26 | |||
27 | /* Some other buddy allocator functions. */ | ||
28 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, | ||
29 | u64 addr); | ||
30 | static void balloc_coalesce(struct gk20a_buddy_allocator *a, | ||
31 | struct gk20a_buddy *b); | ||
32 | static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, | ||
33 | struct gk20a_fixed_alloc *falloc); | ||
34 | |||
35 | /* | ||
36 | * This function is not present in older kernel's list.h code. | ||
37 | */ | ||
38 | #ifndef list_last_entry | ||
39 | #define list_last_entry(ptr, type, member) \ | ||
40 | list_entry((ptr)->prev, type, member) | ||
41 | #endif | ||
42 | |||
43 | /* | ||
44 | * GPU buddy allocator for various address spaces. | ||
45 | * | ||
46 | * Current limitations: | ||
47 | * o A fixed allocation could potentially be made that borders PDEs with | ||
48 | * different PTE sizes. This would require that fixed buffer to have | ||
49 | * different sized PTEs for different parts of the allocation. Probably | ||
50 | * best to just require PDE alignment for fixed address allocs. | ||
51 | * | ||
52 | * o It is currently possible to make an allocator that has a buddy alignment | ||
53 | * out of sync with the PDE block size alignment. A simple example is a | ||
54 | * 32GB address space starting at byte 1. Every buddy is shifted off by 1 | ||
55 | * which means each buddy corresponds to more than one actual GPU page. The | ||
56 | * best way to fix this is probably just to require PDE block size alignment | ||
57 | * for the start of the address space. At the moment all allocators are | ||
58 | * easily PDE aligned so this hasn't been a problem. | ||
59 | */ | ||
60 | |||
61 | /* | ||
62 | * Pick a suitable maximum order for this allocator. | ||
63 | * | ||
64 | * Heuristic: Just guessing that the best max order is the largest single | ||
65 | * block that will fit in the address space. | ||
66 | */ | ||
67 | static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) | ||
68 | { | ||
69 | u64 true_max_order = ilog2(a->blks); | ||
70 | |||
71 | if (a->max_order == 0) { | ||
72 | a->max_order = true_max_order; | ||
73 | return; | ||
74 | } | ||
75 | |||
76 | if (a->max_order > true_max_order) | ||
77 | a->max_order = true_max_order; | ||
78 | if (a->max_order > GPU_BALLOC_MAX_ORDER) | ||
79 | a->max_order = GPU_BALLOC_MAX_ORDER; | ||
80 | } | ||
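Concretely, a 4 GiB space managed in 64 KiB blocks gives blks = 0x10000, so the heuristic above picks max order 16 before the GPU_BALLOC_MAX_ORDER cap is applied. A standalone check of that arithmetic (the constants are illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	static unsigned ilog2_u64(uint64_t x) { return 63 - __builtin_clzll(x); }

	int main(void)
	{
		uint64_t space = 4ULL << 30, blk_size = 64 << 10;
		uint64_t blks = space / blk_size;

		printf("blks=0x%llx true_max_order=%u\n",
		       (unsigned long long)blks, ilog2_u64(blks)); /* 16 */
		return 0;
	}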
81 | |||
82 | /* | ||
83 | * Since we can only allocate in chunks of a->blk_size we need to trim off | ||
84 | * any excess data that is not aligned to a->blk_size. | ||
85 | */ | ||
86 | static void balloc_allocator_align(struct gk20a_buddy_allocator *a) | ||
87 | { | ||
88 | a->start = ALIGN(a->base, a->blk_size); | ||
89 | WARN_ON(a->start != a->base); | ||
90 | a->end = (a->base + a->length) & ~(a->blk_size - 1); | ||
91 | a->count = a->end - a->start; | ||
92 | a->blks = a->count >> a->blk_shift; | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Pass NULL for parent if you want a top level buddy. | ||
97 | */ | ||
98 | static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, | ||
99 | struct gk20a_buddy *parent, | ||
100 | u64 start, u64 order) | ||
101 | { | ||
102 | struct gk20a_buddy *new_buddy; | ||
103 | |||
104 | new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); | ||
105 | if (!new_buddy) | ||
106 | return NULL; | ||
107 | |||
108 | memset(new_buddy, 0, sizeof(struct gk20a_buddy)); | ||
109 | |||
110 | new_buddy->parent = parent; | ||
111 | new_buddy->start = start; | ||
112 | new_buddy->order = order; | ||
113 | new_buddy->end = start + (1ULL << order) * a->blk_size; | ||
114 | new_buddy->pte_size = BALLOC_PTE_SIZE_ANY; | ||
115 | |||
116 | return new_buddy; | ||
117 | } | ||
118 | |||
119 | static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, | ||
120 | struct gk20a_buddy *b, | ||
121 | struct list_head *list) | ||
122 | { | ||
123 | if (buddy_is_in_list(b)) { | ||
124 | alloc_dbg(balloc_owner(a), | ||
125 | "Oops: adding added buddy (%llu:0x%llx)\n", | ||
126 | b->order, b->start); | ||
127 | BUG(); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Add big PTE blocks to the tail, small to the head for GVA spaces. | ||
132 | * This lets the code that checks if there are available blocks check | ||
133 | * without cycling through the entire list. | ||
134 | */ | ||
135 | if (a->flags & GPU_ALLOC_GVA_SPACE && | ||
136 | b->pte_size == gmmu_page_size_big) | ||
137 | list_add_tail(&b->buddy_entry, list); | ||
138 | else | ||
139 | list_add(&b->buddy_entry, list); | ||
140 | |||
141 | buddy_set_in_list(b); | ||
142 | } | ||
143 | |||
144 | static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, | ||
145 | struct gk20a_buddy *b) | ||
146 | { | ||
147 | if (!buddy_is_in_list(b)) { | ||
148 | alloc_dbg(balloc_owner(a), | ||
149 | "Oops: removing removed buddy (%llu:0x%llx)\n", | ||
150 | b->order, b->start); | ||
151 | BUG(); | ||
152 | } | ||
153 | |||
154 | list_del_init(&b->buddy_entry); | ||
155 | buddy_clr_in_list(b); | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * Add a buddy to one of the buddy lists and deal with the necessary | ||
160 | * bookkeeping. Adds the buddy to the list specified by the buddy's order. | ||
161 | */ | ||
162 | static void balloc_blist_add(struct gk20a_buddy_allocator *a, | ||
163 | struct gk20a_buddy *b) | ||
164 | { | ||
165 | __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); | ||
166 | a->buddy_list_len[b->order]++; | ||
167 | } | ||
168 | |||
169 | static void balloc_blist_rem(struct gk20a_buddy_allocator *a, | ||
170 | struct gk20a_buddy *b) | ||
171 | { | ||
172 | __balloc_buddy_list_rem(a, b); | ||
173 | a->buddy_list_len[b->order]--; | ||
174 | } | ||
175 | |||
176 | static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) | ||
177 | { | ||
178 | if (len == 0) | ||
179 | return 0; | ||
180 | |||
181 | len--; | ||
182 | len >>= a->blk_shift; | ||
183 | |||
184 | return fls(len); | ||
185 | } | ||
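In other words, a request is rounded up to the next power-of-two multiple of blk_size: one block is order 0, anything up to two blocks is order 1, and so on. A userspace sketch of the same mapping (a 4 KiB block size is assumed; fls64_() stands in for the kernel's fls()):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t fls64_(uint64_t x) { return x ? 64 - __builtin_clzll(x) : 0; }

	static uint64_t get_order(uint64_t len, unsigned blk_shift)
	{
		if (len == 0)
			return 0;
		len--;
		len >>= blk_shift;
		return fls64_(len);
	}

	int main(void)
	{
		printf("%llu\n", (unsigned long long)get_order(0x1000, 12));  /* 0 */
		printf("%llu\n", (unsigned long long)get_order(0x1001, 12));  /* 1 */
		printf("%llu\n", (unsigned long long)get_order(0x10000, 12)); /* 4 */
		return 0;
	}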
186 | |||
187 | static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, | ||
188 | u64 start, u64 end) | ||
189 | { | ||
190 | u64 size = (end - start) >> a->blk_shift; | ||
191 | |||
192 | if (size > 0) | ||
193 | return min_t(u64, ilog2(size), a->max_order); | ||
194 | else | ||
195 | return GPU_BALLOC_MAX_ORDER; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * Initialize the buddy lists. | ||
200 | */ | ||
201 | static int balloc_init_lists(struct gk20a_buddy_allocator *a) | ||
202 | { | ||
203 | int i; | ||
204 | u64 bstart, bend, order; | ||
205 | struct gk20a_buddy *buddy; | ||
206 | |||
207 | bstart = a->start; | ||
208 | bend = a->end; | ||
209 | |||
210 | /* First make sure the LLs are valid. */ | ||
211 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) | ||
212 | INIT_LIST_HEAD(balloc_get_order_list(a, i)); | ||
213 | |||
214 | while (bstart < bend) { | ||
215 | order = __balloc_max_order_in(a, bstart, bend); | ||
216 | |||
217 | buddy = balloc_new_buddy(a, NULL, bstart, order); | ||
218 | if (!buddy) | ||
219 | goto cleanup; | ||
220 | |||
221 | balloc_blist_add(a, buddy); | ||
222 | bstart += balloc_order_to_len(a, order); | ||
223 | } | ||
224 | |||
225 | return 0; | ||
226 | |||
227 | cleanup: | ||
228 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
229 | if (!list_empty(balloc_get_order_list(a, i))) { | ||
230 | buddy = list_first_entry(balloc_get_order_list(a, i), | ||
231 | struct gk20a_buddy, buddy_entry); | ||
232 | balloc_blist_rem(a, buddy); | ||
233 | kmem_cache_free(buddy_cache, buddy); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | return -ENOMEM; | ||
238 | } | ||
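The loop above greedily carves [start, end) into the largest buddies that fit, mirroring __balloc_max_order_in(). A standalone sketch of the decomposition for an odd-sized range (the block size and range are made up):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned blk_shift = 12;                  /* 4 KiB blocks  */
		uint64_t max_order = 10;
		uint64_t bstart = 0x3000, bend = 0x20000; /* example range */

		while (bstart < bend) {
			uint64_t size = (bend - bstart) >> blk_shift;
			uint64_t order = 63 - __builtin_clzll(size);

			if (order > max_order)
				order = max_order;
			printf("buddy @ 0x%llx, order %llu\n",
			       (unsigned long long)bstart,
			       (unsigned long long)order);
			bstart += (1ULL << order) << blk_shift;
		}
		return 0;
	}

For this range the lists end up with buddies at 0x3000 (order 4), 0x13000 (order 3), 0x1b000 (order 2) and 0x1f000 (order 0).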
239 | |||
240 | /* | ||
241 | * Clean up and destroy the passed allocator. | ||
242 | */ | ||
243 | static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) | ||
244 | { | ||
245 | int i; | ||
246 | struct rb_node *node; | ||
247 | struct gk20a_buddy *bud; | ||
248 | struct gk20a_fixed_alloc *falloc; | ||
249 | struct gk20a_buddy_allocator *a = __a->priv; | ||
250 | |||
251 | alloc_lock(__a); | ||
252 | |||
253 | gk20a_fini_alloc_debug(__a); | ||
254 | |||
255 | /* | ||
256 | * Free the fixed allocs first. | ||
257 | */ | ||
258 | while ((node = rb_first(&a->fixed_allocs)) != NULL) { | ||
259 | falloc = container_of(node, | ||
260 | struct gk20a_fixed_alloc, alloced_entry); | ||
261 | |||
262 | rb_erase(node, &a->fixed_allocs); | ||
263 | __balloc_do_free_fixed(a, falloc); | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * And now free all outstanding allocations. | ||
268 | */ | ||
269 | while ((node = rb_first(&a->alloced_buddies)) != NULL) { | ||
270 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
271 | balloc_free_buddy(a, bud->start); | ||
272 | balloc_blist_add(a, bud); | ||
273 | balloc_coalesce(a, bud); | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Now clean up the unallocated buddies. | ||
278 | */ | ||
279 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
280 | BUG_ON(a->buddy_list_alloced[i] != 0); | ||
281 | |||
282 | while (!list_empty(balloc_get_order_list(a, i))) { | ||
283 | bud = list_first_entry(balloc_get_order_list(a, i), | ||
284 | struct gk20a_buddy, buddy_entry); | ||
285 | balloc_blist_rem(a, bud); | ||
286 | kmem_cache_free(buddy_cache, bud); | ||
287 | } | ||
288 | |||
289 | if (a->buddy_list_len[i] != 0) { | ||
290 | pr_info("Excess buddies!!! (%d: %llu)\n", | ||
291 | i, a->buddy_list_len[i]); | ||
292 | BUG(); | ||
293 | } | ||
294 | if (a->buddy_list_split[i] != 0) { | ||
295 | pr_info("Excess split nodes!!! (%d: %llu)\n", | ||
296 | i, a->buddy_list_split[i]); | ||
297 | BUG(); | ||
298 | } | ||
299 | if (a->buddy_list_alloced[i] != 0) { | ||
300 | pr_info("Excess alloced nodes!!! (%d: %llu)\n", | ||
301 | i, a->buddy_list_alloced[i]); | ||
302 | BUG(); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | kfree(a); | ||
307 | |||
308 | alloc_unlock(__a); | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Combine the passed buddy if possible. The pointer in @b may not be valid | ||
313 | * after this as the buddy may be freed. | ||
314 | * | ||
315 | * @a must be locked. | ||
316 | */ | ||
317 | static void balloc_coalesce(struct gk20a_buddy_allocator *a, | ||
318 | struct gk20a_buddy *b) | ||
319 | { | ||
320 | struct gk20a_buddy *parent; | ||
321 | |||
322 | if (buddy_is_alloced(b) || buddy_is_split(b)) | ||
323 | return; | ||
324 | |||
325 | /* | ||
326 | * If both our buddy and I are both not allocated and not split then | ||
327 | * we can coalesce ourselves. | ||
328 | */ | ||
329 | if (!b->buddy) | ||
330 | return; | ||
331 | if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) | ||
332 | return; | ||
333 | |||
334 | parent = b->parent; | ||
335 | |||
336 | balloc_blist_rem(a, b); | ||
337 | balloc_blist_rem(a, b->buddy); | ||
338 | |||
339 | buddy_clr_split(parent); | ||
340 | a->buddy_list_split[parent->order]--; | ||
341 | balloc_blist_add(a, parent); | ||
342 | |||
343 | /* | ||
344 | * Recursively coalesce as far as we can go. | ||
345 | */ | ||
346 | balloc_coalesce(a, parent); | ||
347 | |||
348 | /* Clean up the remains. */ | ||
349 | kmem_cache_free(buddy_cache, b->buddy); | ||
350 | kmem_cache_free(buddy_cache, b); | ||
351 | } | ||
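This driver tracks buddies with explicit parent/buddy pointers, but the underlying pairing rule is the classic one: two buddies of order n differ only in bit n of their block index and share an order n+1 parent. A tiny illustration of that index math (for intuition only; not how this code stores buddies):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t idx = 6, order = 1; /* block covering indices 6-7 */
		uint64_t buddy  = idx ^ (1ULL << order);              /* 4 */
		uint64_t parent = idx & ~((1ULL << (order + 1)) - 1); /* 4 */

		printf("buddy=%llu parent=%llu\n",
		       (unsigned long long)buddy, (unsigned long long)parent);
		return 0;
	}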
352 | |||
353 | /* | ||
354 | * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. | ||
355 | * | ||
356 | * @a must be locked. | ||
357 | */ | ||
358 | static int balloc_split_buddy(struct gk20a_buddy_allocator *a, | ||
359 | struct gk20a_buddy *b, int pte_size) | ||
360 | { | ||
361 | struct gk20a_buddy *left, *right; | ||
362 | u64 half; | ||
363 | |||
364 | left = balloc_new_buddy(a, b, b->start, b->order - 1); | ||
365 | if (!left) | ||
366 | return -ENOMEM; | ||
367 | |||
368 | half = (b->end - b->start) / 2; | ||
369 | |||
370 | right = balloc_new_buddy(a, b, b->start + half, b->order - 1); | ||
371 | if (!right) { | ||
372 | kmem_cache_free(buddy_cache, left); | ||
373 | return -ENOMEM; | ||
374 | } | ||
375 | |||
376 | buddy_set_split(b); | ||
377 | a->buddy_list_split[b->order]++; | ||
378 | |||
379 | b->left = left; | ||
380 | b->right = right; | ||
381 | left->buddy = right; | ||
382 | right->buddy = left; | ||
383 | left->parent = b; | ||
384 | right->parent = b; | ||
385 | |||
386 | /* PTE considerations. */ | ||
387 | if (a->flags & GPU_ALLOC_GVA_SPACE && | ||
388 | left->order <= a->pte_blk_order) { | ||
389 | left->pte_size = pte_size; | ||
390 | right->pte_size = pte_size; | ||
391 | } | ||
392 | |||
393 | balloc_blist_rem(a, b); | ||
394 | balloc_blist_add(a, left); | ||
395 | balloc_blist_add(a, right); | ||
396 | |||
397 | return 0; | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * Place the passed buddy into the RB tree for allocated buddies. Never fails | ||
402 | * unless the passed entry is a duplicate which is a bug. | ||
403 | * | ||
404 | * @a must be locked. | ||
405 | */ | ||
406 | static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, | ||
407 | struct gk20a_buddy *b) | ||
408 | { | ||
409 | struct rb_node **new = &(a->alloced_buddies.rb_node); | ||
410 | struct rb_node *parent = NULL; | ||
411 | |||
412 | while (*new) { | ||
413 | struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, | ||
414 | alloced_entry); | ||
415 | |||
416 | parent = *new; | ||
417 | if (b->start < bud->start) | ||
418 | new = &((*new)->rb_left); | ||
419 | else if (b->start > bud->start) | ||
420 | new = &((*new)->rb_right); | ||
421 | else | ||
422 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
423 | } | ||
424 | |||
425 | rb_link_node(&b->alloced_entry, parent, new); | ||
426 | rb_insert_color(&b->alloced_entry, &a->alloced_buddies); | ||
427 | |||
428 | buddy_set_alloced(b); | ||
429 | a->buddy_list_alloced[b->order]++; | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
434 | * deallocated buddy for further processing. | ||
435 | * | ||
436 | * @a must be locked. | ||
437 | */ | ||
438 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, | ||
439 | u64 addr) | ||
440 | { | ||
441 | struct rb_node *node = a->alloced_buddies.rb_node; | ||
442 | struct gk20a_buddy *bud; | ||
443 | |||
444 | while (node) { | ||
445 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
446 | |||
447 | if (addr < bud->start) | ||
448 | node = node->rb_left; | ||
449 | else if (addr > bud->start) | ||
450 | node = node->rb_right; | ||
451 | else | ||
452 | break; | ||
453 | } | ||
454 | |||
455 | if (!node) | ||
456 | return NULL; | ||
457 | |||
458 | rb_erase(node, &a->alloced_buddies); | ||
459 | buddy_clr_alloced(bud); | ||
460 | a->buddy_list_alloced[bud->order]--; | ||
461 | |||
462 | return bud; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Find a suitable buddy for the given order and PTE type (big or little). | ||
467 | */ | ||
468 | static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, | ||
469 | u64 order, int pte_size) | ||
470 | { | ||
471 | struct gk20a_buddy *bud; | ||
472 | |||
473 | if (order > a->max_order || | ||
474 | list_empty(balloc_get_order_list(a, order))) | ||
475 | return NULL; | ||
476 | |||
477 | if (a->flags & GPU_ALLOC_GVA_SPACE && | ||
478 | pte_size == gmmu_page_size_big) | ||
479 | bud = list_last_entry(balloc_get_order_list(a, order), | ||
480 | struct gk20a_buddy, buddy_entry); | ||
481 | else | ||
482 | bud = list_first_entry(balloc_get_order_list(a, order), | ||
483 | struct gk20a_buddy, buddy_entry); | ||
484 | |||
485 | if (bud->pte_size != BALLOC_PTE_SIZE_ANY && | ||
486 | bud->pte_size != pte_size) | ||
487 | return NULL; | ||
488 | |||
489 | return bud; | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * Allocate a suitably sized buddy. If no suitable buddy exists split higher | ||
494 | * order buddies until we have a suitable buddy to allocate. | ||
495 | * | ||
496 | * For PDE grouping add an extra check to see if a buddy is suitable: that the | ||
497 | * buddy exists in a PDE whose PTE size is reasonable. | ||
498 | * | ||
499 | * @a must be locked. | ||
500 | */ | ||
501 | static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, | ||
502 | u64 order, int pte_size) | ||
503 | { | ||
504 | u64 split_order; | ||
505 | struct gk20a_buddy *bud = NULL; | ||
506 | |||
507 | split_order = order; | ||
508 | while (split_order <= a->max_order && | ||
509 | !(bud = __balloc_find_buddy(a, split_order, pte_size))) | ||
510 | split_order++; | ||
511 | |||
512 | /* Out of memory! */ | ||
513 | if (!bud) | ||
514 | return 0; | ||
515 | |||
516 | while (bud->order != order) { | ||
517 | if (balloc_split_buddy(a, bud, pte_size)) | ||
518 | return 0; /* No mem... */ | ||
519 | bud = bud->left; | ||
520 | } | ||
521 | |||
522 | balloc_blist_rem(a, bud); | ||
523 | balloc_alloc_buddy(a, bud); | ||
524 | |||
525 | return bud->start; | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * See if the passed range is actually available for allocation. If so, then | ||
530 | * return 1, otherwise return 0. | ||
531 | * | ||
532 | * TODO: Right now this uses the suboptimal approach of going through all | ||
533 | * outstanding allocations and checking their base/ends. This could be better. | ||
534 | */ | ||
535 | static int balloc_is_range_free(struct gk20a_buddy_allocator *a, | ||
536 | u64 base, u64 end) | ||
537 | { | ||
538 | struct rb_node *node; | ||
539 | struct gk20a_buddy *bud; | ||
540 | |||
541 | node = rb_first(&a->alloced_buddies); | ||
542 | if (!node) | ||
543 | return 1; /* No allocs yet. */ | ||
544 | |||
545 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
546 | |||
547 | while (bud->start < end) { | ||
548 | if ((bud->start > base && bud->start < end) || | ||
549 | (bud->end > base && bud->end < end)) | ||
550 | return 0; | ||
551 | |||
552 | node = rb_next(node); | ||
553 | if (!node) | ||
554 | break; | ||
555 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
556 | } | ||
557 | |||
558 | return 1; | ||
559 | } | ||
560 | |||
561 | static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, | ||
562 | struct gk20a_fixed_alloc *f) | ||
563 | { | ||
564 | struct rb_node **new = &(a->fixed_allocs.rb_node); | ||
565 | struct rb_node *parent = NULL; | ||
566 | |||
567 | while (*new) { | ||
568 | struct gk20a_fixed_alloc *falloc = | ||
569 | container_of(*new, struct gk20a_fixed_alloc, | ||
570 | alloced_entry); | ||
571 | |||
572 | BUG_ON(!virt_addr_valid(falloc)); | ||
573 | |||
574 | parent = *new; | ||
575 | if (f->start < falloc->start) | ||
576 | new = &((*new)->rb_left); | ||
577 | else if (f->start > falloc->start) | ||
578 | new = &((*new)->rb_right); | ||
579 | else | ||
580 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
581 | } | ||
582 | |||
583 | rb_link_node(&f->alloced_entry, parent, new); | ||
584 | rb_insert_color(&f->alloced_entry, &a->fixed_allocs); | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Remove the fixed allocation at @addr from the fixed-alloc RB tree. Returns | ||
589 | * the fixed alloc for further processing, or NULL if nothing is at @addr. | ||
590 | * | ||
591 | * @a must be locked. | ||
592 | */ | ||
593 | static struct gk20a_fixed_alloc *balloc_free_fixed( | ||
594 | struct gk20a_buddy_allocator *a, u64 addr) | ||
595 | { | ||
596 | struct rb_node *node = a->fixed_allocs.rb_node; | ||
597 | struct gk20a_fixed_alloc *falloc; | ||
598 | |||
599 | while (node) { | ||
600 | falloc = container_of(node, | ||
601 | struct gk20a_fixed_alloc, alloced_entry); | ||
602 | |||
603 | if (addr < falloc->start) | ||
604 | node = node->rb_left; | ||
605 | else if (addr > falloc->start) | ||
606 | node = node->rb_right; | ||
607 | else | ||
608 | break; | ||
609 | } | ||
610 | |||
611 | if (!node) | ||
612 | return NULL; | ||
613 | |||
614 | rb_erase(node, &a->fixed_allocs); | ||
615 | |||
616 | return falloc; | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | * Find the parent range - the parent does not necessarily need to exist yet | ||
621 | * as a buddy. Finding an existing parent comes later... | ||
622 | */ | ||
623 | static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, | ||
624 | u64 base, u64 order, | ||
625 | u64 *pbase, u64 *porder) | ||
626 | { | ||
627 | u64 base_mask; | ||
628 | u64 shifted_base = balloc_base_shift(a, base); | ||
629 | |||
630 | order++; | ||
631 | base_mask = ~((a->blk_size << order) - 1); | ||
632 | |||
633 | shifted_base &= base_mask; | ||
634 | |||
635 | *pbase = balloc_base_unshift(a, shifted_base); | ||
636 | *porder = order; | ||
637 | } | ||
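Worked example of the masking above, assuming the allocator starts at 0 so the base shift is a no-op: with blk_size = 0x1000, a child at base 0x3000 / order 0 maps to parent range 0x2000 / order 1 (the order-1 buddy spanning 0x2000-0x4000).

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t blk_size = 0x1000, base = 0x3000, order = 0;
		uint64_t base_mask, pbase;

		order++;
		base_mask = ~((blk_size << order) - 1);
		pbase = base & base_mask;

		printf("pbase=0x%llx porder=%llu\n", /* 0x2000, 1 */
		       (unsigned long long)pbase, (unsigned long long)order);
		return 0;
	}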
638 | |||
639 | /* | ||
640 | * Makes a buddy at the passed address. This will make all parent buddies | ||
641 | * necessary for this buddy to exist as well. | ||
642 | */ | ||
643 | static struct gk20a_buddy *__balloc_make_fixed_buddy( | ||
644 | struct gk20a_buddy_allocator *a, u64 base, u64 order) | ||
645 | { | ||
646 | struct gk20a_buddy *bud = NULL; | ||
647 | struct list_head *order_list; | ||
648 | u64 cur_order = order, cur_base = base; | ||
649 | |||
650 | /* | ||
651 | * Algo: | ||
652 | * 1. Keep jumping up a buddy order until we find the real buddy that | ||
653 | * this buddy exists in. | ||
654 | * 2. Then work our way down through the buddy tree until we hit a dead | ||
655 | * end. | ||
656 | * 3. Start splitting buddies until we split to the one we need to | ||
657 | * make. | ||
658 | */ | ||
659 | while (cur_order <= a->max_order) { | ||
660 | int found = 0; | ||
661 | |||
662 | order_list = balloc_get_order_list(a, cur_order); | ||
663 | list_for_each_entry(bud, order_list, buddy_entry) { | ||
664 | if (bud->start == cur_base) { | ||
665 | found = 1; | ||
666 | break; | ||
667 | } | ||
668 | } | ||
669 | |||
670 | if (found) | ||
671 | break; | ||
672 | |||
673 | __balloc_get_parent_range(a, cur_base, cur_order, | ||
674 | &cur_base, &cur_order); | ||
675 | } | ||
676 | |||
677 | if (cur_order > a->max_order) { | ||
678 | alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); | ||
679 | return NULL; | ||
680 | } | ||
681 | |||
682 | /* Split this buddy as necessary until we get the target buddy. */ | ||
683 | while (bud->start != base || bud->order != order) { | ||
684 | if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { | ||
685 | balloc_coalesce(a, bud); | ||
686 | return NULL; | ||
687 | } | ||
688 | |||
689 | if (base < bud->right->start) | ||
690 | bud = bud->left; | ||
691 | else | ||
692 | bud = bud->right; | ||
693 | |||
694 | } | ||
695 | |||
696 | return bud; | ||
697 | } | ||
698 | |||
699 | static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, | ||
700 | struct gk20a_fixed_alloc *falloc, | ||
701 | u64 base, u64 len) | ||
702 | { | ||
703 | u64 shifted_base, inc_base; | ||
704 | u64 align_order; | ||
705 | |||
706 | shifted_base = balloc_base_shift(a, base); | ||
707 | if (shifted_base == 0) | ||
708 | align_order = __fls(len >> a->blk_shift); | ||
709 | else | ||
710 | align_order = min_t(u64, | ||
711 | __ffs(shifted_base >> a->blk_shift), | ||
712 | __fls(len >> a->blk_shift)); | ||
713 | |||
714 | if (align_order > a->max_order) { | ||
715 | alloc_dbg(balloc_owner(a), | ||
716 | "Align order too big: %llu > %llu\n", | ||
717 | align_order, a->max_order); | ||
718 | return 0; | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * Generate a list of buddies that satisfy this allocation. | ||
723 | */ | ||
724 | inc_base = shifted_base; | ||
725 | while (inc_base < (shifted_base + len)) { | ||
726 | u64 order_len = balloc_order_to_len(a, align_order); | ||
727 | u64 remaining; | ||
728 | struct gk20a_buddy *bud; | ||
729 | |||
730 | bud = __balloc_make_fixed_buddy(a, | ||
731 | balloc_base_unshift(a, inc_base), | ||
732 | align_order); | ||
733 | if (!bud) { | ||
734 | alloc_dbg(balloc_owner(a), | ||
735 | "Fixed buddy failed: {0x%llx, %llu}!\n", | ||
736 | balloc_base_unshift(a, inc_base), | ||
737 | align_order); | ||
738 | goto err_and_cleanup; | ||
739 | } | ||
740 | |||
741 | balloc_blist_rem(a, bud); | ||
742 | balloc_alloc_buddy(a, bud); | ||
743 | __balloc_buddy_list_add(a, bud, &falloc->buddies); | ||
744 | |||
745 | /* Book keeping. */ | ||
746 | inc_base += order_len; | ||
747 | remaining = (shifted_base + len) - inc_base; | ||
748 | align_order = __ffs(inc_base >> a->blk_shift); | ||
749 | |||
750 | /* If we don't have much left - trim down align_order. */ | ||
751 | if (balloc_order_to_len(a, align_order) > remaining) | ||
752 | align_order = __balloc_max_order_in(a, inc_base, | ||
753 | inc_base + remaining); | ||
754 | } | ||
755 | |||
756 | return base; | ||
757 | |||
758 | err_and_cleanup: | ||
759 | while (!list_empty(&falloc->buddies)) { | ||
760 | struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, | ||
761 | struct gk20a_buddy, | ||
762 | buddy_entry); | ||
763 | |||
764 | __balloc_buddy_list_rem(a, bud); | ||
765 | balloc_free_buddy(a, bud->start); | ||
766 | kmem_cache_free(buddy_cache, bud); | ||
767 | } | ||
768 | |||
769 | return 0; | ||
770 | } | ||
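Each chunk's order is effectively min(alignment of the current base, largest power of two that fits in the remaining length), so a fixed alloc decomposes into a short run of naturally aligned buddies. A userspace sketch of the chunk walk (assumes the allocator base is 0 so no base shift is needed; the request values are examples):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t ffs64_(uint64_t x) { return __builtin_ctzll(x); }
	static uint64_t fls64_(uint64_t x) { return 63 - __builtin_clzll(x); }

	int main(void)
	{
		unsigned blk_shift = 12;              /* 4 KiB blocks    */
		uint64_t base = 0x3000, len = 0x9000; /* example request */
		uint64_t end = base + len, inc = base;
		uint64_t lorder = fls64_(len >> blk_shift);
		uint64_t order = base ? ffs64_(base >> blk_shift) : lorder;

		if (order > lorder)
			order = lorder;

		while (inc < end) {
			printf("chunk @ 0x%llx, order %llu\n",
			       (unsigned long long)inc,
			       (unsigned long long)order);
			inc += (1ULL << order) << blk_shift;
			if (inc >= end)
				break;
			order = ffs64_(inc >> blk_shift);
			if (((1ULL << order) << blk_shift) > end - inc)
				order = fls64_((end - inc) >> blk_shift);
		}
		return 0;
	}

For base 0x3000 and len 0x9000 this yields chunks at 0x3000 (order 0), 0x4000 (order 2) and 0x8000 (order 2).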
771 | |||
772 | static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, | ||
773 | struct gk20a_fixed_alloc *falloc) | ||
774 | { | ||
775 | struct gk20a_buddy *bud; | ||
776 | |||
777 | while (!list_empty(&falloc->buddies)) { | ||
778 | bud = list_first_entry(&falloc->buddies, | ||
779 | struct gk20a_buddy, | ||
780 | buddy_entry); | ||
781 | __balloc_buddy_list_rem(a, bud); | ||
782 | |||
783 | balloc_free_buddy(a, bud->start); | ||
784 | balloc_blist_add(a, bud); | ||
785 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
786 | |||
787 | /* | ||
788 | * Attempt to defragment the allocation. | ||
789 | */ | ||
790 | balloc_coalesce(a, bud); | ||
791 | } | ||
792 | |||
793 | kfree(falloc); | ||
794 | } | ||
795 | |||
796 | /* | ||
797 | * Allocate memory from the passed allocator. | ||
798 | */ | ||
799 | static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len) | ||
800 | { | ||
801 | u64 order, addr; | ||
802 | int pte_size; | ||
803 | struct gk20a_buddy_allocator *a = __a->priv; | ||
804 | |||
805 | gk20a_alloc_trace_func(); | ||
806 | |||
807 | alloc_lock(__a); | ||
808 | |||
809 | order = balloc_get_order(a, len); | ||
810 | |||
811 | if (order > a->max_order) { | ||
812 | alloc_unlock(__a); | ||
813 | alloc_dbg(balloc_owner(a), "Alloc fail\n"); | ||
814 | gk20a_alloc_trace_func_done(); | ||
815 | return 0; | ||
816 | } | ||
817 | |||
818 | /* | ||
819 | * For now pass the base address of the allocator's region to | ||
820 | * __get_pte_size(). This ensures we get the right page size for | ||
821 | * the alloc but we don't have to know what the real address is | ||
822 | * going to be quite yet. | ||
823 | * | ||
824 | * TODO: once userspace supports a unified address space pass 0 for | ||
825 | * the base. This will make only 'len' affect the PTE size. | ||
826 | */ | ||
827 | if (a->flags & GPU_ALLOC_GVA_SPACE) | ||
828 | pte_size = __get_pte_size(a->vm, a->base, len); | ||
829 | else | ||
830 | pte_size = BALLOC_PTE_SIZE_ANY; | ||
831 | |||
832 | addr = __balloc_do_alloc(a, order, pte_size); | ||
833 | |||
834 | if (addr) { | ||
835 | a->bytes_alloced += len; | ||
836 | a->bytes_alloced_real += balloc_order_to_len(a, order); | ||
837 | alloc_dbg(balloc_owner(a), | ||
838 | "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", | ||
839 | addr, order, len, | ||
840 | pte_size == gmmu_page_size_big ? "big" : | ||
841 | pte_size == gmmu_page_size_small ? "small" : | ||
842 | "NA/any"); | ||
843 | } else { | ||
844 | alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); | ||
845 | } | ||
846 | |||
847 | a->alloc_made = 1; | ||
848 | |||
849 | alloc_unlock(__a); | ||
850 | |||
851 | gk20a_alloc_trace_func_done(); | ||
852 | return addr; | ||
853 | } | ||
854 | |||
855 | /* | ||
856 | * Requires @__a to be locked. | ||
857 | */ | ||
858 | static u64 __gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a, | ||
859 | u64 base, u64 len) | ||
860 | { | ||
861 | u64 ret, real_bytes = 0; | ||
862 | struct gk20a_buddy *bud; | ||
863 | struct gk20a_fixed_alloc *falloc = NULL; | ||
864 | struct gk20a_buddy_allocator *a = __a->priv; | ||
865 | |||
866 | gk20a_alloc_trace_func(); | ||
867 | |||
868 | /* If base isn't aligned to an order 0 block, fail. */ | ||
869 | if (base & (a->blk_size - 1)) | ||
870 | goto fail; | ||
871 | |||
872 | if (len == 0) | ||
873 | goto fail; | ||
874 | |||
875 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); | ||
876 | if (!falloc) | ||
877 | goto fail; | ||
878 | |||
879 | INIT_LIST_HEAD(&falloc->buddies); | ||
880 | falloc->start = base; | ||
881 | falloc->end = base + len; | ||
882 | |||
883 | if (!balloc_is_range_free(a, base, base + len)) { | ||
884 | alloc_dbg(balloc_owner(a), | ||
885 | "Range not free: 0x%llx -> 0x%llx\n", | ||
886 | base, base + len); | ||
887 | goto fail_unlock; | ||
888 | } | ||
889 | |||
890 | ret = __balloc_do_alloc_fixed(a, falloc, base, len); | ||
891 | if (!ret) { | ||
892 | alloc_dbg(balloc_owner(a), | ||
893 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", | ||
894 | base, base + len); | ||
895 | goto fail_unlock; | ||
896 | } | ||
897 | |||
898 | balloc_alloc_fixed(a, falloc); | ||
899 | |||
900 | list_for_each_entry(bud, &falloc->buddies, buddy_entry) | ||
901 | real_bytes += (bud->end - bud->start); | ||
902 | |||
903 | a->bytes_alloced += len; | ||
904 | a->bytes_alloced_real += real_bytes; | ||
905 | |||
906 | alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); | ||
907 | |||
908 | gk20a_alloc_trace_func_done(); | ||
909 | return base; | ||
910 | |||
911 | fail_unlock: | ||
912 | alloc_unlock(__a); | ||
913 | fail: | ||
914 | kfree(falloc); | ||
915 | gk20a_alloc_trace_func_done(); | ||
916 | return 0; | ||
917 | } | ||
918 | |||
919 | /* | ||
920 | * Allocate a fixed address allocation. The address of the allocation is @base | ||
921 | * and the length is @len. This is not a typical buddy allocator operation and | ||
922 | * as such has a high possibility of failure if the address space is heavily in | ||
923 | * use. | ||
924 | * | ||
925 | * Please do not use this function unless _absolutely_ necessary. | ||
926 | */ | ||
927 | static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a, | ||
928 | u64 base, u64 len) | ||
929 | { | ||
930 | u64 alloc; | ||
931 | struct gk20a_buddy_allocator *a = __a->priv; | ||
932 | |||
933 | alloc_lock(__a); | ||
934 | alloc = __gk20a_balloc_fixed_buddy(__a, base, len); | ||
935 | a->alloc_made = 1; | ||
936 | alloc_unlock(__a); | ||
937 | |||
938 | return alloc; | ||
939 | } | ||
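A hedged usage sketch, kernel context assumed: the gk20a_alloc_fixed() and gk20a_free() wrappers are assumed to dispatch through the ops table below (gk20a_free() is used the same way by the carveout code later in this file); the address and size are invented.

	/* Sketch only: pin a specific range, then release it. */
	static int pin_fixed_range(struct gk20a_allocator *a)
	{
		u64 addr = gk20a_alloc_fixed(a, 0x100000000ULL, 0x10000);

		if (!addr)
			return -ENOMEM; /* range busy or misaligned */

		/* ... use the reserved range ... */

		gk20a_free(a, addr);
		return 0;
	}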
940 | |||
941 | /* | ||
942 | * Free the passed allocation. | ||
943 | */ | ||
944 | static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr) | ||
945 | { | ||
946 | struct gk20a_buddy *bud; | ||
947 | struct gk20a_fixed_alloc *falloc; | ||
948 | struct gk20a_buddy_allocator *a = __a->priv; | ||
949 | |||
950 | gk20a_alloc_trace_func(); | ||
951 | |||
952 | if (!addr) { | ||
953 | gk20a_alloc_trace_func_done(); | ||
954 | return; | ||
955 | } | ||
956 | |||
957 | alloc_lock(__a); | ||
958 | |||
959 | /* | ||
960 | * First see if this is a fixed alloc. If not fall back to a regular | ||
961 | * buddy. | ||
962 | */ | ||
963 | falloc = balloc_free_fixed(a, addr); | ||
964 | if (falloc) { | ||
965 | __balloc_do_free_fixed(a, falloc); | ||
966 | goto done; | ||
967 | } | ||
968 | |||
969 | bud = balloc_free_buddy(a, addr); | ||
970 | if (!bud) | ||
971 | goto done; | ||
972 | |||
973 | balloc_blist_add(a, bud); | ||
974 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
975 | |||
976 | /* | ||
977 | * Attempt to defragment the allocation. | ||
978 | */ | ||
979 | balloc_coalesce(a, bud); | ||
980 | |||
981 | done: | ||
982 | alloc_unlock(__a); | ||
983 | alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); | ||
984 | gk20a_alloc_trace_func_done(); | ||
985 | return; | ||
986 | } | ||
987 | |||
988 | static bool gk20a_buddy_reserve_is_possible(struct gk20a_buddy_allocator *a, | ||
989 | struct gk20a_alloc_carveout *co) | ||
990 | { | ||
991 | struct gk20a_alloc_carveout *tmp; | ||
992 | u64 co_base, co_end; | ||
993 | |||
994 | co_base = co->base; | ||
995 | co_end = co->base + co->length; | ||
996 | |||
997 | /* | ||
998 | * Not the fastest approach but we should not have that many carveouts | ||
999 | * for any reasonable allocator. | ||
1000 | */ | ||
1001 | list_for_each_entry(tmp, &a->co_list, co_entry) { | ||
1002 | if ((co_base >= tmp->base && | ||
1003 | co_base < (tmp->base + tmp->length)) || | ||
1004 | (co_end >= tmp->base && | ||
1005 | co_end < (tmp->base + tmp->length))) | ||
1006 | return false; | ||
1007 | } | ||
1008 | |||
1009 | return true; | ||
1010 | } | ||
1011 | |||
1012 | /* | ||
1013 | * Carveouts can only be reserved before any regular allocations have been | ||
1014 | * made. | ||
1015 | */ | ||
1016 | static int gk20a_buddy_reserve_co(struct gk20a_allocator *__a, | ||
1017 | struct gk20a_alloc_carveout *co) | ||
1018 | { | ||
1019 | struct gk20a_buddy_allocator *a = __a->priv; | ||
1020 | u64 addr; | ||
1021 | int err = 0; | ||
1022 | |||
1023 | if (co->base < a->start || (co->base + co->length) > a->end || | ||
1024 | a->alloc_made) | ||
1025 | return -EINVAL; | ||
1026 | |||
1027 | alloc_lock(__a); | ||
1028 | |||
1029 | if (!gk20a_buddy_reserve_is_possible(a, co)) { | ||
1030 | err = -EBUSY; | ||
1031 | goto done; | ||
1032 | } | ||
1033 | |||
1034 | /* Should not be possible to fail... */ | ||
1035 | addr = __gk20a_balloc_fixed_buddy(__a, co->base, co->length); | ||
1036 | if (!addr) { | ||
1037 | err = -ENOMEM; | ||
1038 | pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); | ||
1039 | goto done; | ||
1040 | } | ||
1041 | |||
1042 | list_add(&co->co_entry, &a->co_list); | ||
1043 | |||
1044 | done: | ||
1045 | alloc_unlock(__a); | ||
1046 | return err; | ||
1047 | } | ||
1048 | |||
1049 | /* | ||
1050 | * Carveouts can be released at any time. | ||
1051 | */ | ||
1052 | static void gk20a_buddy_release_co(struct gk20a_allocator *__a, | ||
1053 | struct gk20a_alloc_carveout *co) | ||
1054 | { | ||
1055 | alloc_lock(__a); | ||
1056 | |||
1057 | list_del_init(&co->co_entry); | ||
1058 | gk20a_free(__a, co->base); | ||
1059 | |||
1060 | alloc_unlock(__a); | ||
1061 | } | ||
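Usage sketch for the carveout pair above, kernel context assumed. The gk20a_alloc_reserve_carveout()/gk20a_alloc_release_carveout() wrappers are the same ones the page allocator calls further down; the field names match what the stats code prints, while the carveout name and addresses are invented:

	static struct gk20a_alloc_carveout demo_co = {
		.name   = "demo-carveout",
		.base   = 0x100000,
		.length = 0x40000,
	};

	/* Must run after allocator init but before the first regular alloc. */
	static int reserve_demo_co(struct gk20a_allocator *a)
	{
		return gk20a_alloc_reserve_carveout(a, &demo_co);
	}

	static void release_demo_co(struct gk20a_allocator *a)
	{
		gk20a_alloc_release_carveout(a, &demo_co);
	}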
1062 | |||
1063 | static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) | ||
1064 | { | ||
1065 | struct gk20a_buddy_allocator *ba = a->priv; | ||
1066 | |||
1067 | return ba->length; | ||
1068 | } | ||
1069 | |||
1070 | static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) | ||
1071 | { | ||
1072 | struct gk20a_buddy_allocator *ba = a->priv; | ||
1073 | |||
1074 | return ba->start; | ||
1075 | } | ||
1076 | |||
1077 | static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) | ||
1078 | { | ||
1079 | struct gk20a_buddy_allocator *ba = a->priv; | ||
1080 | int inited = ba->initialized; | ||
1081 | |||
1082 | rmb(); | ||
1083 | return inited; | ||
1084 | } | ||
1085 | |||
1086 | static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) | ||
1087 | { | ||
1088 | struct gk20a_buddy_allocator *ba = a->priv; | ||
1089 | |||
1090 | return ba->end; | ||
1091 | } | ||
1092 | |||
1093 | static u64 gk20a_buddy_alloc_space(struct gk20a_allocator *a) | ||
1094 | { | ||
1095 | struct gk20a_buddy_allocator *ba = a->priv; | ||
1096 | u64 space; | ||
1097 | |||
1098 | alloc_lock(a); | ||
1099 | space = ba->end - ba->start - | ||
1100 | (ba->bytes_alloced_real - ba->bytes_freed); | ||
1101 | alloc_unlock(a); | ||
1102 | |||
1103 | return space; | ||
1104 | } | ||
1105 | |||
1106 | /* | ||
1107 | * Print the buddy allocator top level stats. If you pass @s as NULL then the | ||
1108 | * stats are printed to the kernel log. This lets this code be used for | ||
1109 | * debugging purposes internal to the allocator. | ||
1110 | */ | ||
1111 | static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, | ||
1112 | struct seq_file *s, int lock) | ||
1113 | { | ||
1114 | int i = 0; | ||
1115 | struct rb_node *node; | ||
1116 | struct gk20a_fixed_alloc *falloc; | ||
1117 | struct gk20a_alloc_carveout *tmp; | ||
1118 | struct gk20a_buddy_allocator *a = __a->priv; | ||
1119 | |||
1120 | __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", | ||
1121 | a->base, a->length, a->blk_size); | ||
1122 | __alloc_pstat(s, __a, "Internal params:\n"); | ||
1123 | __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); | ||
1124 | __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); | ||
1125 | __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); | ||
1126 | __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); | ||
1127 | __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); | ||
1128 | |||
1129 | if (lock) | ||
1130 | alloc_lock(__a); | ||
1131 | |||
1132 | if (!list_empty(&a->co_list)) { | ||
1133 | __alloc_pstat(s, __a, "\n"); | ||
1134 | __alloc_pstat(s, __a, "Carveouts:\n"); | ||
1135 | list_for_each_entry(tmp, &a->co_list, co_entry) | ||
1136 | __alloc_pstat(s, __a, | ||
1137 | " CO %2d: %-20s 0x%010llx + 0x%llx\n", | ||
1138 | i++, tmp->name, tmp->base, tmp->length); | ||
1139 | } | ||
1140 | |||
1141 | __alloc_pstat(s, __a, "\n"); | ||
1142 | __alloc_pstat(s, __a, "Buddy blocks:\n"); | ||
1143 | __alloc_pstat(s, __a, " Order Free Alloced Split\n"); | ||
1144 | __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); | ||
1145 | |||
1146 | for (i = a->max_order; i >= 0; i--) { | ||
1147 | if (a->buddy_list_len[i] == 0 && | ||
1148 | a->buddy_list_alloced[i] == 0 && | ||
1149 | a->buddy_list_split[i] == 0) | ||
1150 | continue; | ||
1151 | |||
1152 | __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, | ||
1153 | a->buddy_list_len[i], | ||
1154 | a->buddy_list_alloced[i], | ||
1155 | a->buddy_list_split[i]); | ||
1156 | } | ||
1157 | |||
1158 | __alloc_pstat(s, __a, "\n"); | ||
1159 | |||
1160 | for (node = rb_first(&a->fixed_allocs), i = 1; | ||
1161 | node != NULL; | ||
1162 | node = rb_next(node), i++) { | ||
1163 | falloc = container_of(node, | ||
1164 | struct gk20a_fixed_alloc, alloced_entry); | ||
1165 | |||
1166 | __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", | ||
1167 | i, falloc->start, falloc->end); | ||
1168 | } | ||
1169 | |||
1170 | __alloc_pstat(s, __a, "\n"); | ||
1171 | __alloc_pstat(s, __a, "Bytes allocated: %llu\n", | ||
1172 | a->bytes_alloced); | ||
1173 | __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", | ||
1174 | a->bytes_alloced_real); | ||
1175 | __alloc_pstat(s, __a, "Bytes freed: %llu\n", | ||
1176 | a->bytes_freed); | ||
1177 | |||
1178 | if (lock) | ||
1179 | alloc_unlock(__a); | ||
1180 | } | ||
1181 | |||
1182 | static const struct gk20a_allocator_ops buddy_ops = { | ||
1183 | .alloc = gk20a_buddy_balloc, | ||
1184 | .free = gk20a_buddy_bfree, | ||
1185 | |||
1186 | .alloc_fixed = gk20a_balloc_fixed_buddy, | ||
1187 | /* .free_fixed not needed. */ | ||
1188 | |||
1189 | .reserve_carveout = gk20a_buddy_reserve_co, | ||
1190 | .release_carveout = gk20a_buddy_release_co, | ||
1191 | |||
1192 | .base = gk20a_buddy_alloc_base, | ||
1193 | .length = gk20a_buddy_alloc_length, | ||
1194 | .end = gk20a_buddy_alloc_end, | ||
1195 | .inited = gk20a_buddy_alloc_inited, | ||
1196 | .space = gk20a_buddy_alloc_space, | ||
1197 | |||
1198 | .fini = gk20a_buddy_allocator_destroy, | ||
1199 | |||
1200 | .print_stats = gk20a_buddy_print_stats, | ||
1201 | }; | ||
1202 | |||
1203 | /* | ||
1204 | * Initialize a buddy allocator. Returns 0 on success. This allocator does | ||
1205 | * not necessarily manage bytes. It manages distinct ranges of resources. This | ||
1206 | * allows the allocator to work for things like comp_tags, semaphores, etc. | ||
1207 | * | ||
1208 | * @allocator: Ptr to an allocator struct to init. | ||
1209 | * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to | ||
1210 | * get PTE size for GVA spaces. | ||
1211 | * @name: Name of the allocator. Doesn't have to be static storage. | ||
1212 | * @base: The base address of the resource pool being managed. | ||
1213 | * @size: Number of resources in the pool. | ||
1214 | * @blk_size: Minimum number of resources to allocate at once. For things like | ||
1215 | * semaphores this is 1. For GVA this might be as much as 64k. This | ||
1216 | * corresponds to order 0. Must be a power of 2. | ||
1217 | * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator | ||
1218 | * will try and pick a reasonable max order. | ||
1219 | * @flags: Extra flags necessary. See GPU_BALLOC_*. | ||
1220 | */ | ||
1221 | int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, | ||
1222 | struct vm_gk20a *vm, const char *name, | ||
1223 | u64 base, u64 size, u64 blk_size, | ||
1224 | u64 max_order, u64 flags) | ||
1225 | { | ||
1226 | int err; | ||
1227 | u64 pde_size; | ||
1228 | struct gk20a_buddy_allocator *a; | ||
1229 | |||
1230 | /* blk_size must be greater than 0 and a power of 2. */ | ||
1231 | if (blk_size == 0) | ||
1232 | return -EINVAL; | ||
1233 | if (blk_size & (blk_size - 1)) | ||
1234 | return -EINVAL; | ||
1235 | |||
1236 | if (max_order > GPU_BALLOC_MAX_ORDER) | ||
1237 | return -EINVAL; | ||
1238 | |||
1239 | /* If this is to manage a GVA space we need a VM. */ | ||
1240 | if (flags & GPU_ALLOC_GVA_SPACE && !vm) | ||
1241 | return -EINVAL; | ||
1242 | |||
1243 | a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); | ||
1244 | if (!a) | ||
1245 | return -ENOMEM; | ||
1246 | |||
1247 | err = __gk20a_alloc_common_init(__a, name, a, false, &buddy_ops); | ||
1248 | if (err) | ||
1249 | goto fail; | ||
1250 | |||
1251 | a->base = base; | ||
1252 | a->length = size; | ||
1253 | a->blk_size = blk_size; | ||
1254 | a->blk_shift = __ffs(blk_size); | ||
1255 | a->owner = __a; | ||
1256 | |||
1257 | /* | ||
1258 | * If base is 0 then modify base to be the size of one block so that we | ||
1259 | * can return errors by returning addr == 0. | ||
1260 | */ | ||
1261 | if (a->base == 0) { | ||
1262 | a->base = a->blk_size; | ||
1263 | a->length -= a->blk_size; | ||
1264 | } | ||
1265 | |||
1266 | a->vm = vm; | ||
1267 | if (flags & GPU_ALLOC_GVA_SPACE) { | ||
1268 | pde_size = ((u64)vm->big_page_size) << 10; | ||
1269 | a->pte_blk_order = balloc_get_order(a, pde_size); | ||
1270 | } | ||
1271 | |||
1272 | /* | ||
1273 | * When we have a GVA space with big_pages enabled the size and base | ||
1274 | * must be PDE aligned. If big_pages are not enabled then this | ||
1275 | * requirement is not necessary. | ||
1276 | */ | ||
1277 | if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages && | ||
1278 | (base & ((vm->big_page_size << 10) - 1) || | ||
1279 | size & ((vm->big_page_size << 10) - 1))) | ||
1280 | { err = -EINVAL; goto fail; } | ||
1281 | |||
1282 | a->flags = flags; | ||
1283 | a->max_order = max_order; | ||
1284 | |||
1285 | balloc_allocator_align(a); | ||
1286 | balloc_compute_max_order(a); | ||
1287 | |||
1288 | /* Shared buddy kmem_cache for all allocators. */ | ||
1289 | if (!buddy_cache) | ||
1290 | buddy_cache = KMEM_CACHE(gk20a_buddy, 0); | ||
1291 | if (!buddy_cache) { | ||
1292 | err = -ENOMEM; | ||
1293 | goto fail; | ||
1294 | } | ||
1295 | |||
1296 | a->alloced_buddies = RB_ROOT; | ||
1297 | a->fixed_allocs = RB_ROOT; | ||
1298 | INIT_LIST_HEAD(&a->co_list); | ||
1299 | err = balloc_init_lists(a); | ||
1300 | if (err) | ||
1301 | goto fail; | ||
1302 | |||
1303 | wmb(); | ||
1304 | a->initialized = 1; | ||
1305 | |||
1306 | gk20a_init_alloc_debug(g, __a); | ||
1307 | alloc_dbg(__a, "New allocator: type buddy\n"); | ||
1308 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
1309 | alloc_dbg(__a, " size 0x%llx\n", a->length); | ||
1310 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
1311 | alloc_dbg(__a, " max_order %llu\n", a->max_order); | ||
1312 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
1313 | |||
1314 | return 0; | ||
1315 | |||
1316 | fail: | ||
1317 | kfree(a); | ||
1318 | return err; | ||
1319 | } | ||
1320 | |||
1321 | int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a, | ||
1322 | const char *name, u64 base, u64 size, | ||
1323 | u64 blk_size, u64 flags) | ||
1324 | { | ||
1325 | return __gk20a_buddy_allocator_init(g, a, NULL, name, | ||
1326 | base, size, blk_size, 0, flags); | ||
1327 | } | ||
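Initialization sketch for the wrapper above, kernel context assumed (the window, block size and name are invented): a buddy allocator over a 1 MiB range with 4 KiB minimum blocks, letting the allocator pick its own max order.

	static struct gk20a_allocator demo_alloc;

	static int demo_allocator_setup(struct gk20a *g)
	{
		int err = gk20a_buddy_allocator_init(g, &demo_alloc, "demo",
						     0x100000, 0x100000,
						     0x1000, 0);
		if (err)
			return err;

		/* Allocations now dispatch through buddy_ops. */
		return 0;
	}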
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c deleted file mode 100644 index 5b011d8c..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c +++ /dev/null | |||
@@ -1,206 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/vmalloc.h> | ||
20 | #include <linux/atomic.h> | ||
21 | |||
22 | #include "gk20a_allocator.h" | ||
23 | #include "lockless_allocator_priv.h" | ||
24 | |||
25 | static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a) | ||
26 | { | ||
27 | struct gk20a_lockless_allocator *pa = a->priv; | ||
28 | |||
29 | return pa->length; | ||
30 | } | ||
31 | |||
32 | static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a) | ||
33 | { | ||
34 | struct gk20a_lockless_allocator *pa = a->priv; | ||
35 | |||
36 | return pa->base; | ||
37 | } | ||
38 | |||
39 | static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a) | ||
40 | { | ||
41 | struct gk20a_lockless_allocator *pa = a->priv; | ||
42 | int inited = pa->inited; | ||
43 | |||
44 | rmb(); | ||
45 | return inited; | ||
46 | } | ||
47 | |||
48 | static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a) | ||
49 | { | ||
50 | struct gk20a_lockless_allocator *pa = a->priv; | ||
51 | |||
52 | return pa->base + pa->length; | ||
53 | } | ||
54 | |||
55 | static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len) | ||
56 | { | ||
57 | struct gk20a_lockless_allocator *pa = a->priv; | ||
58 | int head, new_head, ret; | ||
59 | u64 addr = 0; | ||
60 | |||
61 | if (len != pa->blk_size) | ||
62 | return 0; | ||
63 | |||
64 | head = ACCESS_ONCE(pa->head); | ||
65 | while (head >= 0) { | ||
66 | new_head = ACCESS_ONCE(pa->next[head]); | ||
67 | ret = cmpxchg(&pa->head, head, new_head); | ||
68 | if (ret == head) { | ||
69 | addr = pa->base + head * pa->blk_size; | ||
70 | atomic_inc(&pa->nr_allocs); | ||
71 | alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, | ||
72 | addr); | ||
73 | break; | ||
74 | } | ||
75 | head = ACCESS_ONCE(pa->head); | ||
76 | } | ||
77 | return addr; | ||
78 | } | ||
79 | |||
80 | static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr) | ||
81 | { | ||
82 | struct gk20a_lockless_allocator *pa = a->priv; | ||
83 | int head, ret; | ||
84 | u64 cur_idx, rem; | ||
85 | |||
86 | cur_idx = addr - pa->base; | ||
87 | rem = do_div(cur_idx, pa->blk_size); | ||
88 | |||
89 | while (1) { | ||
90 | head = ACCESS_ONCE(pa->head); | ||
91 | ACCESS_ONCE(pa->next[cur_idx]) = head; | ||
92 | ret = cmpxchg(&pa->head, head, cur_idx); | ||
93 | if (ret == head) { | ||
94 | atomic_dec(&pa->nr_allocs); | ||
95 | alloc_dbg(a, "Free node # %llu\n", cur_idx); | ||
96 | break; | ||
97 | } | ||
98 | } | ||
99 | } | ||
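Together, gk20a_lockless_alloc() and gk20a_lockless_free() form a Treiber-style stack of block indices: pa->head is the list head, pa->next[] holds the links, and the cmpxchg() retry loops absorb races between threads. A compact userspace analogue using C11 atomics (single-threaded demo; ABA hazards and memory ordering are glossed over):

	#include <stdatomic.h>
	#include <stdio.h>

	#define NR_NODES 4

	static _Atomic int head;
	static int next_idx[NR_NODES];

	static int stack_pop(void)
	{
		int h = atomic_load(&head);

		while (h >= 0 &&
		       !atomic_compare_exchange_weak(&head, &h, next_idx[h]))
			; /* a failed CAS reloads 'h' with the current head */
		return h; /* -1 when the list is empty */
	}

	static void stack_push(int idx)
	{
		int h = atomic_load(&head);

		do {
			next_idx[idx] = h;
		} while (!atomic_compare_exchange_weak(&head, &h, idx));
	}

	int main(void)
	{
		int i, a, b;

		for (i = 0; i < NR_NODES; i++)
			next_idx[i] = i + 1;
		next_idx[NR_NODES - 1] = -1;
		atomic_store(&head, 0);

		a = stack_pop();
		b = stack_pop();
		printf("popped %d %d\n", a, b); /* 0 1 */
		stack_push(a);
		printf("popped %d\n", stack_pop()); /* 0 */
		return 0;
	}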
100 | |||
101 | static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a) | ||
102 | { | ||
103 | struct gk20a_lockless_allocator *pa = a->priv; | ||
104 | |||
105 | gk20a_fini_alloc_debug(a); | ||
106 | |||
107 | vfree(pa->next); | ||
108 | kfree(pa); | ||
109 | } | ||
110 | |||
111 | static void gk20a_lockless_print_stats(struct gk20a_allocator *a, | ||
112 | struct seq_file *s, int lock) | ||
113 | { | ||
114 | struct gk20a_lockless_allocator *pa = a->priv; | ||
115 | |||
116 | __alloc_pstat(s, a, "Lockless allocator params:\n"); | ||
117 | __alloc_pstat(s, a, " start = 0x%llx\n", pa->base); | ||
118 | __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length); | ||
119 | |||
120 | /* Actual stats. */ | ||
121 | __alloc_pstat(s, a, "Stats:\n"); | ||
122 | __alloc_pstat(s, a, " Number allocs = %d\n", | ||
123 | atomic_read(&pa->nr_allocs)); | ||
124 | __alloc_pstat(s, a, " Number free = %d\n", | ||
125 | pa->nr_nodes - atomic_read(&pa->nr_allocs)); | ||
126 | } | ||
127 | |||
128 | static const struct gk20a_allocator_ops pool_ops = { | ||
129 | .alloc = gk20a_lockless_alloc, | ||
130 | .free = gk20a_lockless_free, | ||
131 | |||
132 | .base = gk20a_lockless_alloc_base, | ||
133 | .length = gk20a_lockless_alloc_length, | ||
134 | .end = gk20a_lockless_alloc_end, | ||
135 | .inited = gk20a_lockless_alloc_inited, | ||
136 | |||
137 | .fini = gk20a_lockless_alloc_destroy, | ||
138 | |||
139 | .print_stats = gk20a_lockless_print_stats, | ||
140 | }; | ||
141 | |||
142 | int gk20a_lockless_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, | ||
143 | const char *name, u64 base, u64 length, | ||
144 | u64 blk_size, u64 flags) | ||
145 | { | ||
146 | int i; | ||
147 | int err; | ||
148 | int nr_nodes; | ||
149 | u64 count, rem; | ||
150 | struct gk20a_lockless_allocator *a; | ||
151 | |||
152 | if (!blk_size) | ||
153 | return -EINVAL; | ||
154 | |||
155 | /* | ||
156 | * Ensure we have space for at least one node and there's no overflow. | ||
157 | * In order to control memory footprint, we require count < INT_MAX | ||
158 | */ | ||
159 | count = length; | ||
160 | rem = do_div(count, blk_size); | ||
161 | if (!base || !count || count > INT_MAX) | ||
162 | return -EINVAL; | ||
163 | |||
164 | a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL); | ||
165 | if (!a) | ||
166 | return -ENOMEM; | ||
167 | |||
168 | err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops); | ||
169 | if (err) | ||
170 | goto fail; | ||
171 | |||
172 | a->next = vzalloc(sizeof(*a->next) * count); | ||
173 | if (!a->next) { | ||
174 | err = -ENOMEM; | ||
175 | goto fail; | ||
176 | } | ||
177 | |||
178 | /* chain the elements together to form the initial free list */ | ||
179 | nr_nodes = (int)count; | ||
180 | for (i = 0; i < nr_nodes; i++) | ||
181 | a->next[i] = i + 1; | ||
182 | a->next[nr_nodes - 1] = -1; | ||
183 | |||
184 | a->base = base; | ||
185 | a->length = length; | ||
186 | a->blk_size = blk_size; | ||
187 | a->nr_nodes = nr_nodes; | ||
188 | a->flags = flags; | ||
189 | atomic_set(&a->nr_allocs, 0); | ||
190 | |||
191 | wmb(); | ||
192 | a->inited = true; | ||
193 | |||
194 | gk20a_init_alloc_debug(g, __a); | ||
195 | alloc_dbg(__a, "New allocator: type lockless\n"); | ||
196 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
197 | alloc_dbg(__a, " nodes %d\n", a->nr_nodes); | ||
198 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
199 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
200 | |||
201 | return 0; | ||
202 | |||
203 | fail: | ||
204 | kfree(a); | ||
205 | return err; | ||
206 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c deleted file mode 100644 index 9717a726..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c +++ /dev/null | |||
@@ -1,936 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/bitops.h> | ||
20 | #include <linux/mm.h> | ||
21 | |||
22 | #include "gk20a_allocator.h" | ||
23 | #include "buddy_allocator_priv.h" | ||
24 | #include "page_allocator_priv.h" | ||
25 | |||
26 | #define palloc_dbg(a, fmt, arg...) \ | ||
27 | alloc_dbg(palloc_owner(a), fmt, ##arg) | ||
28 | |||
29 | static struct kmem_cache *page_alloc_cache; | ||
30 | static struct kmem_cache *page_alloc_chunk_cache; | ||
31 | static struct kmem_cache *page_alloc_slab_page_cache; | ||
32 | static DEFINE_MUTEX(meta_data_cache_lock); | ||
33 | |||
34 | /* | ||
35 | * Handle the book-keeping for these operations. | ||
36 | */ | ||
37 | static inline void add_slab_page_to_empty(struct page_alloc_slab *slab, | ||
38 | struct page_alloc_slab_page *page) | ||
39 | { | ||
40 | BUG_ON(page->state != SP_NONE); | ||
41 | list_add(&page->list_entry, &slab->empty); | ||
42 | slab->nr_empty++; | ||
43 | page->state = SP_EMPTY; | ||
44 | } | ||
45 | static inline void add_slab_page_to_partial(struct page_alloc_slab *slab, | ||
46 | struct page_alloc_slab_page *page) | ||
47 | { | ||
48 | BUG_ON(page->state != SP_NONE); | ||
49 | list_add(&page->list_entry, &slab->partial); | ||
50 | slab->nr_partial++; | ||
51 | page->state = SP_PARTIAL; | ||
52 | } | ||
53 | static inline void add_slab_page_to_full(struct page_alloc_slab *slab, | ||
54 | struct page_alloc_slab_page *page) | ||
55 | { | ||
56 | BUG_ON(page->state != SP_NONE); | ||
57 | list_add(&page->list_entry, &slab->full); | ||
58 | slab->nr_full++; | ||
59 | page->state = SP_FULL; | ||
60 | } | ||
61 | |||
62 | static inline void del_slab_page_from_empty(struct page_alloc_slab *slab, | ||
63 | struct page_alloc_slab_page *page) | ||
64 | { | ||
65 | list_del_init(&page->list_entry); | ||
66 | slab->nr_empty--; | ||
67 | page->state = SP_NONE; | ||
68 | } | ||
69 | static inline void del_slab_page_from_partial(struct page_alloc_slab *slab, | ||
70 | struct page_alloc_slab_page *page) | ||
71 | { | ||
72 | list_del_init(&page->list_entry); | ||
73 | slab->nr_partial--; | ||
74 | page->state = SP_NONE; | ||
75 | } | ||
76 | static inline void del_slab_page_from_full(struct page_alloc_slab *slab, | ||
77 | struct page_alloc_slab_page *page) | ||
78 | { | ||
79 | list_del_init(&page->list_entry); | ||
80 | slab->nr_full--; | ||
81 | page->state = SP_NONE; | ||
82 | } | ||
83 | |||
84 | static u64 gk20a_page_alloc_length(struct gk20a_allocator *a) | ||
85 | { | ||
86 | struct gk20a_page_allocator *va = a->priv; | ||
87 | |||
88 | return gk20a_alloc_length(&va->source_allocator); | ||
89 | } | ||
90 | |||
91 | static u64 gk20a_page_alloc_base(struct gk20a_allocator *a) | ||
92 | { | ||
93 | struct gk20a_page_allocator *va = a->priv; | ||
94 | |||
95 | return gk20a_alloc_base(&va->source_allocator); | ||
96 | } | ||
97 | |||
98 | static int gk20a_page_alloc_inited(struct gk20a_allocator *a) | ||
99 | { | ||
100 | struct gk20a_page_allocator *va = a->priv; | ||
101 | |||
102 | return gk20a_alloc_initialized(&va->source_allocator); | ||
103 | } | ||
104 | |||
105 | static u64 gk20a_page_alloc_end(struct gk20a_allocator *a) | ||
106 | { | ||
107 | struct gk20a_page_allocator *va = a->priv; | ||
108 | |||
109 | return gk20a_alloc_end(&va->source_allocator); | ||
110 | } | ||
111 | |||
112 | static u64 gk20a_page_alloc_space(struct gk20a_allocator *a) | ||
113 | { | ||
114 | struct gk20a_page_allocator *va = a->priv; | ||
115 | |||
116 | return gk20a_alloc_space(&va->source_allocator); | ||
117 | } | ||
118 | |||
119 | static int gk20a_page_reserve_co(struct gk20a_allocator *a, | ||
120 | struct gk20a_alloc_carveout *co) | ||
121 | { | ||
122 | struct gk20a_page_allocator *va = a->priv; | ||
123 | |||
124 | return gk20a_alloc_reserve_carveout(&va->source_allocator, co); | ||
125 | } | ||
126 | |||
127 | static void gk20a_page_release_co(struct gk20a_allocator *a, | ||
128 | struct gk20a_alloc_carveout *co) | ||
129 | { | ||
130 | struct gk20a_page_allocator *va = a->priv; | ||
131 | |||
132 | gk20a_alloc_release_carveout(&va->source_allocator, co); | ||
133 | } | ||
134 | |||
135 | static void __gk20a_free_pages(struct gk20a_page_allocator *a, | ||
136 | struct gk20a_page_alloc *alloc, | ||
137 | bool free_buddy_alloc) | ||
138 | { | ||
139 | struct page_alloc_chunk *chunk; | ||
140 | |||
141 | while (!list_empty(&alloc->alloc_chunks)) { | ||
142 | chunk = list_first_entry(&alloc->alloc_chunks, | ||
143 | struct page_alloc_chunk, | ||
144 | list_entry); | ||
145 | list_del(&chunk->list_entry); | ||
146 | |||
147 | if (free_buddy_alloc) | ||
148 | gk20a_free(&a->source_allocator, chunk->base); | ||
149 | kfree(chunk); | ||
150 | } | ||
151 | |||
152 | kfree(alloc); | ||
153 | } | ||
154 | |||
155 | static int __insert_page_alloc(struct gk20a_page_allocator *a, | ||
156 | struct gk20a_page_alloc *alloc) | ||
157 | { | ||
158 | struct rb_node **new = &a->allocs.rb_node; | ||
159 | struct rb_node *parent = NULL; | ||
160 | |||
161 | while (*new) { | ||
162 | struct gk20a_page_alloc *tmp = | ||
163 | container_of(*new, struct gk20a_page_alloc, | ||
164 | tree_entry); | ||
165 | |||
166 | parent = *new; | ||
167 | if (alloc->base < tmp->base) { | ||
168 | new = &((*new)->rb_left); | ||
169 | } else if (alloc->base > tmp->base) { | ||
170 | new = &((*new)->rb_right); | ||
171 | } else { | ||
172 | WARN(1, "Duplicate entries in allocated list!\n"); | ||
173 | return 0; | ||
174 | } | ||
175 | } | ||
176 | |||
177 | rb_link_node(&alloc->tree_entry, parent, new); | ||
178 | rb_insert_color(&alloc->tree_entry, &a->allocs); | ||
179 | |||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static struct gk20a_page_alloc *__find_page_alloc( | ||
184 | struct gk20a_page_allocator *a, | ||
185 | u64 addr) | ||
186 | { | ||
187 | struct rb_node *node = a->allocs.rb_node; | ||
188 | struct gk20a_page_alloc *alloc; | ||
189 | |||
190 | while (node) { | ||
191 | alloc = container_of(node, struct gk20a_page_alloc, tree_entry); | ||
192 | |||
193 | if (addr < alloc->base) | ||
194 | node = node->rb_left; | ||
195 | else if (addr > alloc->base) | ||
196 | node = node->rb_right; | ||
197 | else | ||
198 | break; | ||
199 | } | ||
200 | |||
201 | if (!node) | ||
202 | return NULL; | ||
203 | |||
204 | rb_erase(node, &a->allocs); | ||
205 | |||
206 | return alloc; | ||
207 | } | ||
208 | |||
209 | static struct page_alloc_slab_page *alloc_slab_page( | ||
210 | struct gk20a_page_allocator *a, | ||
211 | struct page_alloc_slab *slab) | ||
212 | { | ||
213 | struct page_alloc_slab_page *slab_page; | ||
214 | |||
215 | slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL); | ||
216 | if (!slab_page) { | ||
217 | palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n"); | ||
218 | return ERR_PTR(-ENOMEM); | ||
219 | } | ||
220 | |||
221 | memset(slab_page, 0, sizeof(*slab_page)); | ||
222 | |||
223 | slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size); | ||
224 | if (!slab_page->page_addr) { | ||
225 | kfree(slab_page); | ||
226 | palloc_dbg(a, "OOM: vidmem is full!\n"); | ||
227 | return ERR_PTR(-ENOMEM); | ||
228 | } | ||
229 | |||
230 | INIT_LIST_HEAD(&slab_page->list_entry); | ||
231 | slab_page->slab_size = slab->slab_size; | ||
232 | slab_page->nr_objects = (u32)a->page_size / slab->slab_size; | ||
233 | slab_page->nr_objects_alloced = 0; | ||
234 | slab_page->owner = slab; | ||
235 | slab_page->state = SP_NONE; | ||
236 | |||
237 | a->pages_alloced++; | ||
238 | |||
239 | palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n", | ||
240 | slab_page->page_addr, slab_page->slab_size); | ||
241 | |||
242 | return slab_page; | ||
243 | } | ||
244 | |||
245 | static void free_slab_page(struct gk20a_page_allocator *a, | ||
246 | struct page_alloc_slab_page *slab_page) | ||
247 | { | ||
248 | palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr); | ||
249 | |||
250 | BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) || | ||
251 | slab_page->nr_objects_alloced != 0 || | ||
252 | slab_page->bitmap != 0); | ||
253 | |||
254 | gk20a_free(&a->source_allocator, slab_page->page_addr); | ||
255 | a->pages_freed++; | ||
256 | |||
257 | kmem_cache_free(page_alloc_slab_page_cache, slab_page); | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * This expects @alloc to have 1 empty page_alloc_chunk already added to the | ||
262 | * alloc_chunks list. | ||
263 | */ | ||
264 | static int __do_slab_alloc(struct gk20a_page_allocator *a, | ||
265 | struct page_alloc_slab *slab, | ||
266 | struct gk20a_page_alloc *alloc) | ||
267 | { | ||
268 | struct page_alloc_slab_page *slab_page = NULL; | ||
269 | struct page_alloc_chunk *chunk; | ||
270 | unsigned long offs; | ||
271 | |||
272 | /* | ||
273 | * Check the partial and empty lists to see if we have some space | ||
274 | * readily available. Take the slab_page out of whatever list it | ||
275 | * was in since it may be put back into a different list later. | ||
276 | */ | ||
277 | if (!list_empty(&slab->partial)) { | ||
278 | slab_page = list_first_entry(&slab->partial, | ||
279 | struct page_alloc_slab_page, | ||
280 | list_entry); | ||
281 | del_slab_page_from_partial(slab, slab_page); | ||
282 | } else if (!list_empty(&slab->empty)) { | ||
283 | slab_page = list_first_entry(&slab->empty, | ||
284 | struct page_alloc_slab_page, | ||
285 | list_entry); | ||
286 | del_slab_page_from_empty(slab, slab_page); | ||
287 | } | ||
288 | |||
289 | if (!slab_page) { | ||
290 | slab_page = alloc_slab_page(a, slab); | ||
291 | if (IS_ERR(slab_page)) | ||
292 | return PTR_ERR(slab_page); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * We now have a slab_page. Do the alloc. | ||
297 | */ | ||
298 | offs = bitmap_find_next_zero_area(&slab_page->bitmap, | ||
299 | slab_page->nr_objects, | ||
300 | 0, 1, 0); | ||
301 | if (offs >= slab_page->nr_objects) { | ||
302 | WARN(1, "Empty/partial slab with no free objects?"); | ||
303 | |||
304 | /* Add the buggy page to the full list... This isn't ideal. */ | ||
305 | add_slab_page_to_full(slab, slab_page); | ||
306 | return -ENOMEM; | ||
307 | } | ||
308 | |||
309 | bitmap_set(&slab_page->bitmap, offs, 1); | ||
310 | slab_page->nr_objects_alloced++; | ||
311 | |||
312 | if (slab_page->nr_objects_alloced < slab_page->nr_objects) | ||
313 | add_slab_page_to_partial(slab, slab_page); | ||
314 | else if (slab_page->nr_objects_alloced == slab_page->nr_objects) | ||
315 | add_slab_page_to_full(slab, slab_page); | ||
316 | else | ||
317 | BUG(); /* Should be impossible to hit this. */ | ||
318 | |||
319 | /* | ||
320 | * Handle building the gk20a_page_alloc struct. We expect one | ||
321 | * page_alloc_chunk to be present. | ||
322 | */ | ||
323 | alloc->slab_page = slab_page; | ||
324 | alloc->nr_chunks = 1; | ||
325 | alloc->length = slab_page->slab_size; | ||
326 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); | ||
327 | |||
328 | chunk = list_first_entry(&alloc->alloc_chunks, | ||
329 | struct page_alloc_chunk, list_entry); | ||
330 | chunk->base = alloc->base; | ||
331 | chunk->length = alloc->length; | ||
332 | |||
333 | return 0; | ||
334 | } | ||
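The object bookkeeping in __do_slab_alloc() above is a single-word bitmap: one bit per object in the slab page. A reduced sketch of the alloc/free pair, assuming nr_objects fits in one unsigned long (true here, since a 64k page holds at most sixteen 4k objects); the helper names are hypothetical:

static int slab_obj_alloc(struct page_alloc_slab_page *p)
{
	unsigned long offs = bitmap_find_next_zero_area(&p->bitmap,
							p->nr_objects,
							0, 1, 0);

	if (offs >= p->nr_objects)
		return -ENOMEM;		/* Page is actually full. */

	bitmap_set(&p->bitmap, offs, 1);
	p->nr_objects_alloced++;
	return (int)offs;		/* Object index within the page. */
}

static void slab_obj_free(struct page_alloc_slab_page *p, int offs)
{
	bitmap_clear(&p->bitmap, offs, 1);
	p->nr_objects_alloced--;
}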
335 | |||
336 | /* | ||
337 | * Allocate from a slab instead of directly from the page allocator. | ||
338 | */ | ||
339 | static struct gk20a_page_alloc *__gk20a_alloc_slab( | ||
340 | struct gk20a_page_allocator *a, u64 len) | ||
341 | { | ||
342 | int err, slab_nr; | ||
343 | struct page_alloc_slab *slab; | ||
344 | struct gk20a_page_alloc *alloc = NULL; | ||
345 | struct page_alloc_chunk *chunk = NULL; | ||
346 | |||
347 | /* | ||
348 | * Align the length to a page and then divide by the page size (4k for | ||
349 | * this code). ilog2() of that then gets us the correct slab to use. | ||
350 | */ | ||
351 | slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12); | ||
352 | slab = &a->slabs[slab_nr]; | ||
353 | |||
354 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
355 | if (!alloc) { | ||
356 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); | ||
357 | goto fail; | ||
358 | } | ||
359 | chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
360 | if (!chunk) { | ||
361 | palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); | ||
362 | goto fail; | ||
363 | } | ||
364 | |||
365 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
366 | list_add(&chunk->list_entry, &alloc->alloc_chunks); | ||
367 | |||
368 | err = __do_slab_alloc(a, slab, alloc); | ||
369 | if (err) | ||
370 | goto fail; | ||
371 | |||
372 | palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n", | ||
373 | len, slab_nr, alloc->base); | ||
374 | a->nr_slab_allocs++; | ||
375 | |||
376 | return alloc; | ||
377 | |||
378 | fail: | ||
379 | kfree(alloc); | ||
380 | kfree(chunk); | ||
381 | return NULL; | ||
382 | } | ||
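Worked examples for the slab_nr computation above, with a 4k PAGE_SIZE:

/*
 * len = SZ_4K     -> PAGE_ALIGN(len) >> 12 = 1 -> ilog2(1) = 0 -> 4k slab
 * len = SZ_4K + 1 -> PAGE_ALIGN(len) >> 12 = 2 -> ilog2(2) = 1 -> 8k slab
 * len = SZ_16K    -> PAGE_ALIGN(len) >> 12 = 4 -> ilog2(4) = 2 -> 16k slab
 * len = SZ_32K    -> PAGE_ALIGN(len) >> 12 = 8 -> ilog2(8) = 3 -> 32k slab
 */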
383 | |||
384 | static void __gk20a_free_slab(struct gk20a_page_allocator *a, | ||
385 | struct gk20a_page_alloc *alloc) | ||
386 | { | ||
387 | struct page_alloc_slab_page *slab_page = alloc->slab_page; | ||
388 | struct page_alloc_slab *slab = slab_page->owner; | ||
389 | enum slab_page_state new_state; | ||
390 | int offs; | ||
391 | |||
392 | offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size; | ||
393 | bitmap_clear(&slab_page->bitmap, offs, 1); | ||
394 | |||
395 | slab_page->nr_objects_alloced--; | ||
396 | |||
397 | if (slab_page->nr_objects_alloced == 0) | ||
398 | new_state = SP_EMPTY; | ||
399 | else | ||
400 | new_state = SP_PARTIAL; | ||
401 | |||
402 | /* | ||
403 | * Need to migrate the page to a different list. | ||
404 | */ | ||
405 | if (new_state != slab_page->state) { | ||
406 | /* Delete - can't be in empty. */ | ||
407 | if (slab_page->state == SP_PARTIAL) | ||
408 | del_slab_page_from_partial(slab, slab_page); | ||
409 | else | ||
410 | del_slab_page_from_full(slab, slab_page); | ||
411 | |||
412 | /* And add. */ | ||
413 | if (new_state == SP_EMPTY) { | ||
414 | if (list_empty(&slab->empty)) | ||
415 | add_slab_page_to_empty(slab, slab_page); | ||
416 | else | ||
417 | free_slab_page(a, slab_page); | ||
418 | } else { | ||
419 | add_slab_page_to_partial(slab, slab_page); | ||
420 | } | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * Now handle the page_alloc. | ||
425 | */ | ||
426 | __gk20a_free_pages(a, alloc, false); | ||
427 | a->nr_slab_frees++; | ||
428 | |||
429 | return; | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * Allocate physical pages. Since the underlying allocator is a buddy allocator | ||
434 | * the returned pages are always contiguous. However, since the space could | ||
435 | * be fragmented, this allocator will collate smaller non-contiguous | ||
436 | * allocations together if necessary. | ||
437 | */ | ||
438 | static struct gk20a_page_alloc *__do_gk20a_alloc_pages( | ||
439 | struct gk20a_page_allocator *a, u64 pages) | ||
440 | { | ||
441 | struct gk20a_page_alloc *alloc; | ||
442 | struct page_alloc_chunk *c; | ||
443 | u64 max_chunk_len = pages << a->page_shift; | ||
444 | int i = 0; | ||
445 | |||
446 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
447 | if (!alloc) | ||
448 | goto fail; | ||
449 | |||
450 | memset(alloc, 0, sizeof(*alloc)); | ||
451 | |||
452 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
453 | alloc->length = pages << a->page_shift; | ||
454 | |||
455 | while (pages) { | ||
456 | u64 chunk_addr = 0; | ||
457 | u64 chunk_pages = (u64)1 << __fls(pages); | ||
458 | u64 chunk_len = chunk_pages << a->page_shift; | ||
459 | |||
460 | /* | ||
461 | * Take care of the possibility that the allocation must be | ||
462 | * contiguous. If this is not the first iteration then that | ||
463 | * means the first iteration failed to alloc the entire | ||
464 | * requested size. The buddy allocator guarantees any given | ||
465 | * single alloc is contiguous. | ||
466 | */ | ||
467 | if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0) | ||
468 | goto fail_cleanup; | ||
469 | |||
470 | if (chunk_len > max_chunk_len) | ||
471 | chunk_len = max_chunk_len; | ||
472 | |||
473 | /* | ||
474 | * Keep attempting to allocate in smaller chunks until the alloc | ||
475 | * either succeeds or is smaller than the page_size of the | ||
476 | * allocator (i.e. the allocator is OOM). | ||
477 | */ | ||
478 | do { | ||
479 | chunk_addr = gk20a_alloc(&a->source_allocator, | ||
480 | chunk_len); | ||
481 | |||
482 | /* Divide by 2 and try again */ | ||
483 | if (!chunk_addr) { | ||
484 | palloc_dbg(a, "balloc failed: 0x%llx\n", | ||
485 | chunk_len); | ||
486 | chunk_len >>= 1; | ||
487 | max_chunk_len = chunk_len; | ||
488 | } | ||
489 | } while (!chunk_addr && chunk_len >= a->page_size); | ||
490 | |||
491 | chunk_pages = chunk_len >> a->page_shift; | ||
492 | |||
493 | if (!chunk_addr) { | ||
494 | palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len); | ||
495 | goto fail_cleanup; | ||
496 | } | ||
497 | |||
498 | c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
499 | if (!c) { | ||
500 | gk20a_free(&a->source_allocator, chunk_addr); | ||
501 | goto fail_cleanup; | ||
502 | } | ||
503 | |||
504 | pages -= chunk_pages; | ||
505 | |||
506 | c->base = chunk_addr; | ||
507 | c->length = chunk_len; | ||
508 | list_add(&c->list_entry, &alloc->alloc_chunks); | ||
509 | |||
510 | i++; | ||
511 | } | ||
512 | |||
513 | alloc->nr_chunks = i; | ||
514 | c = list_first_entry(&alloc->alloc_chunks, | ||
515 | struct page_alloc_chunk, list_entry); | ||
516 | alloc->base = c->base; | ||
517 | |||
518 | return alloc; | ||
519 | |||
520 | fail_cleanup: | ||
521 | while (!list_empty(&alloc->alloc_chunks)) { | ||
522 | c = list_first_entry(&alloc->alloc_chunks, | ||
523 | struct page_alloc_chunk, list_entry); | ||
524 | list_del(&c->list_entry); | ||
525 | gk20a_free(&a->source_allocator, c->base); | ||
526 | kfree(c); | ||
527 | } | ||
528 | kfree(alloc); | ||
529 | fail: | ||
530 | return ERR_PTR(-ENOMEM); | ||
531 | } | ||
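The loop above carves a request into power-of-two runs: always try the largest run that still fits (__fls() gives the top set bit of the remaining page count) and halve on failure until the source allocator is out of memory. A standalone sketch of just that strategy, assuming a 64-bit build so __fls() can take the u64 directly, with try_alloc() standing in for gk20a_alloc() on the source allocator and the FORCE_CONTIG and max_chunk_len handling omitted:

static int split_into_chunks(u64 pages, u64 page_shift,
			     u64 (*try_alloc)(u64 len))
{
	while (pages) {
		/* Largest power-of-two run <= remaining pages. */
		u64 chunk_len = (1ULL << __fls(pages)) << page_shift;

		/* Halve until the source allocator accepts it. */
		while (!try_alloc(chunk_len)) {
			chunk_len >>= 1;
			if (chunk_len < (1ULL << page_shift))
				return -ENOMEM;	/* Allocator is OOM. */
		}

		pages -= chunk_len >> page_shift;
	}

	return 0;
}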
532 | |||
533 | static struct gk20a_page_alloc *__gk20a_alloc_pages( | ||
534 | struct gk20a_page_allocator *a, u64 len) | ||
535 | { | ||
536 | struct gk20a_page_alloc *alloc = NULL; | ||
537 | struct page_alloc_chunk *c; | ||
538 | u64 pages; | ||
539 | int i = 0; | ||
540 | |||
541 | pages = ALIGN(len, a->page_size) >> a->page_shift; | ||
542 | |||
543 | alloc = __do_gk20a_alloc_pages(a, pages); | ||
544 | if (IS_ERR(alloc)) { | ||
545 | palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n", | ||
546 | pages << a->page_shift, pages); | ||
547 | return NULL; | ||
548 | } | ||
549 | |||
550 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", | ||
551 | pages << a->page_shift, pages, alloc->base); | ||
552 | list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { | ||
553 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | ||
554 | i++, c->base, c->length); | ||
555 | } | ||
556 | |||
557 | return alloc; | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Allocate enough pages to satisfy @len. Page size is determined at | ||
562 | * initialization of the allocator. | ||
563 | * | ||
564 | * The return is actually a struct gk20a_page_alloc pointer cast to a u64. This | ||
565 | * is because it doesn't make a lot of sense to return the address of the first | ||
566 | * page in the list of pages (since they could be discontiguous). This has | ||
567 | * precedent in the dma_alloc APIs, though really it's just an annoying | ||
568 | * artifact of the fact that the gk20a_alloc() API requires a u64 return type. | ||
569 | */ | ||
570 | static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len) | ||
571 | { | ||
572 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
573 | struct gk20a_page_alloc *alloc = NULL; | ||
574 | u64 real_len; | ||
575 | |||
576 | /* | ||
577 | * If we want contig pages we have to round up to a power of two. It's | ||
578 | * easier to do that here than in the buddy allocator. | ||
579 | */ | ||
580 | real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ? | ||
581 | roundup_pow_of_two(len) : len; | ||
582 | |||
583 | alloc_lock(__a); | ||
584 | if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES && | ||
585 | real_len <= (a->page_size / 2)) | ||
586 | alloc = __gk20a_alloc_slab(a, real_len); | ||
587 | else | ||
588 | alloc = __gk20a_alloc_pages(a, real_len); | ||
589 | |||
590 | if (!alloc) { | ||
591 | alloc_unlock(__a); | ||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | __insert_page_alloc(a, alloc); | ||
596 | |||
597 | a->nr_allocs++; | ||
598 | if (real_len > a->page_size / 2) | ||
599 | a->pages_alloced += alloc->length >> a->page_shift; | ||
600 | alloc_unlock(__a); | ||
601 | |||
602 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
603 | return alloc->base; | ||
604 | else | ||
605 | return (u64) (uintptr_t) alloc; | ||
606 | } | ||
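Which of the two return styles above a caller sees depends on how the allocator was created. A sketch of the scatter-gather case, error handling elided, assuming a page allocator pa initialized without GPU_ALLOC_NO_SCATTER_GATHER:

	u64 ret = gk20a_alloc(&pa, SZ_64K);
	struct gk20a_page_alloc *alloc =
		(struct gk20a_page_alloc *)(uintptr_t)ret;
	struct page_alloc_chunk *c;

	/* The u64 is a pointer in disguise; walk the chunk list. */
	list_for_each_entry(c, &alloc->alloc_chunks, list_entry)
		pr_info("chunk: 0x%010llx + 0x%llx\n", c->base, c->length);

	/* Free by handing the same handle back. */
	gk20a_free(&pa, ret);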
607 | |||
608 | /* | ||
609 | * Note: this will remove the gk20a_page_alloc struct from the RB tree | ||
610 | * if it's found. | ||
611 | */ | ||
612 | static void gk20a_page_free(struct gk20a_allocator *__a, u64 base) | ||
613 | { | ||
614 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
615 | struct gk20a_page_alloc *alloc; | ||
616 | |||
617 | alloc_lock(__a); | ||
618 | |||
619 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
620 | alloc = __find_page_alloc(a, base); | ||
621 | else | ||
622 | alloc = __find_page_alloc(a, | ||
623 | ((struct gk20a_page_alloc *)(uintptr_t)base)->base); | ||
624 | |||
625 | if (!alloc) { | ||
626 | palloc_dbg(a, "Hrm, found no alloc?\n"); | ||
627 | goto done; | ||
628 | } | ||
629 | |||
630 | a->nr_frees++; | ||
631 | |||
632 | palloc_dbg(a, "Free 0x%llx id=0x%010llx\n", | ||
633 | alloc->length, alloc->base); | ||
634 | |||
635 | /* | ||
636 | * Frees *alloc. | ||
637 | */ | ||
638 | if (alloc->slab_page) { | ||
639 | __gk20a_free_slab(a, alloc); | ||
640 | } else { | ||
641 | a->pages_freed += (alloc->length >> a->page_shift); | ||
642 | __gk20a_free_pages(a, alloc, true); | ||
643 | } | ||
644 | |||
645 | done: | ||
646 | alloc_unlock(__a); | ||
647 | } | ||
648 | |||
649 | static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed( | ||
650 | struct gk20a_page_allocator *a, u64 base, u64 length) | ||
651 | { | ||
652 | struct gk20a_page_alloc *alloc; | ||
653 | struct page_alloc_chunk *c; | ||
654 | |||
655 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
656 | c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
657 | if (!alloc || !c) | ||
658 | goto fail; | ||
659 | |||
660 | alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length); | ||
661 | if (!alloc->base) { | ||
662 | WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base); | ||
663 | goto fail; | ||
664 | } | ||
665 | |||
666 | alloc->nr_chunks = 1; | ||
667 | alloc->length = length; | ||
668 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
669 | |||
670 | c->base = alloc->base; | ||
671 | c->length = length; | ||
672 | list_add(&c->list_entry, &alloc->alloc_chunks); | ||
673 | |||
674 | return alloc; | ||
675 | |||
676 | fail: | ||
677 | kfree(c); | ||
678 | kfree(alloc); | ||
679 | return ERR_PTR(-ENOMEM); | ||
680 | } | ||
681 | |||
682 | static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a, | ||
683 | u64 base, u64 len) | ||
684 | { | ||
685 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
686 | struct gk20a_page_alloc *alloc = NULL; | ||
687 | struct page_alloc_chunk *c; | ||
688 | u64 aligned_len, pages; | ||
689 | int i = 0; | ||
690 | |||
691 | aligned_len = ALIGN(len, a->page_size); | ||
692 | pages = aligned_len >> a->page_shift; | ||
693 | |||
694 | alloc_lock(__a); | ||
695 | |||
696 | alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len); | ||
697 | if (IS_ERR(alloc)) { | ||
698 | alloc_unlock(__a); | ||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | __insert_page_alloc(a, alloc); | ||
703 | alloc_unlock(__a); | ||
704 | |||
705 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", | ||
706 | alloc->base, aligned_len, pages); | ||
707 | list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { | ||
708 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | ||
709 | i++, c->base, c->length); | ||
710 | } | ||
711 | |||
712 | a->nr_fixed_allocs++; | ||
713 | a->pages_alloced += pages; | ||
714 | |||
715 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
716 | return alloc->base; | ||
717 | else | ||
718 | return (u64) (uintptr_t) alloc; | ||
719 | } | ||
720 | |||
721 | static void gk20a_page_free_fixed(struct gk20a_allocator *__a, | ||
722 | u64 base, u64 len) | ||
723 | { | ||
724 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
725 | struct gk20a_page_alloc *alloc; | ||
726 | |||
727 | alloc_lock(__a); | ||
728 | |||
729 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) { | ||
730 | alloc = __find_page_alloc(a, base); | ||
731 | if (!alloc) | ||
732 | goto done; | ||
733 | } else { | ||
734 | alloc = (struct gk20a_page_alloc *) (uintptr_t) base; | ||
735 | } | ||
736 | |||
737 | palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n", | ||
738 | alloc->base, alloc->length); | ||
739 | |||
740 | a->nr_fixed_frees++; | ||
741 | a->pages_freed += (alloc->length >> a->page_shift); | ||
742 | |||
743 | /* | ||
744 | * This works for the time being since the buddy allocator | ||
745 | * uses the same free function for both fixed and regular | ||
746 | * allocs. This would have to be updated if the underlying | ||
747 | * allocator were to change. | ||
748 | */ | ||
749 | __gk20a_free_pages(a, alloc, true); | ||
750 | |||
751 | done: | ||
752 | alloc_unlock(__a); | ||
753 | } | ||
754 | |||
755 | static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a) | ||
756 | { | ||
757 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
758 | |||
759 | alloc_lock(__a); | ||
760 | kfree(a); | ||
761 | __a->priv = NULL; | ||
762 | alloc_unlock(__a); | ||
763 | } | ||
764 | |||
765 | static void gk20a_page_print_stats(struct gk20a_allocator *__a, | ||
766 | struct seq_file *s, int lock) | ||
767 | { | ||
768 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
769 | int i; | ||
770 | |||
771 | if (lock) | ||
772 | alloc_lock(__a); | ||
773 | |||
774 | __alloc_pstat(s, __a, "Page allocator:\n"); | ||
775 | __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs); | ||
776 | __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees); | ||
777 | __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs); | ||
778 | __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees); | ||
779 | __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs); | ||
780 | __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees); | ||
781 | __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced); | ||
782 | __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed); | ||
783 | __alloc_pstat(s, __a, "\n"); | ||
784 | |||
785 | /* | ||
786 | * Slab info. | ||
787 | */ | ||
788 | if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) { | ||
789 | __alloc_pstat(s, __a, "Slabs:\n"); | ||
790 | __alloc_pstat(s, __a, " size empty partial full\n"); | ||
791 | __alloc_pstat(s, __a, " ---- ----- ------- ----\n"); | ||
792 | |||
793 | for (i = 0; i < a->nr_slabs; i++) { | ||
794 | struct page_alloc_slab *slab = &a->slabs[i]; | ||
795 | |||
796 | __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n", | ||
797 | slab->slab_size, | ||
798 | slab->nr_empty, slab->nr_partial, | ||
799 | slab->nr_full); | ||
800 | } | ||
801 | __alloc_pstat(s, __a, "\n"); | ||
802 | } | ||
803 | |||
804 | __alloc_pstat(s, __a, "Source alloc: %s\n", | ||
805 | a->source_allocator.name); | ||
806 | gk20a_alloc_print_stats(&a->source_allocator, s, lock); | ||
807 | |||
808 | if (lock) | ||
809 | alloc_unlock(__a); | ||
810 | } | ||
811 | |||
812 | static const struct gk20a_allocator_ops page_ops = { | ||
813 | .alloc = gk20a_page_alloc, | ||
814 | .free = gk20a_page_free, | ||
815 | |||
816 | .alloc_fixed = gk20a_page_alloc_fixed, | ||
817 | .free_fixed = gk20a_page_free_fixed, | ||
818 | |||
819 | .reserve_carveout = gk20a_page_reserve_co, | ||
820 | .release_carveout = gk20a_page_release_co, | ||
821 | |||
822 | .base = gk20a_page_alloc_base, | ||
823 | .length = gk20a_page_alloc_length, | ||
824 | .end = gk20a_page_alloc_end, | ||
825 | .inited = gk20a_page_alloc_inited, | ||
826 | .space = gk20a_page_alloc_space, | ||
827 | |||
828 | .fini = gk20a_page_allocator_destroy, | ||
829 | |||
830 | .print_stats = gk20a_page_print_stats, | ||
831 | }; | ||
832 | |||
833 | /* | ||
834 | * nr_slabs is computed as follows: divide page_size by 4096 to get number of | ||
835 | * 4k pages in page_size. Then take the base 2 log of that to get number of | ||
836 | * slabs. For a 64k page_size that works out like: | ||
837 | * | ||
838 | * (64 * 1024) / (4 * 1024) = 16 | ||
839 | * ilog2(16) = 4 | ||
840 | * | ||
841 | * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k). | ||
842 | */ | ||
843 | static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a) | ||
844 | { | ||
845 | size_t nr_slabs = ilog2(a->page_size >> 12); | ||
846 | unsigned int i; | ||
847 | |||
848 | a->slabs = kcalloc(nr_slabs, | ||
849 | sizeof(struct page_alloc_slab), | ||
850 | GFP_KERNEL); | ||
851 | if (!a->slabs) | ||
852 | return -ENOMEM; | ||
853 | a->nr_slabs = nr_slabs; | ||
854 | |||
855 | for (i = 0; i < nr_slabs; i++) { | ||
856 | struct page_alloc_slab *slab = &a->slabs[i]; | ||
857 | |||
858 | slab->slab_size = SZ_4K * (1 << i); | ||
859 | INIT_LIST_HEAD(&slab->empty); | ||
860 | INIT_LIST_HEAD(&slab->partial); | ||
861 | INIT_LIST_HEAD(&slab->full); | ||
862 | slab->nr_empty = 0; | ||
863 | slab->nr_partial = 0; | ||
864 | slab->nr_full = 0; | ||
865 | } | ||
866 | |||
867 | return 0; | ||
868 | } | ||
869 | |||
870 | int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, | ||
871 | const char *name, u64 base, u64 length, | ||
872 | u64 blk_size, u64 flags) | ||
873 | { | ||
874 | struct gk20a_page_allocator *a; | ||
875 | char buddy_name[sizeof(__a->name)]; | ||
876 | int err; | ||
877 | |||
878 | mutex_lock(&meta_data_cache_lock); | ||
879 | if (!page_alloc_cache) | ||
880 | page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0); | ||
881 | if (!page_alloc_chunk_cache) | ||
882 | page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0); | ||
883 | if (!page_alloc_slab_page_cache) | ||
884 | page_alloc_slab_page_cache = | ||
885 | KMEM_CACHE(page_alloc_slab_page, 0); | ||
886 | mutex_unlock(&meta_data_cache_lock); | ||
887 | |||
888 | if (!page_alloc_cache || !page_alloc_chunk_cache || !page_alloc_slab_page_cache) | ||
889 | return -ENOMEM; | ||
890 | |||
891 | if (blk_size < SZ_4K) | ||
892 | return -EINVAL; | ||
893 | |||
894 | a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL); | ||
895 | if (!a) | ||
896 | return -ENOMEM; | ||
897 | |||
898 | err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops); | ||
899 | if (err) | ||
900 | goto fail; | ||
901 | |||
902 | a->base = base; | ||
903 | a->length = length; | ||
904 | a->page_size = blk_size; | ||
905 | a->page_shift = __ffs(blk_size); | ||
906 | a->allocs = RB_ROOT; | ||
907 | a->owner = __a; | ||
908 | a->flags = flags; | ||
909 | |||
910 | if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) { | ||
911 | err = gk20a_page_alloc_init_slabs(a); | ||
912 | if (err) | ||
913 | goto fail; | ||
914 | } | ||
915 | |||
916 | snprintf(buddy_name, sizeof(buddy_name), "%s-src", name); | ||
917 | |||
918 | err = gk20a_buddy_allocator_init(g, &a->source_allocator, buddy_name, | ||
919 | base, length, blk_size, 0); | ||
920 | if (err) | ||
921 | goto fail; | ||
922 | |||
923 | gk20a_init_alloc_debug(g, __a); | ||
924 | palloc_dbg(a, "New allocator: type page\n"); | ||
925 | palloc_dbg(a, " base 0x%llx\n", a->base); | ||
926 | palloc_dbg(a, " size 0x%llx\n", a->length); | ||
927 | palloc_dbg(a, " page_size 0x%llx\n", a->page_size); | ||
928 | palloc_dbg(a, " flags 0x%llx\n", a->flags); | ||
929 | palloc_dbg(a, " slabs: %d\n", a->nr_slabs); | ||
930 | |||
931 | return 0; | ||
932 | |||
933 | fail: | ||
934 | kfree(a); | ||
935 | return err; | ||
936 | } | ||
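Taken together, a hypothetical end-to-end use of the allocator this file implements (names and sizes are illustrative only):

	struct gk20a_allocator pa;
	int err;

	/* 64k pages over a 128 MB carveout, with 4k slab support. */
	err = gk20a_page_allocator_init(g, &pa, "vidmem-example",
					SZ_1M, SZ_128M, SZ_64K,
					GPU_ALLOC_4K_VIDMEM_PAGES);
	if (!err) {
		u64 h = gk20a_alloc(&pa, SZ_8K); /* Served from the 8k slab. */

		if (h)
			gk20a_free(&pa, h);
		gk20a_alloc_destroy(&pa);
	}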
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 39562ec1..2ee2dd43 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3400,7 +3400,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
3400 | gr->ctx_vars.local_golden_image = NULL; | 3400 | gr->ctx_vars.local_golden_image = NULL; |
3401 | 3401 | ||
3402 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) | 3402 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) |
3403 | nvgpu_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); | 3403 | nvgpu_kfree(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); |
3404 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; | 3404 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; |
3405 | 3405 | ||
3406 | gk20a_comptag_allocator_destroy(&gr->comp_tags); | 3406 | gk20a_comptag_allocator_destroy(&gr->comp_tags); |
@@ -7998,7 +7998,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7998 | hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; | 7998 | hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; |
7999 | map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); | 7999 | map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); |
8000 | 8000 | ||
8001 | map = nvgpu_alloc(map_size, true); | 8001 | map = nvgpu_kalloc(map_size, true); |
8002 | if (!map) | 8002 | if (!map) |
8003 | return -ENOMEM; | 8003 | return -ENOMEM; |
8004 | 8004 | ||
@@ -8088,7 +8088,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
8088 | return 0; | 8088 | return 0; |
8089 | cleanup: | 8089 | cleanup: |
8090 | gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); | 8090 | gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); |
8091 | nvgpu_free(map); | 8091 | nvgpu_kfree(map); |
8092 | return -EINVAL; | 8092 | return -EINVAL; |
8093 | } | 8093 | } |
8094 | 8094 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h deleted file mode 100644 index f9b03e0e..00000000 --- a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h +++ /dev/null | |||
@@ -1,121 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * Basics: | ||
19 | * | ||
20 | * - Lockless memory allocator for fixed-size structures, whose | ||
21 | * size is defined up front at init time. | ||
22 | * - Memory footprint scales linearly w/ the number of structures in | ||
23 | * the pool. It is ~= sizeof(int) * N. | ||
24 | * - Memory is pre-allocated by the client. The allocator itself | ||
25 | * only computes the addresses for allocations. | ||
26 | * - Limit of INT_MAX nodes that the allocator can be responsible for. | ||
27 | * | ||
28 | * Implementation details: | ||
29 | * | ||
30 | * The allocator maintains a single list of free nodes. We allocate & | ||
31 | * free nodes from the head of the list. We rely on the cmpxchg() operator | ||
32 | * to maintain atomicity on the head. | ||
33 | * | ||
34 | * So, both allocs & frees are O(1)!! | ||
35 | * | ||
36 | * -- Definitions -- | ||
37 | * Block Size - size of a single structure that this allocator will | ||
38 | * allocate. | ||
39 | * Node - one of the elements of size blk_size in the | ||
40 | * client-allocated buffer. | ||
41 | * Node Index - zero-based index of a node in the client-allocated | ||
42 | * contiguous buffer. | ||
43 | * | ||
44 | * -- Initial State -- | ||
45 | * We maintain the following to track the state of the free list: | ||
46 | * | ||
47 | * 1) A "head" index to track the index of the first free node in the list | ||
48 | * 2) A "next" array to track the index of the next free node in the list | ||
49 | * for every node. So next[head] will give the index of the 2nd free | ||
50 | * element in the list. | ||
51 | * | ||
52 | * So, to begin with, the free list consists of all node indices, and each | ||
53 | * position in the next array contains index N + 1: | ||
54 | * | ||
55 | * head = 0 | ||
56 | * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes | ||
57 | * free_list = 0->1->2->3->4->-1 | ||
58 | * | ||
59 | * -- Allocations -- | ||
60 | * 1) Read the current head (aka acq_head) | ||
61 | * 2) Read next[acq_head], to get the 2nd free element (aka new_head) | ||
62 | * 3) cmpxchg(&head, acq_head, new_head) | ||
63 | * 4) If it succeeds, compute the address of the node, based on | ||
64 | * base address, blk_size, & acq_head. | ||
65 | * | ||
66 | * head = 1; | ||
67 | * next = [1, 2, 3, 4, -1] : Example after allocating Node #0 | ||
68 | * free_list = 1->2->3->4->-1 | ||
69 | * | ||
70 | * head = 2; | ||
71 | * next = [1, 2, 3, 4, -1] : Example after allocating Node #1 | ||
72 | * free_list = 2->3->4->-1 | ||
73 | * | ||
74 | * -- Frees -- | ||
75 | * 1) Based on the address to be freed, calculate the index of the node | ||
76 | * being freed (cur_idx) | ||
77 | * 2) Read the current head (old_head) | ||
78 | * 3) So the freed node is going to go at the head of the list, and we | ||
79 | * want to put the old_head after it. So next[cur_idx] = old_head | ||
80 | * 4) cmpxchg(&head, old_head, cur_idx) | ||
81 | * | ||
82 | * head = 0 | ||
83 | * next = [2, 2, 3, 4, -1] | ||
84 | * free_list = 0->2->3->4->-1 : Example after freeing Node #0 | ||
85 | * | ||
86 | * head = 1 | ||
87 | * next = [2, 0, 3, 4, -1] | ||
88 | * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1 | ||
89 | */ | ||
90 | |||
91 | #ifndef LOCKLESS_ALLOCATOR_PRIV_H | ||
92 | #define LOCKLESS_ALLOCATOR_PRIV_H | ||
93 | |||
94 | struct gk20a_allocator; | ||
95 | |||
96 | struct gk20a_lockless_allocator { | ||
97 | struct gk20a_allocator *owner; | ||
98 | |||
99 | u64 base; /* Base address of the space. */ | ||
100 | u64 length; /* Length of the space. */ | ||
101 | u64 blk_size; /* Size of the structure being allocated */ | ||
102 | int nr_nodes; /* Number of nodes available for allocation */ | ||
103 | |||
104 | int *next; /* An array holding the next indices per node */ | ||
105 | int head; /* Current node at the top of the stack */ | ||
106 | |||
107 | u64 flags; | ||
108 | |||
109 | bool inited; | ||
110 | |||
111 | /* Statistics */ | ||
112 | atomic_t nr_allocs; | ||
113 | }; | ||
114 | |||
115 | static inline struct gk20a_lockless_allocator *lockless_allocator( | ||
116 | struct gk20a_allocator *a) | ||
117 | { | ||
118 | return (struct gk20a_lockless_allocator *)(a)->priv; | ||
119 | } | ||
120 | |||
121 | #endif | ||
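The algorithm the comment above lays out fits in a few lines of code. A compact sketch of both paths (this restates the algorithm, it is not the removed implementation; error paths, statistics, and 32-bit division concerns are omitted):

static u64 lockless_try_alloc(struct gk20a_lockless_allocator *a)
{
	int head, new_head;

	while (1) {
		head = READ_ONCE(a->head);
		if (head < 0)
			return 0;	/* -1 terminator: free list empty. */

		new_head = a->next[head];
		if (cmpxchg(&a->head, head, new_head) == head)
			return a->base + (u64)head * a->blk_size;
	}
}

static void lockless_free(struct gk20a_lockless_allocator *a, u64 addr)
{
	int cur_idx = (addr - a->base) / a->blk_size;
	int old_head;

	do {
		old_head = READ_ONCE(a->head);
		a->next[cur_idx] = old_head;
	} while (cmpxchg(&a->head, old_head, cur_idx) != old_head);
}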
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 2e338fef..d594a5a4 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -31,9 +31,9 @@ | |||
31 | #include <uapi/linux/nvgpu.h> | 31 | #include <uapi/linux/nvgpu.h> |
32 | #include <trace/events/gk20a.h> | 32 | #include <trace/events/gk20a.h> |
33 | 33 | ||
34 | #include <gk20a/page_allocator_priv.h> | ||
35 | |||
36 | #include <nvgpu/timers.h> | 34 | #include <nvgpu/timers.h> |
35 | #include <nvgpu/allocator.h> | ||
36 | #include <nvgpu/page_allocator.h> | ||
37 | 37 | ||
38 | #include "gk20a.h" | 38 | #include "gk20a.h" |
39 | #include "mm_gk20a.h" | 39 | #include "mm_gk20a.h" |
@@ -74,7 +74,7 @@ is_vidmem_page_alloc(u64 addr) | |||
74 | return !!(addr & 1ULL); | 74 | return !!(addr & 1ULL); |
75 | } | 75 | } |
76 | 76 | ||
77 | static inline struct gk20a_page_alloc * | 77 | static inline struct nvgpu_page_alloc * |
78 | get_vidmem_page_alloc(struct scatterlist *sgl) | 78 | get_vidmem_page_alloc(struct scatterlist *sgl) |
79 | { | 79 | { |
80 | u64 addr; | 80 | u64 addr; |
@@ -86,7 +86,7 @@ get_vidmem_page_alloc(struct scatterlist *sgl) | |||
86 | else | 86 | else |
87 | WARN_ON(1); | 87 | WARN_ON(1); |
88 | 88 | ||
89 | return (struct gk20a_page_alloc *)(uintptr_t)addr; | 89 | return (struct nvgpu_page_alloc *)(uintptr_t)addr; |
90 | } | 90 | } |
91 | 91 | ||
92 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | 92 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) |
@@ -176,7 +176,7 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, | |||
176 | static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, | 176 | static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, |
177 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) | 177 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) |
178 | { | 178 | { |
179 | struct gk20a_page_alloc *alloc = NULL; | 179 | struct nvgpu_page_alloc *alloc = NULL; |
180 | struct page_alloc_chunk *chunk = NULL; | 180 | struct page_alloc_chunk *chunk = NULL; |
181 | u32 byteoff, start_reg, until_end, n; | 181 | u32 byteoff, start_reg, until_end, n; |
182 | 182 | ||
@@ -797,8 +797,8 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block) | |||
797 | static void gk20a_vidmem_destroy(struct gk20a *g) | 797 | static void gk20a_vidmem_destroy(struct gk20a *g) |
798 | { | 798 | { |
799 | #if defined(CONFIG_GK20A_VIDMEM) | 799 | #if defined(CONFIG_GK20A_VIDMEM) |
800 | if (gk20a_alloc_initialized(&g->mm.vidmem.allocator)) | 800 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) |
801 | gk20a_alloc_destroy(&g->mm.vidmem.allocator); | 801 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); |
802 | #endif | 802 | #endif |
803 | } | 803 | } |
804 | 804 | ||
@@ -928,8 +928,8 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
928 | u64 default_page_size = SZ_64K; | 928 | u64 default_page_size = SZ_64K; |
929 | int err; | 929 | int err; |
930 | 930 | ||
931 | static struct gk20a_alloc_carveout wpr_co = | 931 | static struct nvgpu_alloc_carveout wpr_co = |
932 | GK20A_CARVEOUT("wpr-region", 0, SZ_16M); | 932 | NVGPU_CARVEOUT("wpr-region", 0, SZ_16M); |
933 | 933 | ||
934 | if (!size) | 934 | if (!size) |
935 | return 0; | 935 | return 0; |
@@ -944,12 +944,12 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
944 | * initialization requires vidmem but we want to use the CE to zero | 944 | * initialization requires vidmem but we want to use the CE to zero |
945 | * out vidmem before allocating it... | 945 | * out vidmem before allocating it... |
946 | */ | 946 | */ |
947 | err = gk20a_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, | 947 | err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, |
948 | "vidmem-bootstrap", | 948 | "vidmem-bootstrap", |
949 | bootstrap_base, bootstrap_size, | 949 | bootstrap_base, bootstrap_size, |
950 | SZ_4K, 0); | 950 | SZ_4K, 0); |
951 | 951 | ||
952 | err = gk20a_page_allocator_init(g, &g->mm.vidmem.allocator, | 952 | err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator, |
953 | "vidmem", | 953 | "vidmem", |
954 | base, size - base, | 954 | base, size - base, |
955 | default_page_size, | 955 | default_page_size, |
@@ -961,7 +961,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
961 | } | 961 | } |
962 | 962 | ||
963 | /* Reserve bootstrap region in vidmem allocator */ | 963 | /* Reserve bootstrap region in vidmem allocator */ |
964 | gk20a_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); | 964 | nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); |
965 | 965 | ||
966 | mm->vidmem.base = base; | 966 | mm->vidmem.base = base; |
967 | mm->vidmem.size = size - base; | 967 | mm->vidmem.size = size - base; |
@@ -1482,7 +1482,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm, | |||
1482 | 1482 | ||
1483 | mutex_lock(&vm->update_gmmu_lock); | 1483 | mutex_lock(&vm->update_gmmu_lock); |
1484 | 1484 | ||
1485 | buffer_list = nvgpu_alloc(sizeof(*buffer_list) * | 1485 | buffer_list = nvgpu_kalloc(sizeof(*buffer_list) * |
1486 | vm->num_user_mapped_buffers, true); | 1486 | vm->num_user_mapped_buffers, true); |
1487 | if (!buffer_list) { | 1487 | if (!buffer_list) { |
1488 | mutex_unlock(&vm->update_gmmu_lock); | 1488 | mutex_unlock(&vm->update_gmmu_lock); |
@@ -1567,7 +1567,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, | |||
1567 | gk20a_vm_mapping_batch_finish_locked(vm, &batch); | 1567 | gk20a_vm_mapping_batch_finish_locked(vm, &batch); |
1568 | mutex_unlock(&vm->update_gmmu_lock); | 1568 | mutex_unlock(&vm->update_gmmu_lock); |
1569 | 1569 | ||
1570 | nvgpu_free(mapped_buffers); | 1570 | nvgpu_kfree(mapped_buffers); |
1571 | } | 1571 | } |
1572 | 1572 | ||
1573 | static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, | 1573 | static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, |
@@ -1623,7 +1623,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | |||
1623 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 1623 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
1624 | 1624 | ||
1625 | { | 1625 | { |
1626 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | 1626 | struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx]; |
1627 | u64 offset; | 1627 | u64 offset; |
1628 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | 1628 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; |
1629 | 1629 | ||
@@ -1645,7 +1645,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | |||
1645 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, | 1645 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, |
1646 | vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); | 1646 | vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); |
1647 | 1647 | ||
1648 | offset = gk20a_alloc(vma, size); | 1648 | offset = nvgpu_alloc(vma, size); |
1649 | if (!offset) { | 1649 | if (!offset) { |
1650 | gk20a_err(dev_from_vm(vm), | 1650 | gk20a_err(dev_from_vm(vm), |
1651 | "%s oom: sz=0x%llx", vma->name, size); | 1651 | "%s oom: sz=0x%llx", vma->name, size); |
@@ -1660,11 +1660,11 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, | |||
1660 | u64 offset, u64 size, | 1660 | u64 offset, u64 size, |
1661 | enum gmmu_pgsz_gk20a pgsz_idx) | 1661 | enum gmmu_pgsz_gk20a pgsz_idx) |
1662 | { | 1662 | { |
1663 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; | 1663 | struct nvgpu_allocator *vma = &vm->vma[pgsz_idx]; |
1664 | 1664 | ||
1665 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", | 1665 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", |
1666 | vma->name, offset, size); | 1666 | vma->name, offset, size); |
1667 | gk20a_free(vma, offset); | 1667 | nvgpu_free(vma, offset); |
1668 | 1668 | ||
1669 | return 0; | 1669 | return 0; |
1670 | } | 1670 | } |
@@ -2302,15 +2302,15 @@ err_kfree: | |||
2302 | int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) | 2302 | int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) |
2303 | { | 2303 | { |
2304 | #if defined(CONFIG_GK20A_VIDMEM) | 2304 | #if defined(CONFIG_GK20A_VIDMEM) |
2305 | struct gk20a_allocator *allocator = &g->mm.vidmem.allocator; | 2305 | struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator; |
2306 | 2306 | ||
2307 | gk20a_dbg_fn(""); | 2307 | gk20a_dbg_fn(""); |
2308 | 2308 | ||
2309 | if (!gk20a_alloc_initialized(allocator)) | 2309 | if (!nvgpu_alloc_initialized(allocator)) |
2310 | return -ENOSYS; | 2310 | return -ENOSYS; |
2311 | 2311 | ||
2312 | mutex_lock(&g->mm.vidmem.clear_list_mutex); | 2312 | mutex_lock(&g->mm.vidmem.clear_list_mutex); |
2313 | *space = gk20a_alloc_space(allocator) + | 2313 | *space = nvgpu_alloc_space(allocator) + |
2314 | atomic64_read(&g->mm.vidmem.bytes_pending); | 2314 | atomic64_read(&g->mm.vidmem.bytes_pending); |
2315 | mutex_unlock(&g->mm.vidmem.clear_list_mutex); | 2315 | mutex_unlock(&g->mm.vidmem.clear_list_mutex); |
2316 | return 0; | 2316 | return 0; |
@@ -2359,7 +2359,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, | |||
2359 | u64 buf_addr; | 2359 | u64 buf_addr; |
2360 | 2360 | ||
2361 | if (aperture == APERTURE_VIDMEM) { | 2361 | if (aperture == APERTURE_VIDMEM) { |
2362 | struct gk20a_page_alloc *alloc = get_vidmem_page_alloc(sgl); | 2362 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); |
2363 | struct page_alloc_chunk *chunk = NULL; | 2363 | struct page_alloc_chunk *chunk = NULL; |
2364 | 2364 | ||
2365 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | 2365 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { |
@@ -3068,7 +3068,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem) | |||
3068 | { | 3068 | { |
3069 | struct gk20a_fence *gk20a_fence_out = NULL; | 3069 | struct gk20a_fence *gk20a_fence_out = NULL; |
3070 | struct gk20a_fence *gk20a_last_fence = NULL; | 3070 | struct gk20a_fence *gk20a_last_fence = NULL; |
3071 | struct gk20a_page_alloc *alloc = NULL; | 3071 | struct nvgpu_page_alloc *alloc = NULL; |
3072 | struct page_alloc_chunk *chunk = NULL; | 3072 | struct page_alloc_chunk *chunk = NULL; |
3073 | int err = 0; | 3073 | int err = 0; |
3074 | 3074 | ||
@@ -3134,15 +3134,15 @@ int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
3134 | } | 3134 | } |
3135 | 3135 | ||
3136 | #if defined(CONFIG_GK20A_VIDMEM) | 3136 | #if defined(CONFIG_GK20A_VIDMEM) |
3137 | static u64 __gk20a_gmmu_alloc(struct gk20a_allocator *allocator, dma_addr_t at, | 3137 | static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at, |
3138 | size_t size) | 3138 | size_t size) |
3139 | { | 3139 | { |
3140 | u64 addr = 0; | 3140 | u64 addr = 0; |
3141 | 3141 | ||
3142 | if (at) | 3142 | if (at) |
3143 | addr = gk20a_alloc_fixed(allocator, at, size); | 3143 | addr = nvgpu_alloc_fixed(allocator, at, size); |
3144 | else | 3144 | else |
3145 | addr = gk20a_alloc(allocator, size); | 3145 | addr = nvgpu_alloc(allocator, size); |
3146 | 3146 | ||
3147 | return addr; | 3147 | return addr; |
3148 | } | 3148 | } |
@@ -3154,14 +3154,14 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
3154 | #if defined(CONFIG_GK20A_VIDMEM) | 3154 | #if defined(CONFIG_GK20A_VIDMEM) |
3155 | u64 addr; | 3155 | u64 addr; |
3156 | int err; | 3156 | int err; |
3157 | struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ? | 3157 | struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? |
3158 | &g->mm.vidmem.allocator : | 3158 | &g->mm.vidmem.allocator : |
3159 | &g->mm.vidmem.bootstrap_allocator; | 3159 | &g->mm.vidmem.bootstrap_allocator; |
3160 | int before_pending; | 3160 | int before_pending; |
3161 | 3161 | ||
3162 | gk20a_dbg_fn(""); | 3162 | gk20a_dbg_fn(""); |
3163 | 3163 | ||
3164 | if (!gk20a_alloc_initialized(&g->mm.vidmem.allocator)) | 3164 | if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) |
3165 | return -ENOSYS; | 3165 | return -ENOSYS; |
3166 | 3166 | ||
3167 | /* we don't support dma attributes here, except that kernel mappings | 3167 | /* we don't support dma attributes here, except that kernel mappings |
@@ -3214,7 +3214,7 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
3214 | fail_kfree: | 3214 | fail_kfree: |
3215 | kfree(mem->sgt); | 3215 | kfree(mem->sgt); |
3216 | fail_physfree: | 3216 | fail_physfree: |
3217 | gk20a_free(&g->mm.vidmem.allocator, addr); | 3217 | nvgpu_free(&g->mm.vidmem.allocator, addr); |
3218 | return err; | 3218 | return err; |
3219 | #else | 3219 | #else |
3220 | return -ENOSYS; | 3220 | return -ENOSYS; |
@@ -3241,7 +3241,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, | |||
3241 | } | 3241 | } |
3242 | } else { | 3242 | } else { |
3243 | gk20a_memset(g, mem, 0, 0, mem->size); | 3243 | gk20a_memset(g, mem, 0, 0, mem->size); |
3244 | gk20a_free(mem->allocator, | 3244 | nvgpu_free(mem->allocator, |
3245 | (u64)get_vidmem_page_alloc(mem->sgt->sgl)); | 3245 | (u64)get_vidmem_page_alloc(mem->sgt->sgl)); |
3246 | gk20a_free_sgtable(&mem->sgt); | 3246 | gk20a_free_sgtable(&mem->sgt); |
3247 | 3247 | ||
@@ -3276,7 +3276,7 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | |||
3276 | u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem, | 3276 | u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem, |
3277 | u32 flags) | 3277 | u32 flags) |
3278 | { | 3278 | { |
3279 | struct gk20a_page_alloc *alloc; | 3279 | struct nvgpu_page_alloc *alloc; |
3280 | u64 addr; | 3280 | u64 addr; |
3281 | 3281 | ||
3282 | if (mem->aperture == APERTURE_VIDMEM) { | 3282 | if (mem->aperture == APERTURE_VIDMEM) { |
@@ -3317,7 +3317,7 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) | |||
3317 | 3317 | ||
3318 | while ((mem = get_pending_mem_desc(mm)) != NULL) { | 3318 | while ((mem = get_pending_mem_desc(mm)) != NULL) { |
3319 | gk20a_gmmu_clear_vidmem_mem(g, mem); | 3319 | gk20a_gmmu_clear_vidmem_mem(g, mem); |
3320 | gk20a_free(mem->allocator, | 3320 | nvgpu_free(mem->allocator, |
3321 | (u64)get_vidmem_page_alloc(mem->sgt->sgl)); | 3321 | (u64)get_vidmem_page_alloc(mem->sgt->sgl)); |
3322 | gk20a_free_sgtable(&mem->sgt); | 3322 | gk20a_free_sgtable(&mem->sgt); |
3323 | 3323 | ||
@@ -3905,7 +3905,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
3905 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | 3905 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; |
3906 | int err; | 3906 | int err; |
3907 | struct scatterlist *sgl = NULL; | 3907 | struct scatterlist *sgl = NULL; |
3908 | struct gk20a_page_alloc *alloc = NULL; | 3908 | struct nvgpu_page_alloc *alloc = NULL; |
3909 | struct page_alloc_chunk *chunk = NULL; | 3909 | struct page_alloc_chunk *chunk = NULL; |
3910 | u64 length; | 3910 | u64 length; |
3911 | 3911 | ||
@@ -4251,12 +4251,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) | |||
4251 | * | 4251 | * |
4252 | * !!! TODO: cleanup. | 4252 | * !!! TODO: cleanup. |
4253 | */ | 4253 | */ |
4254 | sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel], | 4254 | sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel], |
4255 | vm->va_limit - | 4255 | vm->va_limit - |
4256 | mm->channel.kernel_size, | 4256 | mm->channel.kernel_size, |
4257 | 512 * PAGE_SIZE); | 4257 | 512 * PAGE_SIZE); |
4258 | if (!sema_sea->gpu_va) { | 4258 | if (!sema_sea->gpu_va) { |
4259 | gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); | 4259 | nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); |
4260 | gk20a_vm_put(vm); | 4260 | gk20a_vm_put(vm); |
4261 | return -ENOMEM; | 4261 | return -ENOMEM; |
4262 | } | 4262 | } |
@@ -4264,7 +4264,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) | |||
4264 | err = gk20a_semaphore_pool_map(vm->sema_pool, vm); | 4264 | err = gk20a_semaphore_pool_map(vm->sema_pool, vm); |
4265 | if (err) { | 4265 | if (err) { |
4266 | gk20a_semaphore_pool_unmap(vm->sema_pool, vm); | 4266 | gk20a_semaphore_pool_unmap(vm->sema_pool, vm); |
4267 | gk20a_free(&vm->vma[gmmu_page_size_small], | 4267 | nvgpu_free(&vm->vma[gmmu_page_size_small], |
4268 | vm->sema_pool->gpu_va); | 4268 | vm->sema_pool->gpu_va); |
4269 | gk20a_vm_put(vm); | 4269 | gk20a_vm_put(vm); |
4270 | } | 4270 | } |
@@ -4387,7 +4387,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4387 | snprintf(alloc_name, sizeof(alloc_name), | 4387 | snprintf(alloc_name, sizeof(alloc_name), |
4388 | "gk20a_%s-fixed", name); | 4388 | "gk20a_%s-fixed", name); |
4389 | 4389 | ||
4390 | err = __gk20a_buddy_allocator_init(g, &vm->fixed, | 4390 | err = __nvgpu_buddy_allocator_init(g, &vm->fixed, |
4391 | vm, alloc_name, | 4391 | vm, alloc_name, |
4392 | small_vma_start, | 4392 | small_vma_start, |
4393 | g->separate_fixed_allocs, | 4393 | g->separate_fixed_allocs, |
@@ -4404,7 +4404,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4404 | if (small_vma_start < small_vma_limit) { | 4404 | if (small_vma_start < small_vma_limit) { |
4405 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 4405 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, |
4406 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | 4406 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); |
4407 | err = __gk20a_buddy_allocator_init( | 4407 | err = __nvgpu_buddy_allocator_init( |
4408 | g, | 4408 | g, |
4409 | &vm->vma[gmmu_page_size_small], | 4409 | &vm->vma[gmmu_page_size_small], |
4410 | vm, alloc_name, | 4410 | vm, alloc_name, |
@@ -4420,7 +4420,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4420 | if (large_vma_start < large_vma_limit) { | 4420 | if (large_vma_start < large_vma_limit) { |
4421 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 4421 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
4422 | name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); | 4422 | name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); |
4423 | err = __gk20a_buddy_allocator_init( | 4423 | err = __nvgpu_buddy_allocator_init( |
4424 | g, | 4424 | g, |
4425 | &vm->vma[gmmu_page_size_big], | 4425 | &vm->vma[gmmu_page_size_big], |
4426 | vm, alloc_name, | 4426 | vm, alloc_name, |
@@ -4438,7 +4438,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4438 | /* | 4438 | /* |
4439 | * kernel reserved VMA is at the end of the aperture | 4439 | * kernel reserved VMA is at the end of the aperture |
4440 | */ | 4440 | */ |
4441 | err = __gk20a_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], | 4441 | err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], |
4442 | vm, alloc_name, | 4442 | vm, alloc_name, |
4443 | kernel_vma_start, | 4443 | kernel_vma_start, |
4444 | kernel_vma_limit - kernel_vma_start, | 4444 | kernel_vma_limit - kernel_vma_start, |
@@ -4469,10 +4469,10 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
4469 | 4469 | ||
4470 | clean_up_big_allocator: | 4470 | clean_up_big_allocator: |
4471 | if (large_vma_start < large_vma_limit) | 4471 | if (large_vma_start < large_vma_limit) |
4472 | gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); | 4472 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); |
4473 | clean_up_small_allocator: | 4473 | clean_up_small_allocator: |
4474 | if (small_vma_start < small_vma_limit) | 4474 | if (small_vma_start < small_vma_limit) |
4475 | gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); | 4475 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); |
4476 | clean_up_ptes: | 4476 | clean_up_ptes: |
4477 | free_gmmu_pages(vm, &vm->pdb); | 4477 | free_gmmu_pages(vm, &vm->pdb); |
4478 | clean_up_pdes: | 4478 | clean_up_pdes: |
@@ -4547,7 +4547,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
4547 | { | 4547 | { |
4548 | int err = -ENOMEM; | 4548 | int err = -ENOMEM; |
4549 | int pgsz_idx = gmmu_page_size_small; | 4549 | int pgsz_idx = gmmu_page_size_small; |
4550 | struct gk20a_allocator *vma; | 4550 | struct nvgpu_allocator *vma; |
4551 | struct vm_gk20a *vm = as_share->vm; | 4551 | struct vm_gk20a *vm = as_share->vm; |
4552 | struct gk20a *g = vm->mm->g; | 4552 | struct gk20a *g = vm->mm->g; |
4553 | struct vm_reserved_va_node *va_node; | 4553 | struct vm_reserved_va_node *va_node; |
@@ -4579,13 +4579,13 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
4579 | 4579 | ||
4580 | vma = &vm->vma[pgsz_idx]; | 4580 | vma = &vm->vma[pgsz_idx]; |
4581 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { | 4581 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { |
4582 | if (gk20a_alloc_initialized(&vm->fixed)) | 4582 | if (nvgpu_alloc_initialized(&vm->fixed)) |
4583 | vma = &vm->fixed; | 4583 | vma = &vm->fixed; |
4584 | vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset, | 4584 | vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset, |
4585 | (u64)args->pages * | 4585 | (u64)args->pages * |
4586 | (u64)args->page_size); | 4586 | (u64)args->page_size); |
4587 | } else { | 4587 | } else { |
4588 | vaddr_start = gk20a_alloc(vma, | 4588 | vaddr_start = nvgpu_alloc(vma, |
4589 | (u64)args->pages * | 4589 | (u64)args->pages * |
4590 | (u64)args->page_size); | 4590 | (u64)args->page_size); |
4591 | } | 4591 | } |
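Both branches of this hunk compute the same byte size; they differ only in whether user space pins the GPU VA. Condensed from the lines above (same names):

    u64 size = (u64)args->pages * (u64)args->page_size;

    if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
        vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset, size);
    else
        vaddr_start = nvgpu_alloc(vma, size);  /* allocator picks the VA */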
@@ -4621,7 +4621,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
4621 | APERTURE_INVALID); | 4621 | APERTURE_INVALID); |
4622 | if (!map_offset) { | 4622 | if (!map_offset) { |
4623 | mutex_unlock(&vm->update_gmmu_lock); | 4623 | mutex_unlock(&vm->update_gmmu_lock); |
4624 | gk20a_free(vma, vaddr_start); | 4624 | nvgpu_free(vma, vaddr_start); |
4625 | kfree(va_node); | 4625 | kfree(va_node); |
4626 | goto clean_up; | 4626 | goto clean_up; |
4627 | } | 4627 | } |
@@ -4644,7 +4644,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
4644 | { | 4644 | { |
4645 | int err = -ENOMEM; | 4645 | int err = -ENOMEM; |
4646 | int pgsz_idx; | 4646 | int pgsz_idx; |
4647 | struct gk20a_allocator *vma; | 4647 | struct nvgpu_allocator *vma; |
4648 | struct vm_gk20a *vm = as_share->vm; | 4648 | struct vm_gk20a *vm = as_share->vm; |
4649 | struct vm_reserved_va_node *va_node; | 4649 | struct vm_reserved_va_node *va_node; |
4650 | struct gk20a *g = gk20a_from_vm(vm); | 4650 | struct gk20a *g = gk20a_from_vm(vm); |
@@ -4656,11 +4656,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
4656 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? | 4656 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? |
4657 | gmmu_page_size_big : gmmu_page_size_small; | 4657 | gmmu_page_size_big : gmmu_page_size_small; |
4658 | 4658 | ||
4659 | if (gk20a_alloc_initialized(&vm->fixed)) | 4659 | if (nvgpu_alloc_initialized(&vm->fixed)) |
4660 | vma = &vm->fixed; | 4660 | vma = &vm->fixed; |
4661 | else | 4661 | else |
4662 | vma = &vm->vma[pgsz_idx]; | 4662 | vma = &vm->vma[pgsz_idx]; |
4663 | gk20a_free(vma, args->offset); | 4663 | nvgpu_free(vma, args->offset); |
4664 | 4664 | ||
4665 | mutex_lock(&vm->update_gmmu_lock); | 4665 | mutex_lock(&vm->update_gmmu_lock); |
4666 | va_node = addr_to_reservation(vm, args->offset); | 4666 | va_node = addr_to_reservation(vm, args->offset); |
@@ -4844,13 +4844,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, | |||
4844 | 4844 | ||
4845 | void gk20a_deinit_vm(struct vm_gk20a *vm) | 4845 | void gk20a_deinit_vm(struct vm_gk20a *vm) |
4846 | { | 4846 | { |
4847 | gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); | 4847 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); |
4848 | if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) | 4848 | if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) |
4849 | gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); | 4849 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); |
4850 | if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) | 4850 | if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) |
4851 | gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); | 4851 | nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); |
4852 | if (gk20a_alloc_initialized(&vm->fixed)) | 4852 | if (nvgpu_alloc_initialized(&vm->fixed)) |
4853 | gk20a_alloc_destroy(&vm->fixed); | 4853 | nvgpu_alloc_destroy(&vm->fixed); |
4854 | 4854 | ||
4855 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 4855 | gk20a_vm_free_entries(vm, &vm->pdb, 0); |
4856 | } | 4856 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d32e121a..f58b5df5 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -27,7 +27,8 @@ | |||
27 | #include <linux/version.h> | 27 | #include <linux/version.h> |
28 | #include <asm/dma-iommu.h> | 28 | #include <asm/dma-iommu.h> |
29 | #include <asm/cacheflush.h> | 29 | #include <asm/cacheflush.h> |
30 | #include "gk20a_allocator.h" | 30 | |
31 | #include <nvgpu/allocator.h> | ||
31 | 32 | ||
32 | #ifdef CONFIG_ARM64 | 33 | #ifdef CONFIG_ARM64 |
33 | #define outer_flush_range(a, b) | 34 | #define outer_flush_range(a, b) |
@@ -70,7 +71,7 @@ struct mem_desc { | |||
70 | u64 gpu_va; | 71 | u64 gpu_va; |
71 | bool fixed; /* vidmem only */ | 72 | bool fixed; /* vidmem only */ |
72 | bool user_mem; /* vidmem only */ | 73 | bool user_mem; /* vidmem only */ |
73 | struct gk20a_allocator *allocator; /* vidmem only */ | 74 | struct nvgpu_allocator *allocator; /* vidmem only */ |
74 | struct list_head clear_list_entry; /* vidmem only */ | 75 | struct list_head clear_list_entry; /* vidmem only */ |
75 | bool skip_wmb; | 76 | bool skip_wmb; |
76 | }; | 77 | }; |
@@ -295,10 +296,10 @@ struct vm_gk20a { | |||
295 | 296 | ||
296 | struct gk20a_mm_entry pdb; | 297 | struct gk20a_mm_entry pdb; |
297 | 298 | ||
298 | struct gk20a_allocator vma[gmmu_nr_page_sizes]; | 299 | struct nvgpu_allocator vma[gmmu_nr_page_sizes]; |
299 | 300 | ||
300 | /* If necessary, split fixed from non-fixed. */ | 301 | /* If necessary, split fixed from non-fixed. */ |
301 | struct gk20a_allocator fixed; | 302 | struct nvgpu_allocator fixed; |
302 | 303 | ||
303 | struct rb_root mapped_buffers; | 304 | struct rb_root mapped_buffers; |
304 | 305 | ||
@@ -421,8 +422,8 @@ struct mm_gk20a { | |||
421 | size_t bootstrap_size; | 422 | size_t bootstrap_size; |
422 | u64 bootstrap_base; | 423 | u64 bootstrap_base; |
423 | 424 | ||
424 | struct gk20a_allocator allocator; | 425 | struct nvgpu_allocator allocator; |
425 | struct gk20a_allocator bootstrap_allocator; | 426 | struct nvgpu_allocator bootstrap_allocator; |
426 | 427 | ||
427 | u32 ce_ctx_id; | 428 | u32 ce_ctx_id; |
428 | volatile bool cleared; | 429 | volatile bool cleared; |
@@ -470,13 +471,13 @@ static inline u64 __nv_gmmu_va_small_page_limit(void) | |||
470 | 471 | ||
471 | static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) | 472 | static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) |
472 | { | 473 | { |
473 | struct gk20a_allocator *a = &vm->vma[gmmu_page_size_big]; | 474 | struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big]; |
474 | 475 | ||
475 | if (!vm->big_pages) | 476 | if (!vm->big_pages) |
476 | return 0; | 477 | return 0; |
477 | 478 | ||
478 | return addr >= gk20a_alloc_base(a) && | 479 | return addr >= nvgpu_alloc_base(a) && |
479 | addr < gk20a_alloc_base(a) + gk20a_alloc_length(a); | 480 | addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a); |
480 | } | 481 | } |
481 | 482 | ||
482 | /* | 483 | /* |
@@ -825,7 +826,7 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); | |||
825 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; | 826 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; |
826 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; | 827 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; |
827 | 828 | ||
828 | static inline void *nvgpu_alloc(size_t size, bool clear) | 829 | static inline void *nvgpu_kalloc(size_t size, bool clear) |
829 | { | 830 | { |
830 | void *p; | 831 | void *p; |
831 | 832 | ||
@@ -844,7 +845,7 @@ static inline void *nvgpu_alloc(size_t size, bool clear) | |||
844 | return p; | 845 | return p; |
845 | } | 846 | } |
846 | 847 | ||
847 | static inline void nvgpu_free(void *p) | 848 | static inline void nvgpu_kfree(void *p) |
848 | { | 849 | { |
849 | if (virt_addr_valid(p)) | 850 | if (virt_addr_valid(p)) |
850 | kfree(p); | 851 | kfree(p); |
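Only fragments of the renamed helpers survive the hunk context. A minimal sketch of the pattern they implement — the PAGE_SIZE cutover and the vfree() branch are assumptions, since neither is visible in this diff; only the virt_addr_valid()/kfree() routing above is:

    static inline void *nvgpu_kalloc_sketch(size_t size, bool clear)
    {
        /* Assumed cutover: large requests go to vmalloc(). */
        if (size > PAGE_SIZE)
            return clear ? vzalloc(size) : vmalloc(size);
        return clear ? kzalloc(size, GFP_KERNEL) : kmalloc(size, GFP_KERNEL);
    }

    static inline void nvgpu_kfree_sketch(void *p)
    {
        if (virt_addr_valid(p))
            kfree(p);   /* linear-mapped, so it came from kmalloc() */
        else
            vfree(p);   /* assumed: everything else came from vmalloc() */
    }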
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h deleted file mode 100644 index 7d7f43c2..00000000 --- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h +++ /dev/null | |||
@@ -1,164 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef PAGE_ALLOCATOR_PRIV_H | ||
18 | #define PAGE_ALLOCATOR_PRIV_H | ||
19 | |||
20 | #include <linux/list.h> | ||
21 | #include <linux/rbtree.h> | ||
22 | |||
23 | #include "gk20a_allocator.h" | ||
24 | |||
25 | struct gk20a_allocator; | ||
26 | |||
27 | /* | ||
28 | * This allocator implements the ability to do SLAB-style allocation since the | ||
29 | * GPU has two page sizes available - 4k and 64k/128k. When the default | ||
30 | * granularity is the large page size (64k/128k), small allocations become very | ||
31 | * space-inefficient. This is most notable in PDE and PTE blocks, which are 4k | ||
32 | * in size. | ||
33 | * | ||
34 | * Thus we need the ability to suballocate in 64k pages. The way we do this for | ||
35 | * the GPU is as follows. We have several buckets for sub-64K allocations: | ||
36 | * | ||
37 | * B0 - 4k | ||
38 | * B1 - 8k | ||
39 | * B2 - 16k | ||
40 | * B3 - 32k | ||
41 | * B4 - 64k (for when large pages are 128k) | ||
42 | * | ||
43 | * When an allocation comes in for less than the large page size (from now on | ||
44 | * assumed to be 64k), the allocation is satisfied by one of the buckets. | ||
45 | */ | ||
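A worked instance of the bucket selection this comment describes: with 4k as the smallest slab, the bucket index is the log2 distance from 4k, so a 9k request rounds up to 16k and lands in B2. A hypothetical helper (not part of this file) under that assumption:

    #include <linux/log2.h>
    #include <linux/sizes.h>

    /* Hypothetical: bucket n serves objects of size 4k << n. */
    static inline int slab_bucket_index(u64 size)
    {
        if (size <= SZ_4K)
            return 0;
        return ilog2(roundup_pow_of_two(size)) - ilog2(SZ_4K);
    }
    /* slab_bucket_index(9 << 10) == 2, i.e. the 16k bucket (B2). */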
46 | struct page_alloc_slab { | ||
47 | struct list_head empty; | ||
48 | struct list_head partial; | ||
49 | struct list_head full; | ||
50 | |||
51 | int nr_empty; | ||
52 | int nr_partial; | ||
53 | int nr_full; | ||
54 | |||
55 | u32 slab_size; | ||
56 | }; | ||
57 | |||
58 | enum slab_page_state { | ||
59 | SP_EMPTY, | ||
60 | SP_PARTIAL, | ||
61 | SP_FULL, | ||
62 | SP_NONE | ||
63 | }; | ||
64 | |||
65 | struct page_alloc_slab_page { | ||
66 | unsigned long bitmap; | ||
67 | u64 page_addr; | ||
68 | u32 slab_size; | ||
69 | |||
70 | u32 nr_objects; | ||
71 | u32 nr_objects_alloced; | ||
72 | |||
73 | enum slab_page_state state; | ||
74 | |||
75 | struct page_alloc_slab *owner; | ||
76 | struct list_head list_entry; | ||
77 | }; | ||
78 | |||
79 | struct page_alloc_chunk { | ||
80 | struct list_head list_entry; | ||
81 | |||
82 | u64 base; | ||
83 | u64 length; | ||
84 | }; | ||
85 | |||
86 | /* | ||
87 | * Struct to handle internal management of page allocation. It holds a list | ||
88 | * of the chunks of pages that make up the overall allocation - much like a | ||
89 | * scatter-gather table. | ||
90 | */ | ||
91 | struct gk20a_page_alloc { | ||
92 | struct list_head alloc_chunks; | ||
93 | |||
94 | int nr_chunks; | ||
95 | u64 length; | ||
96 | |||
97 | /* | ||
98 | * Only useful for the RB tree - since the alloc may have discontiguous | ||
99 | * pages, the base is essentially irrelevant except for the fact that it | ||
100 | * is guaranteed to be unique. | ||
101 | */ | ||
102 | u64 base; | ||
103 | |||
104 | struct rb_node tree_entry; | ||
105 | |||
106 | /* | ||
107 | * Set if this is a slab alloc. Points back to the slab page that owns | ||
108 | * this particular allocation. nr_chunks will always be 1 if this is | ||
109 | * set. | ||
110 | */ | ||
111 | struct page_alloc_slab_page *slab_page; | ||
112 | }; | ||
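The alloc_chunks list behaves like the scatter-gather table the comment above invokes: one node per physically contiguous run. A hypothetical debug walker (not in this file) over the fields shown:

    static void gk20a_page_alloc_dump(struct gk20a_page_alloc *alloc)
    {
        struct page_alloc_chunk *chunk;

        /* Walk each contiguous chunk of the (possibly discontiguous) alloc. */
        list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry)
            pr_debug("chunk: base=0x%llx length=0x%llx\n",
                     chunk->base, chunk->length);
    }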
113 | |||
114 | struct gk20a_page_allocator { | ||
115 | struct gk20a_allocator *owner; /* Owner of this allocator. */ | ||
116 | |||
117 | /* | ||
118 | * Use a buddy allocator to manage the allocation of the underlying | ||
119 | * pages. This lets us abstract the discontiguous allocation handling | ||
120 | * out of the annoyingly complicated buddy allocator. | ||
121 | */ | ||
122 | struct gk20a_allocator source_allocator; | ||
123 | |||
124 | /* | ||
125 | * Page params. | ||
126 | */ | ||
127 | u64 base; | ||
128 | u64 length; | ||
129 | u64 page_size; | ||
130 | u32 page_shift; | ||
131 | |||
132 | struct rb_root allocs; /* Outstanding allocations. */ | ||
133 | |||
134 | struct page_alloc_slab *slabs; | ||
135 | int nr_slabs; | ||
136 | |||
137 | u64 flags; | ||
138 | |||
139 | /* | ||
140 | * Stat tracking. | ||
141 | */ | ||
142 | u64 nr_allocs; | ||
143 | u64 nr_frees; | ||
144 | u64 nr_fixed_allocs; | ||
145 | u64 nr_fixed_frees; | ||
146 | u64 nr_slab_allocs; | ||
147 | u64 nr_slab_frees; | ||
148 | u64 pages_alloced; | ||
149 | u64 pages_freed; | ||
150 | }; | ||
151 | |||
152 | static inline struct gk20a_page_allocator *page_allocator( | ||
153 | struct gk20a_allocator *a) | ||
154 | { | ||
155 | return (struct gk20a_page_allocator *)(a)->priv; | ||
156 | } | ||
157 | |||
158 | static inline struct gk20a_allocator *palloc_owner( | ||
159 | struct gk20a_page_allocator *a) | ||
160 | { | ||
161 | return a->owner; | ||
162 | } | ||
163 | |||
164 | #endif | ||
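Usage sketch for the two cast helpers above — they round-trip through the priv pointer that is assumed to have been set when the page allocator was created (variable names hypothetical):

    struct gk20a_page_allocator *pa = page_allocator(a);  /* a->priv */
    struct gk20a_allocator *owner = palloc_owner(pa);     /* == a */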
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index e221be11..56ebda1a 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -2896,8 +2896,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) | |||
2896 | { | 2896 | { |
2897 | gk20a_dbg_fn(""); | 2897 | gk20a_dbg_fn(""); |
2898 | 2898 | ||
2899 | if (gk20a_alloc_initialized(&pmu->dmem)) | 2899 | if (nvgpu_alloc_initialized(&pmu->dmem)) |
2900 | gk20a_alloc_destroy(&pmu->dmem); | 2900 | nvgpu_alloc_destroy(&pmu->dmem); |
2901 | 2901 | ||
2902 | release_firmware(pmu->fw); | 2902 | release_firmware(pmu->fw); |
2903 | } | 2903 | } |
@@ -3607,7 +3607,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) | |||
3607 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); | 3607 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); |
3608 | 3608 | ||
3609 | if (!pmu->sample_buffer) | 3609 | if (!pmu->sample_buffer) |
3610 | pmu->sample_buffer = gk20a_alloc(&pmu->dmem, | 3610 | pmu->sample_buffer = nvgpu_alloc(&pmu->dmem, |
3611 | 2 * sizeof(u16)); | 3611 | 2 * sizeof(u16)); |
3612 | if (!pmu->sample_buffer) { | 3612 | if (!pmu->sample_buffer) { |
3613 | gk20a_err(dev_from_gk20a(g), | 3613 | gk20a_err(dev_from_gk20a(g), |
@@ -3708,7 +3708,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, | |||
3708 | for (i = 0; i < PMU_QUEUE_COUNT; i++) | 3708 | for (i = 0; i < PMU_QUEUE_COUNT; i++) |
3709 | pmu_queue_init(pmu, i, init); | 3709 | pmu_queue_init(pmu, i, init); |
3710 | 3710 | ||
3711 | if (!gk20a_alloc_initialized(&pmu->dmem)) { | 3711 | if (!nvgpu_alloc_initialized(&pmu->dmem)) { |
3712 | /* Align start and end addresses */ | 3712 | /* Align start and end addresses */ |
3713 | u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), | 3713 | u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), |
3714 | PMU_DMEM_ALLOC_ALIGNMENT); | 3714 | PMU_DMEM_ALLOC_ALIGNMENT); |
@@ -3716,9 +3716,9 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, | |||
3716 | pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & | 3716 | pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & |
3717 | ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); | 3717 | ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); |
3718 | u32 size = end - start; | 3718 | u32 size = end - start; |
3719 | gk20a_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem", | 3719 | nvgpu_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem", |
3720 | start, size, | 3720 | start, size, |
3721 | PMU_DMEM_ALLOC_ALIGNMENT, 0); | 3721 | PMU_DMEM_ALLOC_ALIGNMENT, 0); |
3722 | } | 3722 | } |
3723 | 3723 | ||
3724 | pmu->pmu_ready = true; | 3724 | pmu->pmu_ready = true; |
@@ -3855,12 +3855,12 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, | |||
3855 | seq->callback = NULL; | 3855 | seq->callback = NULL; |
3856 | if (pv->pmu_allocation_get_dmem_size(pmu, | 3856 | if (pv->pmu_allocation_get_dmem_size(pmu, |
3857 | pv->get_pmu_seq_in_a_ptr(seq)) != 0) | 3857 | pv->get_pmu_seq_in_a_ptr(seq)) != 0) |
3858 | gk20a_free(&pmu->dmem, | 3858 | nvgpu_free(&pmu->dmem, |
3859 | pv->pmu_allocation_get_dmem_offset(pmu, | 3859 | pv->pmu_allocation_get_dmem_offset(pmu, |
3860 | pv->get_pmu_seq_in_a_ptr(seq))); | 3860 | pv->get_pmu_seq_in_a_ptr(seq))); |
3861 | if (pv->pmu_allocation_get_dmem_size(pmu, | 3861 | if (pv->pmu_allocation_get_dmem_size(pmu, |
3862 | pv->get_pmu_seq_out_a_ptr(seq)) != 0) | 3862 | pv->get_pmu_seq_out_a_ptr(seq)) != 0) |
3863 | gk20a_free(&pmu->dmem, | 3863 | nvgpu_free(&pmu->dmem, |
3864 | pv->pmu_allocation_get_dmem_offset(pmu, | 3864 | pv->pmu_allocation_get_dmem_offset(pmu, |
3865 | pv->get_pmu_seq_out_a_ptr(seq))); | 3865 | pv->get_pmu_seq_out_a_ptr(seq))); |
3866 | 3866 | ||
@@ -4601,7 +4601,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, | |||
4601 | (u16)max(payload->in.size, payload->out.size)); | 4601 | (u16)max(payload->in.size, payload->out.size)); |
4602 | 4602 | ||
4603 | *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = | 4603 | *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = |
4604 | gk20a_alloc(&pmu->dmem, | 4604 | nvgpu_alloc(&pmu->dmem, |
4605 | pv->pmu_allocation_get_dmem_size(pmu, in)); | 4605 | pv->pmu_allocation_get_dmem_size(pmu, in)); |
4606 | if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) | 4606 | if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) |
4607 | goto clean_up; | 4607 | goto clean_up; |
@@ -4644,7 +4644,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, | |||
4644 | 4644 | ||
4645 | if (payload->in.buf != payload->out.buf) { | 4645 | if (payload->in.buf != payload->out.buf) { |
4646 | *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = | 4646 | *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = |
4647 | gk20a_alloc(&pmu->dmem, | 4647 | nvgpu_alloc(&pmu->dmem, |
4648 | pv->pmu_allocation_get_dmem_size(pmu, out)); | 4648 | pv->pmu_allocation_get_dmem_size(pmu, out)); |
4649 | if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, | 4649 | if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, |
4650 | out))) | 4650 | out))) |
@@ -4694,10 +4694,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, | |||
4694 | clean_up: | 4694 | clean_up: |
4695 | gk20a_dbg_fn("fail"); | 4695 | gk20a_dbg_fn("fail"); |
4696 | if (in) | 4696 | if (in) |
4697 | gk20a_free(&pmu->dmem, | 4697 | nvgpu_free(&pmu->dmem, |
4698 | pv->pmu_allocation_get_dmem_offset(pmu, in)); | 4698 | pv->pmu_allocation_get_dmem_offset(pmu, in)); |
4699 | if (out) | 4699 | if (out) |
4700 | gk20a_free(&pmu->dmem, | 4700 | nvgpu_free(&pmu->dmem, |
4701 | pv->pmu_allocation_get_dmem_offset(pmu, out)); | 4701 | pv->pmu_allocation_get_dmem_offset(pmu, out)); |
4702 | 4702 | ||
4703 | pmu_seq_release(pmu, seq); | 4703 | pmu_seq_release(pmu, seq); |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index cf4f3b52..32e2ef54 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | |||
@@ -709,7 +709,7 @@ struct pmu_gk20a { | |||
709 | struct mutex pmu_copy_lock; | 709 | struct mutex pmu_copy_lock; |
710 | struct mutex pmu_seq_lock; | 710 | struct mutex pmu_seq_lock; |
711 | 711 | ||
712 | struct gk20a_allocator dmem; | 712 | struct nvgpu_allocator dmem; |
713 | 713 | ||
714 | u32 *ucode_image; | 714 | u32 *ucode_image; |
715 | bool pmu_ready; | 715 | bool pmu_ready; |
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h index cf724fdb..8e09fcfc 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | |||
@@ -18,10 +18,11 @@ | |||
18 | #include <linux/list.h> | 18 | #include <linux/list.h> |
19 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
20 | 20 | ||
21 | #include <nvgpu/allocator.h> | ||
22 | |||
21 | #include "gk20a.h" | 23 | #include "gk20a.h" |
22 | #include "mm_gk20a.h" | 24 | #include "mm_gk20a.h" |
23 | #include "channel_gk20a.h" | 25 | #include "channel_gk20a.h" |
24 | #include "gk20a_allocator.h" | ||
25 | 26 | ||
26 | #define gpu_sema_dbg(fmt, args...) \ | 27 | #define gpu_sema_dbg(fmt, args...) \ |
27 | gk20a_dbg(gpu_dbg_sema, fmt, ##args) | 28 | gk20a_dbg(gpu_dbg_sema, fmt, ##args) |