diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vidmem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vidmem.c | 259 |
1 file changed, 259 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c new file mode 100644 index 00000000..1ba07ca6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c | |||
@@ -0,0 +1,259 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <linux/scatterlist.h> | ||
24 | |||
25 | #include <nvgpu/vidmem.h> | ||
26 | #include <nvgpu/page_allocator.h> | ||
27 | |||
28 | #include "gk20a/gk20a.h" | ||
29 | #include "gk20a/mm_gk20a.h" | ||
30 | |||
31 | void gk20a_vidmem_destroy(struct gk20a *g) | ||
32 | { | ||
33 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) | ||
34 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); | ||
35 | } | ||
36 | |||
/*
 * Zero all of vidmem except the bootstrap/WPR carveout, using the copy
 * engine (CE) memset operation.  Vidmem is cleared in two pieces: region 1
 * runs from the start of vidmem up to the bootstrap base, region 2 from the
 * end of the bootstrap region onward.  Only the last CE operation is given
 * a fence; waiting on it guarantees both memsets have completed since CE
 * ops on one context execute in order.
 *
 * Returns 0 on success, -EINVAL if no CE context exists yet, or the error
 * from the CE submission / fence wait.
 */
int gk20a_vidmem_clear_all(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct gk20a_fence *gk20a_fence_out = NULL;
	u64 region2_base = 0;
	int err = 0;

	/* ~0 means no CE context was allocated - cannot clear vidmem yet. */
	if (mm->vidmem.ce_ctx_id == (u32)~0)
		return -EINVAL;

	/* Region 1: [vidmem.base, bootstrap_base). No fence requested. */
	err = gk20a_ce_execute_ops(g,
			mm->vidmem.ce_ctx_id,
			0,
			mm->vidmem.base,
			mm->vidmem.bootstrap_base - mm->vidmem.base,
			0x00000000,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			NULL,
			0,
			NULL);
	if (err) {
		nvgpu_err(g,
			"Failed to clear vidmem region 1 : %d", err);
		return err;
	}

	/* Region 2 starts right after the bootstrap carveout. */
	region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;

	/*
	 * NOTE(review): gk20a_init_vidmem stores "size - base" into
	 * mm->vidmem.size, so "mm->vidmem.size - region2_base" looks like it
	 * may fall short of the true end of vidmem by mm->vidmem.base bytes.
	 * Confirm the intended extent of region 2.
	 */
	err = gk20a_ce_execute_ops(g,
			mm->vidmem.ce_ctx_id,
			0,
			region2_base,
			mm->vidmem.size - region2_base,
			0x00000000,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			NULL,
			0,
			&gk20a_fence_out);
	if (err) {
		nvgpu_err(g,
			"Failed to clear vidmem region 2 : %d", err);
		return err;
	}

	if (gk20a_fence_out) {
		struct nvgpu_timeout timeout;

		nvgpu_timeout_init(g, &timeout,
				   gk20a_get_gr_idle_timeout(g),
				   NVGPU_TIMER_CPU_TIMER);

		/*
		 * Retry the wait while it is merely interrupted
		 * (-ERESTARTSYS), but give up once the CPU-side timeout
		 * expires so an unsignalled fence cannot hang us forever.
		 */
		do {
			err = gk20a_fence_wait(g, gk20a_fence_out,
					       gk20a_get_gr_idle_timeout(g));
		} while (err == -ERESTARTSYS &&
			 !nvgpu_timeout_expired(&timeout));

		gk20a_fence_put(gk20a_fence_out);
		if (err) {
			nvgpu_err(g,
				"fence wait failed for CE execute ops");
			return err;
		}
	}

	/* Remember that the one-time full clear has been done. */
	mm->vidmem.cleared = true;

	return 0;
}
108 | |||
109 | int gk20a_init_vidmem(struct mm_gk20a *mm) | ||
110 | { | ||
111 | struct gk20a *g = mm->g; | ||
112 | size_t size = g->ops.mm.get_vidmem_size ? | ||
113 | g->ops.mm.get_vidmem_size(g) : 0; | ||
114 | u64 bootstrap_base, bootstrap_size, base; | ||
115 | u64 default_page_size = SZ_64K; | ||
116 | int err; | ||
117 | |||
118 | static struct nvgpu_alloc_carveout wpr_co = | ||
119 | NVGPU_CARVEOUT("wpr-region", 0, SZ_16M); | ||
120 | |||
121 | if (!size) | ||
122 | return 0; | ||
123 | |||
124 | wpr_co.base = size - SZ_256M; | ||
125 | bootstrap_base = wpr_co.base; | ||
126 | bootstrap_size = SZ_16M; | ||
127 | base = default_page_size; | ||
128 | |||
129 | /* | ||
130 | * Bootstrap allocator for use before the CE is initialized (CE | ||
131 | * initialization requires vidmem but we want to use the CE to zero | ||
132 | * out vidmem before allocating it... | ||
133 | */ | ||
134 | err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, | ||
135 | "vidmem-bootstrap", | ||
136 | bootstrap_base, bootstrap_size, | ||
137 | SZ_4K, 0); | ||
138 | |||
139 | err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator, | ||
140 | "vidmem", | ||
141 | base, size - base, | ||
142 | default_page_size, | ||
143 | GPU_ALLOC_4K_VIDMEM_PAGES); | ||
144 | if (err) { | ||
145 | nvgpu_err(g, "Failed to register vidmem for size %zu: %d", | ||
146 | size, err); | ||
147 | return err; | ||
148 | } | ||
149 | |||
150 | /* Reserve bootstrap region in vidmem allocator */ | ||
151 | nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); | ||
152 | |||
153 | mm->vidmem.base = base; | ||
154 | mm->vidmem.size = size - base; | ||
155 | mm->vidmem.bootstrap_base = bootstrap_base; | ||
156 | mm->vidmem.bootstrap_size = bootstrap_size; | ||
157 | |||
158 | nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); | ||
159 | |||
160 | INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker); | ||
161 | nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); | ||
162 | nvgpu_init_list_node(&mm->vidmem.clear_list_head); | ||
163 | nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); | ||
164 | |||
165 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | ||
166 | |||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) | ||
171 | { | ||
172 | struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator; | ||
173 | |||
174 | gk20a_dbg_fn(""); | ||
175 | |||
176 | if (!nvgpu_alloc_initialized(allocator)) | ||
177 | return -ENOSYS; | ||
178 | |||
179 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | ||
180 | *space = nvgpu_alloc_space(allocator) + | ||
181 | nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending); | ||
182 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | ||
187 | { | ||
188 | struct gk20a_fence *gk20a_fence_out = NULL; | ||
189 | struct gk20a_fence *gk20a_last_fence = NULL; | ||
190 | struct nvgpu_page_alloc *alloc = NULL; | ||
191 | void *sgl = NULL; | ||
192 | int err = 0; | ||
193 | |||
194 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) | ||
195 | return -EINVAL; | ||
196 | |||
197 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); | ||
198 | |||
199 | nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) { | ||
200 | if (gk20a_last_fence) | ||
201 | gk20a_fence_put(gk20a_last_fence); | ||
202 | |||
203 | err = gk20a_ce_execute_ops(g, | ||
204 | g->mm.vidmem.ce_ctx_id, | ||
205 | 0, | ||
206 | nvgpu_sgt_get_phys(&alloc->sgt, sgl), | ||
207 | nvgpu_sgt_get_length(&alloc->sgt, sgl), | ||
208 | 0x00000000, | ||
209 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
210 | NVGPU_CE_MEMSET, | ||
211 | NULL, | ||
212 | 0, | ||
213 | &gk20a_fence_out); | ||
214 | |||
215 | if (err) { | ||
216 | nvgpu_err(g, | ||
217 | "Failed gk20a_ce_execute_ops[%d]", err); | ||
218 | return err; | ||
219 | } | ||
220 | |||
221 | gk20a_last_fence = gk20a_fence_out; | ||
222 | } | ||
223 | |||
224 | if (gk20a_last_fence) { | ||
225 | struct nvgpu_timeout timeout; | ||
226 | |||
227 | nvgpu_timeout_init(g, &timeout, | ||
228 | gk20a_get_gr_idle_timeout(g), | ||
229 | NVGPU_TIMER_CPU_TIMER); | ||
230 | |||
231 | do { | ||
232 | err = gk20a_fence_wait(g, gk20a_last_fence, | ||
233 | gk20a_get_gr_idle_timeout(g)); | ||
234 | } while (err == -ERESTARTSYS && | ||
235 | !nvgpu_timeout_expired(&timeout)); | ||
236 | |||
237 | gk20a_fence_put(gk20a_last_fence); | ||
238 | if (err) | ||
239 | nvgpu_err(g, | ||
240 | "fence wait failed for CE execute ops"); | ||
241 | } | ||
242 | |||
243 | return err; | ||
244 | } | ||
245 | |||
246 | struct nvgpu_mem *get_pending_mem_desc(struct mm_gk20a *mm) | ||
247 | { | ||
248 | struct nvgpu_mem *mem = NULL; | ||
249 | |||
250 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
251 | if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { | ||
252 | mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, | ||
253 | nvgpu_mem, clear_list_entry); | ||
254 | nvgpu_list_del(&mem->clear_list_entry); | ||
255 | } | ||
256 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
257 | |||
258 | return mem; | ||
259 | } | ||