author     Alex Waterman <alexw@nvidia.com>	2017-03-15 18:49:18 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-31 20:21:34 -0400
commit     dd88aed5cc3088285c5d0b900aebf705f52178c5 (patch)
tree       371ffb74c97305be99fe312b45e30793dab36926 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     56df8c58088b5c8b4a09ce6f5e195614251bf8d0 (diff)
gpu: nvgpu: Split out pramin code
Split out the pramin interface code in preparation for splitting out the
mem_desc code.

JIRA NVGPU-12

Change-Id: I3f03447ea213cc15669b0934fa706e7cb22599b7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1323323
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
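For reference, the caller-side conversion in this diff is mechanical: mm_gk20a.c now
includes <nvgpu/pramin.h> and calls nvgpu_pramin_access_batched() where it previously
called the file-local pramin_access_batched(); the per-word batch callbacks
(pramin_access_batch_rd_n and friends) are still supplied by the caller. A minimal
sketch of the new call pattern, modelled on the gk20a_mem_rd32() hunk below
(example_vidmem_rd32 is a hypothetical wrapper, not part of this change, and the
driver types come from the nvgpu headers):

    #include <nvgpu/pramin.h>

    /* Read one 32-bit word from a VIDMEM buffer via the BAR0 PRAMIN window. */
    static u32 example_vidmem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
    {
    	u32 value;
    	u32 *p = &value;

    	/* One 4-byte access, batched through the relocated helper. */
    	nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
    			sizeof(u32), pramin_access_batch_rd_n, &p);

    	return value;
    }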
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 178
1 file changed, 10 insertions(+), 168 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 05535412..94d31273 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -32,6 +32,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
+#include <nvgpu/pramin.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/page_allocator.h>
@@ -50,13 +51,6 @@
 #include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
 
-/*
- * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the
- * boot, even for buffers that would work via cpu_va. In runtime, the flag is
- * in debugfs, called "force_pramin".
- */
-#define GK20A_FORCE_PRAMIN_DEFAULT false
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
 #endif
@@ -74,7 +68,7 @@ is_vidmem_page_alloc(u64 addr)
 	return !!(addr & 1ULL);
 }
 
-static inline struct nvgpu_page_alloc *
+struct nvgpu_page_alloc *
 get_vidmem_page_alloc(struct scatterlist *sgl)
 {
 	u64 addr;
@@ -121,151 +115,6 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
-/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
-static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
-		struct page_alloc_chunk *chunk, u32 w)
-{
-	u64 bufbase = chunk->base;
-	u64 addr = bufbase + w * sizeof(u32);
-	u32 hi = (u32)((addr & ~(u64)0xfffff)
-		>> bus_bar0_window_target_bar0_window_base_shift_v());
-	u32 lo = (u32)(addr & 0xfffff);
-	u32 win = gk20a_aperture_mask(g, mem,
-			bus_bar0_window_target_sys_mem_noncoherent_f(),
-			bus_bar0_window_target_vid_mem_f()) |
-		bus_bar0_window_base_f(hi);
-
-	gk20a_dbg(gpu_dbg_mem,
-		"0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
-		hi, lo, mem, chunk, bufbase,
-		bufbase + chunk->length, chunk->length);
-
-	WARN_ON(!bufbase);
-
-	nvgpu_spinlock_acquire(&g->mm.pramin_window_lock);
-
-	if (g->mm.pramin_window != win) {
-		gk20a_writel(g, bus_bar0_window_r(), win);
-		gk20a_readl(g, bus_bar0_window_r());
-		g->mm.pramin_window = win;
-	}
-
-	return lo;
-}
-
-static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
-		struct page_alloc_chunk *chunk)
-{
-	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
-
-	nvgpu_spinlock_release(&g->mm.pramin_window_lock);
-}
-
-/*
- * Batch innerloop for the function below once per each PRAMIN range (some
- * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}.
- */
-typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
-		u32 **arg);
-
-/*
- * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that.
- * This same loop is used for read/write/memset. Offset and size in bytes.
- * One call to "loop" is done per range, with "arg" supplied.
- */
-static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
-		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
-{
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
-	u32 byteoff, start_reg, until_end, n;
-
-	alloc = get_vidmem_page_alloc(mem->sgt->sgl);
-	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
-		if (offset >= chunk->length)
-			offset -= chunk->length;
-		else
-			break;
-	}
-
-	offset /= sizeof(u32);
-
-	while (size) {
-		byteoff = gk20a_pramin_enter(g, mem, chunk, offset);
-		start_reg = pram_data032_r(byteoff / sizeof(u32));
-		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
-
-		n = min3(size, until_end, (u32)(chunk->length - offset));
-
-		loop(g, start_reg, n / sizeof(u32), arg);
-
-		/* read back to synchronize accesses */
-		gk20a_readl(g, start_reg);
-		gk20a_pramin_exit(g, mem, chunk);
-
-		size -= n;
-
-		if (n == (chunk->length - offset)) {
-			chunk = list_next_entry(chunk, list_entry);
-			offset = 0;
-		} else {
-			offset += n / sizeof(u32);
-		}
-	}
-}
-
-static inline void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
-	u32 words, u32 **arg)
-{
-	u32 r = start, *dest_u32 = *arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		*dest_u32++ = gk20a_readl(g, r);
-		r += sizeof(u32);
-	}
-
-	*arg = dest_u32;
-}
-
-static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
-	u32 words, u32 **arg)
-{
-	u32 r = start, *src_u32 = *arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		writel_relaxed(*src_u32++, g->regs + r);
-		r += sizeof(u32);
-	}
-
-	*arg = src_u32;
-}
-
-static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
-	u32 words, u32 **arg)
-{
-	u32 r = start, repeat = **arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		writel_relaxed(repeat, g->regs + r);
-		r += sizeof(u32);
-	}
-}
-
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
 {
 	u32 data = 0;
@@ -282,8 +131,8 @@ u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
 		u32 value;
 		u32 *p = &value;
 
-		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
-				pramin_access_batch_rd_n, &p);
+		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
+				sizeof(u32), pramin_access_batch_rd_n, &p);
 
 		data = value;
 
@@ -319,7 +168,7 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 *dest_u32 = dest;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_rd_n, &dest_u32);
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
@@ -340,8 +189,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 		u32 value = data;
 		u32 *p = &value;
 
-		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
-				pramin_access_batch_wr_n, &p);
+		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
+				sizeof(u32), pramin_access_batch_wr_n, &p);
 		if (!mem->skip_wmb)
 			wmb();
 	} else {
@@ -374,7 +223,7 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 *src_u32 = src;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_wr_n, &src_u32);
 		if (!mem->skip_wmb)
 			wmb();
@@ -406,7 +255,7 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
 		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
 		u32 *p = &repeat_value;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_set, &p);
 		if (!mem->skip_wmb)
 			wmb();
@@ -844,13 +693,6 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
 	return gk20a_gmmu_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
 }
 
-static void gk20a_init_pramin(struct mm_gk20a *mm)
-{
-	mm->pramin_window = 0;
-	nvgpu_spinlock_init(&mm->pramin_window_lock);
-	mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_vidmem_clear_all(struct gk20a *g)
 {
@@ -1013,7 +855,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 		(int)(mm->channel.user_size >> 20),
 		(int)(mm->channel.kernel_size >> 20));
 
-	gk20a_init_pramin(mm);
+	nvgpu_init_pramin(mm);
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 