author     Alex Waterman <alexw@nvidia.com>	2017-03-15 18:49:18 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-31 20:21:34 -0400
commit     dd88aed5cc3088285c5d0b900aebf705f52178c5
tree       371ffb74c97305be99fe312b45e30793dab36926 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     56df8c58088b5c8b4a09ce6f5e195614251bf8d0
gpu: nvgpu: Split out pramin code
Split out the pramin interface code in preparation for splitting
out the mem_desc code.
JIRA NVGPU-12
Change-Id: I3f03447ea213cc15669b0934fa706e7cb22599b7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1323323
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  178
 1 file changed, 10 insertions(+), 168 deletions(-)
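The hunks below remove the PRAMIN window plumbing from mm_gk20a.c and switch the gk20a_mem_* call sites over to the new nvgpu_pramin interface. The heart of the removed gk20a_pramin_enter() is splitting a vidmem byte address at the 1 MB PRAMIN window boundary into a BAR0 window base and an in-window offset. A minimal standalone sketch of that arithmetic follows; BAR0_WINDOW_SHIFT is an assumed stand-in for the value returned by bus_bar0_window_target_bar0_window_base_shift_v().

/*
 * Sketch of the BAR0/PRAMIN window split performed by the removed
 * gk20a_pramin_enter(): the upper bits of a vidmem address select a
 * 1 MB-aligned window base, the low 20 bits address within the window.
 */
#include <stdint.h>
#include <stdio.h>

#define PRAMIN_WINDOW_MASK 0xfffffu	/* low 20 bits: 1 MB window */
#define BAR0_WINDOW_SHIFT  16		/* assumed shift for the base field */

int main(void)
{
	uint64_t addr = 0x12345678ull;	/* example vidmem byte address */
	uint32_t hi = (uint32_t)((addr & ~(uint64_t)PRAMIN_WINDOW_MASK)
				 >> BAR0_WINDOW_SHIFT);
	uint32_t lo = (uint32_t)(addr & PRAMIN_WINDOW_MASK);

	printf("base field 0x%08x, in-window offset 0x%05x\n",
	       (unsigned)hi, (unsigned)lo);
	return 0;
}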
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 05535412..94d31273 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -32,6 +32,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
+#include <nvgpu/pramin.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/page_allocator.h>
@@ -50,13 +51,6 @@
 #include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
 
-/*
- * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the
- * boot, even for buffers that would work via cpu_va. In runtime, the flag is
- * in debugfs, called "force_pramin".
- */
-#define GK20A_FORCE_PRAMIN_DEFAULT	false
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
 #endif
@@ -74,7 +68,7 @@ is_vidmem_page_alloc(u64 addr)
 	return !!(addr & 1ULL);
 }
 
-static inline struct nvgpu_page_alloc *
+struct nvgpu_page_alloc *
 get_vidmem_page_alloc(struct scatterlist *sgl)
 {
 	u64 addr;
@@ -121,151 +115,6 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
-/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
-static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
-			      struct page_alloc_chunk *chunk, u32 w)
-{
-	u64 bufbase = chunk->base;
-	u64 addr = bufbase + w * sizeof(u32);
-	u32 hi = (u32)((addr & ~(u64)0xfffff)
-		       >> bus_bar0_window_target_bar0_window_base_shift_v());
-	u32 lo = (u32)(addr & 0xfffff);
-	u32 win = gk20a_aperture_mask(g, mem,
-			bus_bar0_window_target_sys_mem_noncoherent_f(),
-			bus_bar0_window_target_vid_mem_f()) |
-		bus_bar0_window_base_f(hi);
-
-	gk20a_dbg(gpu_dbg_mem,
-		  "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
-		  hi, lo, mem, chunk, bufbase,
-		  bufbase + chunk->length, chunk->length);
-
-	WARN_ON(!bufbase);
-
-	nvgpu_spinlock_acquire(&g->mm.pramin_window_lock);
-
-	if (g->mm.pramin_window != win) {
-		gk20a_writel(g, bus_bar0_window_r(), win);
-		gk20a_readl(g, bus_bar0_window_r());
-		g->mm.pramin_window = win;
-	}
-
-	return lo;
-}
-
-static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
-			      struct page_alloc_chunk *chunk)
-{
-	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
-
-	nvgpu_spinlock_release(&g->mm.pramin_window_lock);
-}
-
-/*
- * Batch innerloop for the function below once per each PRAMIN range (some
- * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}.
- */
-typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
-					u32 **arg);
-
-/*
- * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that.
- * This same loop is used for read/write/memset. Offset and size in bytes.
- * One call to "loop" is done per range, with "arg" supplied.
- */
-static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
-		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
-{
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
-	u32 byteoff, start_reg, until_end, n;
-
-	alloc = get_vidmem_page_alloc(mem->sgt->sgl);
-	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
-		if (offset >= chunk->length)
-			offset -= chunk->length;
-		else
-			break;
-	}
-
-	offset /= sizeof(u32);
-
-	while (size) {
-		byteoff = gk20a_pramin_enter(g, mem, chunk, offset);
-		start_reg = pram_data032_r(byteoff / sizeof(u32));
-		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
-
-		n = min3(size, until_end, (u32)(chunk->length - offset));
-
-		loop(g, start_reg, n / sizeof(u32), arg);
-
-		/* read back to synchronize accesses */
-		gk20a_readl(g, start_reg);
-		gk20a_pramin_exit(g, mem, chunk);
-
-		size -= n;
-
-		if (n == (chunk->length - offset)) {
-			chunk = list_next_entry(chunk, list_entry);
-			offset = 0;
-		} else {
-			offset += n / sizeof(u32);
-		}
-	}
-}
-
-static inline void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
-		u32 words, u32 **arg)
-{
-	u32 r = start, *dest_u32 = *arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		*dest_u32++ = gk20a_readl(g, r);
-		r += sizeof(u32);
-	}
-
-	*arg = dest_u32;
-}
-
-static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
-		u32 words, u32 **arg)
-{
-	u32 r = start, *src_u32 = *arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		writel_relaxed(*src_u32++, g->regs + r);
-		r += sizeof(u32);
-	}
-
-	*arg = src_u32;
-}
-
-static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
-		u32 words, u32 **arg)
-{
-	u32 r = start, repeat = **arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		writel_relaxed(repeat, g->regs + r);
-		r += sizeof(u32);
-	}
-}
-
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
 {
 	u32 data = 0;
@@ -282,8 +131,8 @@ u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
 		u32 value;
 		u32 *p = &value;
 
-		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
-				pramin_access_batch_rd_n, &p);
+		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
+				sizeof(u32), pramin_access_batch_rd_n, &p);
 
 		data = value;
 
@@ -319,7 +168,7 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 *dest_u32 = dest;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_rd_n, &dest_u32);
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
@@ -340,8 +189,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 		u32 value = data;
 		u32 *p = &value;
 
-		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
-				pramin_access_batch_wr_n, &p);
+		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
+				sizeof(u32), pramin_access_batch_wr_n, &p);
 		if (!mem->skip_wmb)
 			wmb();
 	} else {
@@ -374,7 +223,7 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 *src_u32 = src;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_wr_n, &src_u32);
 		if (!mem->skip_wmb)
 			wmb();
@@ -406,7 +255,7 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
 		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
 		u32 *p = &repeat_value;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_set, &p);
 		if (!mem->skip_wmb)
 			wmb();
@@ -844,13 +693,6 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
 	return gk20a_gmmu_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
 }
 
-static void gk20a_init_pramin(struct mm_gk20a *mm)
-{
-	mm->pramin_window = 0;
-	nvgpu_spinlock_init(&mm->pramin_window_lock);
-	mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_vidmem_clear_all(struct gk20a *g)
 {
@@ -1013,7 +855,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 		(int)(mm->channel.user_size >> 20),
 		(int)(mm->channel.kernel_size >> 20));
 
-	gk20a_init_pramin(mm);
+	nvgpu_init_pramin(mm);
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
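After this change, every call site in the file follows the same shape: pick a batch callback (read, write, or set) and pass it to nvgpu_pramin_access_batched() together with a byte offset and size. For reference, here is the read path of gk20a_mem_rd32() restated as a minimal sketch; it assumes the moved interface keeps the signatures used by the hunks above.

/*
 * Sketch of the post-split caller pattern, mirroring gk20a_mem_rd32()
 * above: read one 32-bit word at word index w through the batched
 * PRAMIN accessor. Assumes nvgpu_pramin_access_batched() and
 * pramin_access_batch_rd_n keep the signatures shown in the diff.
 */
static u32 rd32_via_pramin(struct gk20a *g, struct mem_desc *mem, u32 w)
{
	u32 value;
	u32 *p = &value;

	/* Offset and size are in bytes; the batch loop internally
	 * handles chunk boundaries and 1 MB window crossings. */
	nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
				    pramin_access_batch_rd_n, &p);

	return value;
}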