author     Deepak Nibade <dnibade@nvidia.com>	2016-08-17 08:17:41 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>	2016-09-01 12:11:07 -0400
commit     713f1ddcdfa5aec04bbb992e52a7d60fc68e966f (patch)
tree       6068cc6e9d64970bf6840a9f147967a9fdde7507 /drivers
parent     9ebd051779827a05d62fbba7ffa65cf401c256b3 (diff)
gpu: nvgpu: support pramin access for non-contiguous vidmem
The API pramin_access_batched() currently only supports contiguous allocations.
Modify it to also support non-contiguous allocations from the page allocator.

Update gk20a_mem_wr32() and gk20a_mem_rd32() to reuse pramin_access_batched().

Use gk20a_memset() in gk20a_gmmu_free_attr_vid() to clear vidmem pages for
kernel buffers.

Jira DNVGPU-30

Change-Id: I43630912f4837d8ebc6b9c58f4f427218ef9725b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1204303
(cherry picked from commit 2f84f141d02fd2f641cb18a48896fb3ae5f7e51f)
Reviewed-on: http://git-master/r/1210954
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
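The heart of the change is that pramin_access_batched() now walks the allocation's chunk list instead of assuming a single contiguous range. As a rough illustration only, the following user-space sketch models that chunk walk with made-up stand-in types (struct chunk, access_batched); the real driver additionally takes the pramin_window_lock, reprograms the BAR0 window per iteration, and also caps each access at the 1 MB PRAMIN window boundary via min3().

/*
 * Minimal user-space sketch of the chunk walk performed by the reworked
 * pramin_access_batched(): skip to the chunk that contains the starting
 * byte offset, then consume the request chunk by chunk. Types and names
 * here are illustrative stand-ins, not the actual nvgpu structures.
 */
#include <stdio.h>
#include <stddef.h>

struct chunk {
	unsigned long long base;   /* vidmem base address of this chunk */
	unsigned long long length; /* chunk length in bytes */
};

static void access_batched(struct chunk *chunks, size_t nr_chunks,
			   unsigned long long offset, unsigned long long size)
{
	size_t i = 0;

	/* Skip whole chunks until the offset lands inside one. */
	while (i < nr_chunks && offset >= chunks[i].length) {
		offset -= chunks[i].length;
		i++;
	}

	while (size && i < nr_chunks) {
		/* Never cross a chunk boundary in a single access. */
		unsigned long long n = chunks[i].length - offset;

		if (n > size)
			n = size;

		printf("access %llu bytes at 0x%llx\n",
		       n, chunks[i].base + offset);

		offset += n;
		size -= n;

		/* Chunk exhausted: continue in the next one. */
		if (offset == chunks[i].length) {
			i++;
			offset = 0;
		}
	}
}

int main(void)
{
	struct chunk chunks[] = {
		{ 0x100000, 0x1000 },	/* 4 KiB chunk */
		{ 0x300000, 0x2000 },	/* 8 KiB chunk, non-contiguous */
	};

	/* Read 6 KiB starting 2 KiB into the allocation. */
	access_batched(chunks, 2, 0x800, 0x1800);
	return 0;
}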
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  139
1 file changed, 72 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 89390c30..179e6fc1 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -89,31 +89,11 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
-static u64 gk20a_mem_get_vidmem_addr(struct gk20a *g, struct mem_desc *mem)
-{
-	struct gk20a_page_alloc *alloc;
-	struct page_alloc_chunk *chunk;
-
-	if (mem && mem->aperture == APERTURE_VIDMEM) {
-		alloc = (struct gk20a_page_alloc *)
-			sg_dma_address(mem->sgt->sgl);
-
-		/* This API should not be used with > 1 chunks */
-		if (alloc->nr_chunks != 1)
-			return 0;
-
-		chunk = list_first_entry(&alloc->alloc_chunks,
-				struct page_alloc_chunk, list_entry);
-		return chunk->base;
-	}
-
-	return 0;
-}
-
 /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
-static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
+static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
+		struct page_alloc_chunk *chunk, u32 w)
 {
-	u64 bufbase = gk20a_mem_get_vidmem_addr(g, mem);
+	u64 bufbase = chunk->base;
 	u64 addr = bufbase + w * sizeof(u32);
 	u32 hi = (u32)((addr & ~(u64)0xfffff)
 		>> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -124,8 +104,9 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 		bus_bar0_window_base_f(hi);
 
 	gk20a_dbg(gpu_dbg_mem,
-			"0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
-			hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
+			"0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
+			hi, lo, mem, chunk, bufbase,
+			bufbase + chunk->length, chunk->length);
 
 	WARN_ON(!bufbase);
 
@@ -140,42 +121,14 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 	return lo;
 }
 
-static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
+static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
+		struct page_alloc_chunk *chunk)
 {
-	gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
+	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
 
 	spin_unlock(&g->mm.pramin_window_lock);
 }
 
-u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
-{
-	u32 data = 0;
-
-	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
-		u32 *ptr = mem->cpu_va;
-
-		WARN_ON(!ptr);
-		data = ptr[w];
-#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
-		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
-#endif
-	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-		u32 addr = gk20a_pramin_enter(g, mem, w);
-		data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
-		gk20a_pramin_exit(g, mem);
-	} else {
-		WARN_ON("Accessing unallocated mem_desc");
-	}
-
-	return data;
-}
-
-u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
-{
-	WARN_ON(offset & 3);
-	return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
-}
-
 /*
  * Batch innerloop for the function below once per each PRAMIN range (some
  * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}.
@@ -191,22 +144,40 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
 static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
 		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+	u32 byteoff, start_reg, until_end, n;
+
+	alloc = (struct gk20a_page_alloc *)sg_dma_address(mem->sgt->sgl);
+	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
+		if (offset >= chunk->length)
+			offset -= chunk->length;
+		else
+			break;
+	}
+
 	offset /= sizeof(u32);
 
 	while (size) {
-		u32 byteoff = gk20a_pramin_enter(g, mem, offset);
-		u32 start_reg = pram_data032_r(byteoff / sizeof(u32));
-		u32 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
-		u32 n = min(size, until_end);
+		byteoff = gk20a_pramin_enter(g, mem, chunk, offset);
+		start_reg = pram_data032_r(byteoff / sizeof(u32));
+		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
+
+		n = min3(size, until_end, (u32)(chunk->length - offset));
 
 		loop(g, start_reg, n / sizeof(u32), arg);
 
 		/* read back to synchronize accesses */
 		gk20a_readl(g, start_reg);
-		gk20a_pramin_exit(g, mem);
+		gk20a_pramin_exit(g, mem, chunk);
 
 		offset += n / sizeof(u32);
 		size -= n;
+
+		if (n == (chunk->length - offset)) {
+			chunk = list_next_entry(chunk, list_entry);
+			offset = 0;
+		}
 	}
 }
 
@@ -247,6 +218,40 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
 	}
 }
 
+u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
+{
+	u32 data = 0;
+
+	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
+		u32 *ptr = mem->cpu_va;
+
+		WARN_ON(!ptr);
+		data = ptr[w];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
+#endif
+	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
+		u32 value;
+		u32 *p = &value;
+
+		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
+				pramin_access_batch_rd_n, &p);
+
+		data = value;
+
+	} else {
+		WARN_ON("Accessing unallocated mem_desc");
+	}
+
+	return data;
+}
+
+u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
+{
+	WARN_ON(offset & 3);
+	return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
+}
+
 void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
 		u32 offset, void *dest, u32 size)
 {
@@ -284,11 +289,11 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 #endif
 		ptr[w] = data;
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-		u32 addr = gk20a_pramin_enter(g, mem, w);
-		gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
-		/* read back to synchronize accesses */
-		gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
-		gk20a_pramin_exit(g, mem);
+		u32 value = data;
+		u32 *p = &value;
+
+		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
+				pramin_access_batch_wr_n, &p);
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -3000,7 +3005,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
 			schedule_work(&g->mm.vidmem_clear_mem_worker);
 		}
 	} else {
-		/* TODO: clear with PRAMIN here */
+		gk20a_memset(g, mem, 0, 0, mem->size);
 		gk20a_free(mem->allocator,
 				sg_dma_address(mem->sgt->sgl));
 		gk20a_free_sgtable(&mem->sgt);
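As a usage-level illustration of how gk20a_mem_rd32() and gk20a_mem_wr32() now reuse the batched path, the sketch below models the callback-pointer pattern from the diff: a single 32-bit access is a sizeof(u32) batch driven through a read or write callback that advances a caller-supplied pointer. All names here, including the fake_vidmem array, are hypothetical stand-ins; the real callbacks, pramin_access_batch_rd_n/wr_n, access the PRAMIN data registers rather than a local array.

/* User-space model of the rd32/wr32-over-batched-access pattern. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t fake_vidmem[64];	/* stand-in for the PRAMIN window */

typedef void (*batch_fn)(uint32_t *mem, uint32_t word, uint32_t words,
			 uint32_t **arg);

static void batch_rd_n(uint32_t *mem, uint32_t word, uint32_t words,
		       uint32_t **arg)
{
	/* Copy out of "vidmem" and advance the caller's buffer pointer. */
	memcpy(*arg, mem + word, words * sizeof(uint32_t));
	*arg += words;
}

static void batch_wr_n(uint32_t *mem, uint32_t word, uint32_t words,
		       uint32_t **arg)
{
	/* Copy into "vidmem" and advance the caller's buffer pointer. */
	memcpy(mem + word, *arg, words * sizeof(uint32_t));
	*arg += words;
}

static void access_batched(uint32_t offset, uint32_t size, batch_fn loop,
			   uint32_t **arg)
{
	/* The real code walks chunks and reprograms the BAR0 window here. */
	loop(fake_vidmem, offset / sizeof(uint32_t), size / sizeof(uint32_t),
	     arg);
}

static uint32_t mem_rd32(uint32_t w)
{
	uint32_t value, *p = &value;

	access_batched(w * sizeof(uint32_t), sizeof(uint32_t),
		       batch_rd_n, &p);
	return value;
}

static void mem_wr32(uint32_t w, uint32_t data)
{
	uint32_t value = data, *p = &value;

	access_batched(w * sizeof(uint32_t), sizeof(uint32_t),
		       batch_wr_n, &p);
}

int main(void)
{
	mem_wr32(5, 0xdeadbeef);
	printf("word 5 = 0x%08x\n", mem_rd32(5));
	return 0;
}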