author     Alex Waterman <alexw@nvidia.com>	2017-03-15 18:49:18 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-31 20:21:34 -0400
commit     dd88aed5cc3088285c5d0b900aebf705f52178c5
tree       371ffb74c97305be99fe312b45e30793dab36926 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     56df8c58088b5c8b4a09ce6f5e195614251bf8d0
gpu: nvgpu: Split out pramin code
Split out the pramin interface code in preparation for splitting
out the mem_desc code.
JIRA NVGPU-12
Change-Id: I3f03447ea213cc15669b0934fa706e7cb22599b7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1323323
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  178
 1 file changed, 10 insertions(+), 168 deletions(-)
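The hunks below remove the PRAMIN window plumbing from mm_gk20a.c and switch the gk20a_mem_* call sites over to the new nvgpu_pramin interface. The heart of the removed gk20a_pramin_enter() is splitting a vidmem byte address at the 1 MB PRAMIN window boundary into a BAR0 window base and an in-window offset. A minimal standalone sketch of that arithmetic follows; BAR0_WINDOW_SHIFT is an assumed stand-in for the value returned by bus_bar0_window_target_bar0_window_base_shift_v().

/*
 * Sketch of the BAR0/PRAMIN window split performed by the removed
 * gk20a_pramin_enter(): the upper bits of a vidmem address select a
 * 1 MB-aligned window base, the low 20 bits address within the window.
 */
#include <stdint.h>
#include <stdio.h>

#define PRAMIN_WINDOW_MASK 0xfffffu	/* low 20 bits: 1 MB window */
#define BAR0_WINDOW_SHIFT  16		/* assumed shift for the base field */

int main(void)
{
	uint64_t addr = 0x12345678ull;	/* example vidmem byte address */
	uint32_t hi = (uint32_t)((addr & ~(uint64_t)PRAMIN_WINDOW_MASK)
				 >> BAR0_WINDOW_SHIFT);
	uint32_t lo = (uint32_t)(addr & PRAMIN_WINDOW_MASK);

	printf("base field 0x%08x, in-window offset 0x%05x\n",
	       (unsigned)hi, (unsigned)lo);
	return 0;
}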
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 05535412..94d31273 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -32,6 +32,7 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
+#include <nvgpu/pramin.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/page_allocator.h>
@@ -50,13 +51,6 @@
 #include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>
 
-/*
- * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the
- * boot, even for buffers that would work via cpu_va. In runtime, the flag is
- * in debugfs, called "force_pramin".
- */
-#define GK20A_FORCE_PRAMIN_DEFAULT	false
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static void gk20a_vidmem_clear_mem_worker(struct work_struct *work);
 #endif
@@ -74,7 +68,7 @@ is_vidmem_page_alloc(u64 addr)
 	return !!(addr & 1ULL);
 }
 
-static inline struct nvgpu_page_alloc *
+struct nvgpu_page_alloc *
 get_vidmem_page_alloc(struct scatterlist *sgl)
 {
 	u64 addr;
@@ -121,151 +115,6 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
-/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
-static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
-			      struct page_alloc_chunk *chunk, u32 w)
-{
-	u64 bufbase = chunk->base;
-	u64 addr = bufbase + w * sizeof(u32);
-	u32 hi = (u32)((addr & ~(u64)0xfffff)
-		       >> bus_bar0_window_target_bar0_window_base_shift_v());
-	u32 lo = (u32)(addr & 0xfffff);
-	u32 win = gk20a_aperture_mask(g, mem,
-			bus_bar0_window_target_sys_mem_noncoherent_f(),
-			bus_bar0_window_target_vid_mem_f()) |
-		bus_bar0_window_base_f(hi);
-
-	gk20a_dbg(gpu_dbg_mem,
-		  "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
-		  hi, lo, mem, chunk, bufbase,
-		  bufbase + chunk->length, chunk->length);
-
-	WARN_ON(!bufbase);
-
-	nvgpu_spinlock_acquire(&g->mm.pramin_window_lock);
-
-	if (g->mm.pramin_window != win) {
-		gk20a_writel(g, bus_bar0_window_r(), win);
-		gk20a_readl(g, bus_bar0_window_r());
-		g->mm.pramin_window = win;
-	}
-
-	return lo;
-}
-
-static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
-			      struct page_alloc_chunk *chunk)
-{
-	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
-
-	nvgpu_spinlock_release(&g->mm.pramin_window_lock);
-}
-
-/*
- * Batch innerloop for the function below once per each PRAMIN range (some
- * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}.
- */
-typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
-					u32 **arg);
-
-/*
- * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that.
- * This same loop is used for read/write/memset. Offset and size in bytes.
- * One call to "loop" is done per range, with "arg" supplied.
- */
-static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
-		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
-{
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
-	u32 byteoff, start_reg, until_end, n;
-
-	alloc = get_vidmem_page_alloc(mem->sgt->sgl);
-	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
-		if (offset >= chunk->length)
-			offset -= chunk->length;
-		else
-			break;
-	}
-
-	offset /= sizeof(u32);
-
-	while (size) {
-		byteoff = gk20a_pramin_enter(g, mem, chunk, offset);
-		start_reg = pram_data032_r(byteoff / sizeof(u32));
-		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
-
-		n = min3(size, until_end, (u32)(chunk->length - offset));
-
-		loop(g, start_reg, n / sizeof(u32), arg);
-
-		/* read back to synchronize accesses */
-		gk20a_readl(g, start_reg);
-		gk20a_pramin_exit(g, mem, chunk);
-
-		size -= n;
-
-		if (n == (chunk->length - offset)) {
-			chunk = list_next_entry(chunk, list_entry);
-			offset = 0;
-		} else {
-			offset += n / sizeof(u32);
-		}
-	}
-}
-
-static inline void pramin_access_batch_rd_n(struct gk20a *g, u32 start,
-		u32 words, u32 **arg)
-{
-	u32 r = start, *dest_u32 = *arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		*dest_u32++ = gk20a_readl(g, r);
-		r += sizeof(u32);
-	}
-
-	*arg = dest_u32;
-}
-
-static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
-		u32 words, u32 **arg)
-{
-	u32 r = start, *src_u32 = *arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		writel_relaxed(*src_u32++, g->regs + r);
-		r += sizeof(u32);
-	}
-
-	*arg = src_u32;
-}
-
-static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
-		u32 words, u32 **arg)
-{
-	u32 r = start, repeat = **arg;
-
-	if (!g->regs) {
-		__gk20a_warn_on_no_regs();
-		return;
-	}
-
-	while (words--) {
-		writel_relaxed(repeat, g->regs + r);
-		r += sizeof(u32);
-	}
-}
-
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
 {
 	u32 data = 0;
@@ -282,8 +131,8 @@ u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
 		u32 value;
 		u32 *p = &value;
 
-		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
-				pramin_access_batch_rd_n, &p);
+		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
+				sizeof(u32), pramin_access_batch_rd_n, &p);
 
 		data = value;
 
@@ -319,7 +168,7 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 *dest_u32 = dest;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_rd_n, &dest_u32);
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
@@ -340,8 +189,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 		u32 value = data;
 		u32 *p = &value;
 
-		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
-				pramin_access_batch_wr_n, &p);
+		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
+				sizeof(u32), pramin_access_batch_wr_n, &p);
 		if (!mem->skip_wmb)
 			wmb();
 	} else {
@@ -374,7 +223,7 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 *src_u32 = src;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_wr_n, &src_u32);
 		if (!mem->skip_wmb)
 			wmb();
@@ -406,7 +255,7 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
 		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
 		u32 *p = &repeat_value;
 
-		pramin_access_batched(g, mem, offset, size,
+		nvgpu_pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_set, &p);
 		if (!mem->skip_wmb)
 			wmb();
@@ -844,13 +693,6 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
 	return gk20a_gmmu_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
 }
 
-static void gk20a_init_pramin(struct mm_gk20a *mm)
-{
-	mm->pramin_window = 0;
-	nvgpu_spinlock_init(&mm->pramin_window_lock);
-	mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
-}
-
 #if defined(CONFIG_GK20A_VIDMEM)
 static int gk20a_vidmem_clear_all(struct gk20a *g)
 {
@@ -1013,7 +855,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 		(int)(mm->channel.user_size >> 20),
 		(int)(mm->channel.kernel_size >> 20));
 
-	gk20a_init_pramin(mm);
+	nvgpu_init_pramin(mm);
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
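After this change, every call site in the file follows the same shape: pick a batch callback (read, write, or set) and pass it to nvgpu_pramin_access_batched() together with a byte offset and size. For reference, here is the read path of gk20a_mem_rd32() restated as a minimal sketch; it assumes the moved interface keeps the signatures used by the hunks above.

/*
 * Sketch of the post-split caller pattern, mirroring gk20a_mem_rd32()
 * above: read one 32-bit word at word index w through the batched
 * PRAMIN accessor. Assumes nvgpu_pramin_access_batched() and
 * pramin_access_batch_rd_n keep the signatures shown in the diff.
 */
static u32 rd32_via_pramin(struct gk20a *g, struct mem_desc *mem, u32 w)
{
	u32 value;
	u32 *p = &value;

	/* Offset and size are in bytes; the batch loop internally
	 * handles chunk boundaries and 1 MB window crossings. */
	nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
				    pramin_access_batch_rd_n, &p);

	return value;
}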