author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-02-26 17:37:43 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 21:59:26 -0400
commit	7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c (patch)
tree	de452c09f5eef76af273041dc64997fdc351dbd6 /drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
parent	bb51cf9ec6482b50f3020179965ef82f58d91a0a (diff)
gpu: nvgpu: Implement common allocator and mem_desc
Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.

Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
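For reference, the mem_desc interface this diff migrates to can be sketched from its call sites in this file. The fields cpu_va, sgt, size, and gpu_va and the two helper signatures all appear in the new code below; everything else here is an assumption, since the authoritative definition lives elsewhere in the nvgpu tree. Treat this as an illustrative sketch, not the actual header:

#include <linux/scatterlist.h>
#include <linux/types.h>

struct gk20a;

/* Sketch of the common buffer descriptor this change introduces,
 * reconstructed from the call sites in this diff. The real
 * definition may carry additional fields. */
struct mem_desc {
	void *cpu_va;          /* kernel CPU mapping of the buffer */
	struct sg_table *sgt;  /* sgt->sgl feeds gk20a_mm_iova_addr() */
	size_t size;           /* allocation size in bytes */
	u64 gpu_va;            /* GPU VA, filled in once mapped */
};

/* Fills *mem on success and returns 0; on failure the caller has
 * nothing to clean up. */
int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem);

/* Releases everything *mem tracks. The cleanup paths in this diff
 * call it unconditionally on zero-initialized descriptors, so it
 * must tolerate buffers that were never allocated. */
void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem);

This is why the error paths in the diff shrink so much: the per-field NULL checks and dma_free_coherent() bookkeeping collapse into a single gk20a_gmmu_free() call per buffer.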
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/fifo_vgpu.c')
-rw-r--r--	drivers/gpu/nvgpu/vgpu/fifo_vgpu.c	| 70
1 file changed, 16 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 45d956a2..25e6e4c7 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -196,19 +196,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 
 	runlist_size = sizeof(u16) * f->num_channels;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		dma_addr_t iova;
-
-		runlist->mem[i].cpuva =
-			dma_alloc_coherent(d,
-					runlist_size,
-					&iova,
-					GFP_KERNEL);
-		if (!runlist->mem[i].cpuva) {
+		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
+		if (err) {
 			dev_err(d, "memory allocation failed\n");
 			goto clean_up_runlist;
 		}
-		runlist->mem[i].iova = iova;
-		runlist->mem[i].size = runlist_size;
 	}
 	mutex_init(&runlist->mutex);
 
@@ -220,15 +212,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	return 0;
 
 clean_up_runlist:
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		if (runlist->mem[i].cpuva)
-			dma_free_coherent(d,
-				runlist->mem[i].size,
-				runlist->mem[i].cpuva,
-				runlist->mem[i].iova);
-		runlist->mem[i].cpuva = NULL;
-		runlist->mem[i].iova = 0;
-	}
+	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
+		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 	kfree(runlist->active_channels);
 	runlist->active_channels = NULL;
@@ -248,7 +233,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	struct fifo_gk20a *f = &g->fifo;
 	struct device *d = dev_from_gk20a(g);
 	int chid, err = 0;
-	dma_addr_t iova;
 
 	gk20a_dbg_fn("");
 
@@ -268,28 +252,16 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	f->max_engines = ENGINE_INVAL_GK20A;
 
 	f->userd_entry_size = 1 << ram_userd_base_shift_v();
-	f->userd_total_size = f->userd_entry_size * f->num_channels;
 
-	f->userd.cpuva = dma_alloc_coherent(d,
-					f->userd_total_size,
-					&iova,
-					GFP_KERNEL);
-	if (!f->userd.cpuva) {
-		dev_err(d, "memory allocation failed\n");
-		goto clean_up;
-	}
-
-	f->userd.iova = iova;
-	err = gk20a_get_sgtable(d, &f->userd.sgt,
-				f->userd.cpuva, f->userd.iova,
-				f->userd_total_size);
+	err = gk20a_gmmu_alloc(g, f->userd_entry_size * f->num_channels,
+			       &f->userd);
 	if (err) {
-		dev_err(d, "failed to create sg table\n");
+		dev_err(d, "memory allocation failed\n");
 		goto clean_up;
 	}
 
 	/* bar1 va */
-	f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
+	f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd.size);
 	if (!f->userd.gpu_va) {
 		dev_err(d, "gmmu mapping failed\n");
 		goto clean_up;
@@ -297,8 +269,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 
 	gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
 
-	f->userd.size = f->userd_total_size;
-
 	f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
 			GFP_KERNEL);
 	f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
@@ -315,9 +285,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		f->channel[chid].userd_cpu_va =
-			f->userd.cpuva + chid * f->userd_entry_size;
+			f->userd.cpu_va + chid * f->userd_entry_size;
 		f->channel[chid].userd_iova =
-			gk20a_mm_smmu_vaddr_translate(g, f->userd.iova)
+			gk20a_mm_iova_addr(g, f->userd.sgt->sgl)
 			+ chid * f->userd_entry_size;
 		f->channel[chid].userd_gpu_va =
 			f->userd.gpu_va + chid * f->userd_entry_size;
@@ -337,17 +307,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 clean_up:
 	gk20a_dbg_fn("fail");
 	/* FIXME: unmap from bar1 */
-	if (f->userd.sgt)
-		gk20a_free_sgtable(&f->userd.sgt);
-	if (f->userd.cpuva)
-		dma_free_coherent(d,
-				f->userd_total_size,
-				f->userd.cpuva,
-				f->userd.iova);
-	f->userd.cpuva = NULL;
-	f->userd.iova = 0;
+	gk20a_gmmu_free(g, &f->userd);
 
-	memset(&f->userd, 0, sizeof(struct userd_desc));
+	memset(&f->userd, 0, sizeof(f->userd));
 
 	kfree(f->channel);
 	f->channel = NULL;
@@ -368,7 +330,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g)
 	u32 v, v1 = 0x33, v2 = 0x55;
 
 	u32 bar1_vaddr = f->userd.gpu_va;
-	volatile u32 *cpu_vaddr = f->userd.cpuva;
+	volatile u32 *cpu_vaddr = f->userd.cpu_va;
 
 	gk20a_dbg_info("test bar1 @ vaddr 0x%x",
 			bar1_vaddr);
@@ -505,7 +467,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	    add /* resume to add all channels back */) {
 		u32 chid;
 
-		runlist_entry = runlist->mem[0].cpuva;
+		runlist_entry = runlist->mem[0].cpu_va;
 		for_each_set_bit(chid,
 			runlist->active_channels, f->num_channels) {
 			gk20a_dbg_info("add channel %d to runlist", chid);
@@ -517,7 +479,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	count = 0;
 
 	return vgpu_submit_runlist(platform->virt_handle, runlist_id,
-				runlist->mem[0].cpuva, count);
+				runlist->mem[0].cpu_va, count);
 }
 
 /* add/remove a channel from runlist