From fda4ddfa79f3a78becfe44fd558974bd274c03a9 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Wed, 27 Jul 2016 17:06:36 -0700 Subject: gpu: nvgpu: userd allocation from sysmem When bar1 memory is not supported then userd will be allocated from sysmem. Functions gp_get and gp_put are updated accordingly. JIRA GV11B-1 Change-Id: Ia895712a110f6cca26474228141488f5f8ace756 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1225384 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 23 +++++++++++++---- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 46 +++++++++++++++++++++------------ drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 6 +++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 4 +++ drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 23 ++++++++++++----- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 4 +++ drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 6 +++++ 7 files changed, 84 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 6a69de3e..4458430b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -1541,12 +1541,25 @@ clean_up: return err; } +u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) +{ + return gk20a_bar1_readl(g, + c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); +} + +void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c) +{ + gk20a_bar1_writel(g, + c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(), + c->gpfifo.put); +} + /* Update with this periodically to determine how the gpfifo is draining. */ static inline u32 update_gp_get(struct gk20a *g, struct channel_gk20a *c) { - u32 new_get = gk20a_bar1_readl(g, - c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); + u32 new_get = g->ops.fifo.userd_gp_get(g, c); + if (new_get < c->gpfifo.get) c->gpfifo.wrap = !c->gpfifo.wrap; c->gpfifo.get = new_get; @@ -2360,9 +2373,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, wait_cmd, incr_cmd, skip_buffer_refcounting); - gk20a_bar1_writel(g, - c->userd_gpu_va + 4 * ram_userd_gp_put_w(), - c->gpfifo.put); + g->ops.fifo.userd_gp_put(g, c); trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev), c->hw_chid, @@ -2988,6 +2999,8 @@ void gk20a_init_channel(struct gpu_ops *gops) gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; gops->fifo.channel_set_priority = gk20a_channel_set_priority; gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; + gops->fifo.userd_gp_get = gk20a_userd_gp_get; + gops->fifo.userd_gp_put = gk20a_userd_gp_put; } long gk20a_channel_ioctl(struct file *filp, diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 835ef062..1dce435c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -491,7 +491,10 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) vfree(f->channel); vfree(f->tsg); - gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); + if (g->ops.mm.is_bar1_supported(g)) + gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); + else + gk20a_gmmu_free(g, &f->userd); gk20a_fifo_delete_runlist(f); @@ -797,16 +800,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) f->userd_entry_size = 1 << ram_userd_base_shift_v(); - err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm, - f->userd_entry_size * f->num_channels, - &f->userd); - if (err) { - dev_err(d, "memory allocation failed\n"); - goto clean_up; - } - - gk20a_dbg(gpu_dbg_map_v, "userd bar1 va = 0x%llx", f->userd.gpu_va); - f->channel = vzalloc(f->num_channels * sizeof(*f->channel)); f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg)); f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map), @@ -834,13 +827,26 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) INIT_LIST_HEAD(&f->free_chs); mutex_init(&f->free_chs_mutex); + if (g->ops.mm.is_bar1_supported(g)) + err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm, + f->userd_entry_size * f->num_channels, + &f->userd); + + else + err = gk20a_gmmu_alloc_sys(g, f->userd_entry_size * + f->num_channels, &f->userd); + if (err) { + dev_err(d, "userd memory allocation failed\n"); + goto clean_up; + } + gk20a_dbg(gpu_dbg_map, "userd gpu va = 0x%llx", f->userd.gpu_va); + for (chid = 0; chid < f->num_channels; chid++) { f->channel[chid].userd_iova = g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) - + chid * f->userd_entry_size; + + chid * f->userd_entry_size; f->channel[chid].userd_gpu_va = f->userd.gpu_va + chid * f->userd_entry_size; - gk20a_init_channel_support(g, chid); gk20a_init_tsg_support(g, chid); } @@ -858,7 +864,10 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) clean_up: gk20a_dbg_fn("fail"); - gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); + if (g->ops.mm.is_bar1_supported(g)) + gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); + else + gk20a_gmmu_free(g, &f->userd); vfree(f->channel); f->channel = NULL; @@ -884,7 +893,7 @@ static void gk20a_fifo_handle_runlist_event(struct gk20a *g) gk20a_writel(g, fifo_intr_runlist_r(), runlist_event); } -static int gk20a_init_fifo_setup_hw(struct gk20a *g) +int gk20a_init_fifo_setup_hw(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; @@ -952,7 +961,8 @@ int gk20a_init_fifo_support(struct gk20a *g) if (err) return err; - err = gk20a_init_fifo_setup_hw(g); + if (g->ops.fifo.init_fifo_setup_hw) + err = g->ops.fifo.init_fifo_setup_hw(g); if (err) return err; @@ -3015,7 +3025,8 @@ int gk20a_fifo_suspend(struct gk20a *g) gk20a_dbg_fn(""); /* stop bar1 snooping */ - gk20a_writel(g, fifo_bar1_base_r(), + if (g->ops.mm.is_bar1_supported(g)) + gk20a_writel(g, fifo_bar1_base_r(), fifo_bar1_base_valid_false_f()); /* disable fifo intr */ @@ -3246,6 +3257,7 @@ void gk20a_fifo_debugfs_init(struct device *dev) void gk20a_init_fifo(struct gpu_ops *gops) { gk20a_init_channel(gops); + gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw; gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; gops->fifo.update_runlist = gk20a_fifo_update_runlist; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 228f7730..64bdeabb 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -167,6 +167,8 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level) int gk20a_init_fifo_support(struct gk20a *g); +int gk20a_init_fifo_setup_hw(struct gk20a *g); + void gk20a_fifo_isr(struct gk20a *g); void gk20a_fifo_nonstall_isr(struct gk20a *g); @@ -263,4 +265,8 @@ int gk20a_fifo_init_engine_info(struct fifo_gk20a *f); void gk20a_get_tsg_runlist_entry(struct tsg_gk20a *tsg, u32 *runlist); void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist); +u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c); +void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c); + + #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 2b55dd26..042bd9d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -342,6 +342,7 @@ struct gpu_ops { void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod); } clock_gating; struct { + int (*init_fifo_setup_hw)(struct gk20a *g); void (*bind_channel)(struct channel_gk20a *ch_gk20a); void (*unbind_channel)(struct channel_gk20a *ch_gk20a); void (*disable_channel)(struct channel_gk20a *ch); @@ -386,6 +387,8 @@ struct gpu_ops { u32 *runlist); void (*get_ch_runlist_entry)(struct channel_gk20a *ch, u32 *runlist); + u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch); + void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch); } fifo; struct pmu_v { /*used for change of enum zbc update cmd id from ver 0 to ver1*/ @@ -552,6 +555,7 @@ struct gpu_ops { u32 (*get_big_page_sizes)(void); u32 (*get_physical_addr_bits)(struct gk20a *g); int (*init_mm_setup_hw)(struct gk20a *g); + bool (*is_bar1_supported)(struct gk20a *g); int (*init_bar2_vm)(struct gk20a *g); int (*init_bar2_mm_hw_setup)(struct gk20a *g); void (*remove_bar2_vm)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a84d8ff0..570a3708 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -797,7 +797,10 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) if (g->ops.mm.remove_bar2_vm) g->ops.mm.remove_bar2_vm(g); - gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); + + if (g->ops.mm.is_bar1_supported(g)) + gk20a_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block); + gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); gk20a_vm_remove_support_nofree(&mm->cde.vm); @@ -1001,10 +1004,11 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) if (err) return err; - err = gk20a_init_bar1_vm(mm); - if (err) - return err; - + if (g->ops.mm.is_bar1_supported(g)) { + err = gk20a_init_bar1_vm(mm); + if (err) + return err; + } if (g->ops.mm.init_bar2_vm) { err = g->ops.mm.init_bar2_vm(g); if (err) @@ -1055,7 +1059,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) >> 8); - g->ops.mm.bar1_bind(g, &mm->bar1.inst_block); + if (g->ops.mm.bar1_bind) + g->ops.mm.bar1_bind(g, &mm->bar1.inst_block); if (g->ops.mm.init_bar2_mm_hw_setup) { err = g->ops.mm.init_bar2_mm_hw_setup(g); @@ -5249,6 +5254,11 @@ clean_up: return err; } +static bool gk20a_mm_is_bar1_supported(struct gk20a *g) +{ + return true; +} + #ifdef CONFIG_DEBUG_FS void gk20a_mm_debugfs_init(struct device *dev) { @@ -5284,4 +5294,5 @@ void gk20a_init_mm(struct gpu_ops *gops) gops->mm.init_pdb = gk20a_mm_init_pdb; gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; gops->mm.bar1_bind = gk20a_mm_bar1_bind; + gops->mm.is_bar1_supported = gk20a_mm_is_bar1_supported; } diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index b5b9c52b..07ad322a 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -15,6 +15,7 @@ #include #include "gk20a/gk20a.h" +#include "gk20a/fifo_gk20a.h" #include "fifo_gm20b.h" #include "hw_ccsr_gm20b.h" #include "hw_ram_gm20b.h" @@ -133,6 +134,7 @@ static void gm20b_device_info_data_parse(struct gk20a *g, } void gm20b_init_fifo(struct gpu_ops *gops) { + gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw; gops->fifo.bind_channel = channel_gm20b_bind; gops->fifo.unbind_channel = channel_gk20a_unbind; gops->fifo.disable_channel = channel_gk20a_disable; @@ -142,6 +144,8 @@ void gm20b_init_fifo(struct gpu_ops *gops) gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; gops->fifo.channel_set_priority = gk20a_channel_set_priority; gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; + gops->fifo.userd_gp_get = gk20a_userd_gp_get; + gops->fifo.userd_gp_put = gk20a_userd_gp_put; gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 8aa14662..36d1192a 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -163,6 +163,11 @@ static int gm20b_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst) return retry ? -EINVAL : 0; } +static bool gm20b_mm_is_bar1_supported(struct gk20a *g) +{ + return true; +} + void gm20b_init_mm(struct gpu_ops *gops) { gops->mm.support_sparse = gm20b_mm_support_sparse; @@ -186,4 +191,5 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.init_pdb = gk20a_mm_init_pdb; gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; gops->mm.bar1_bind = gm20b_mm_bar1_bind; + gops->mm.is_bar1_supported = gm20b_mm_is_bar1_supported; } -- cgit v1.2.2