From f04031e5e8837abb2be3feb0ee30e1af54de7845 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 14 Mar 2017 13:39:59 -0700 Subject: gpu: nvgpu: Move programming of host registers to fifo Move code that touches host registers and instance block to fifo HAL. This involves adding HAL ops for the fifo HAL functions that get called from outside fifo. This clears responsibility of channel by leaving it only managing channels in software and push buffers. channel had member ramfc defined, but it was not used, so remove it. pbdma_acquire_val consisted of both channel logic and hardware programming. The channel logic was moved to the caller and only hardware programming was moved. Change-Id: Id005787f6cc91276b767e8e86325caf966913de9 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/1322423 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 378 +------------------------------- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 16 -- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 344 ++++++++++++++++++++++++++++- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 18 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 6 +- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 22 +- drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 7 +- drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 3 +- 9 files changed, 395 insertions(+), 403 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 5dfd2309..418572a1 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -488,7 +488,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev, /* -1 means default channel priority */ if (priority != -1) { - err = gk20a_channel_set_priority(ce_ctx->ch, priority); + err = gk20a_fifo_set_priority(ce_ctx->ch, priority); if (err) { gk20a_err(ce_ctx->dev, "ce: could not set the channel priority for CE context"); @@ -498,7 +498,7 @@ u32 
gk20a_ce_create_context_with_cb(struct device *dev, /* -1 means default channel timeslice value */ if (timeslice != -1) { - err = gk20a_channel_set_timeslice(ce_ctx->ch, timeslice); + err = gk20a_fifo_set_timeslice(ce_ctx->ch, timeslice); if (err) { gk20a_err(ce_ctx->dev, "ce: could not set the channel timeslice value for CE context"); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 88495bde..2facb595 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -38,11 +38,7 @@ #include "dbg_gpu_gk20a.h" #include "fence_gk20a.h" -#include -#include #include -#include -#include #define NVMAP_HANDLE_PARAM_SIZE 1 @@ -78,11 +74,6 @@ static void channel_gk20a_joblist_delete(struct channel_gk20a *c, static struct channel_gk20a_job *channel_gk20a_joblist_peek( struct channel_gk20a *c); -static int channel_gk20a_commit_userd(struct channel_gk20a *c); -static int channel_gk20a_setup_userd(struct channel_gk20a *c); - -static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a); - static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add); static void gk20a_free_error_notifiers(struct channel_gk20a *ch); @@ -159,34 +150,6 @@ int channel_gk20a_commit_va(struct channel_gk20a *c) return 0; } -static int channel_gk20a_commit_userd(struct channel_gk20a *c) -{ - u32 addr_lo; - u32 addr_hi; - struct gk20a *g = c->g; - - gk20a_dbg_fn(""); - - addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); - addr_hi = u64_hi32(c->userd_iova); - - gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", - c->hw_chid, (u64)c->userd_iova); - - gk20a_mem_wr32(g, &c->inst_block, - ram_in_ramfc_w() + ram_fc_userd_w(), - gk20a_aperture_mask(g, &g->fifo.userd, - pbdma_userd_target_sys_mem_ncoh_f(), - pbdma_userd_target_vid_mem_f()) | - pbdma_userd_addr_f(addr_lo)); - - gk20a_mem_wr32(g, &c->inst_block, - ram_in_ramfc_w() + ram_fc_userd_hi_w(), - pbdma_userd_hi_addr_f(addr_hi)); - - 
return 0; -} - int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, int timeslice_period, int *__timeslice_timeout, int *__timeslice_scale) @@ -215,255 +178,11 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, return 0; } -static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) -{ - int shift = 0, value = 0; - - gk20a_channel_get_timescale_from_timeslice(c->g, - c->timeslice_us, &value, &shift); - - /* disable channel */ - c->g->ops.fifo.disable_channel(c); - - /* preempt the channel */ - WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); - - /* set new timeslice */ - gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(), - value | (shift << 12) | - fifo_runlist_timeslice_enable_true_f()); - - /* enable channel */ - gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), - gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | - ccsr_channel_enable_set_true_f()); - - return 0; -} - -u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c) -{ - u32 val, exp, man; - u64 timeout; - unsigned int val_len; - - val = pbdma_acquire_retry_man_2_f() | - pbdma_acquire_retry_exp_2_f(); - - if (!c->g->timeouts_enabled || !c->wdt_enabled) - return val; - - timeout = gk20a_get_channel_watchdog_timeout(c); - timeout *= 80UL; - do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */ - timeout *= 1000000UL; /* ms -> ns */ - do_div(timeout, 1024); /* in unit of 1024ns */ - val_len = fls(timeout >> 32) + 32; - if (val_len == 32) - val_len = fls(timeout); - if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */ - exp = pbdma_acquire_timeout_exp_max_v(); - man = pbdma_acquire_timeout_man_max_v(); - } else if (val_len > 16) { - exp = val_len - 16; - man = timeout >> exp; - } else { - exp = 0; - man = timeout; - } - - val |= pbdma_acquire_timeout_exp_f(exp) | - pbdma_acquire_timeout_man_f(man) | - pbdma_acquire_timeout_en_enable_f(); - - return val; -} - -void 
gk20a_channel_setup_ramfc_for_privileged_channel(struct channel_gk20a *c) -{ - struct gk20a *g = c->g; - struct mem_desc *mem = &c->inst_block; - - gk20a_dbg_info("channel %d : set ramfc privileged_channel", c->hw_chid); - - /* Enable HCE priv mode for phys mode transfer */ - gk20a_mem_wr32(g, mem, ram_fc_hce_ctrl_w(), - pbdma_hce_ctrl_hce_priv_mode_yes_f()); -} - -int channel_gk20a_setup_ramfc(struct channel_gk20a *c, - u64 gpfifo_base, u32 gpfifo_entries, u32 flags) -{ - struct gk20a *g = c->g; - struct mem_desc *mem = &c->inst_block; - - gk20a_dbg_fn(""); - - gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v()); - - gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(), - pbdma_gp_base_offset_f( - u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); - - gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), - pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | - pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); - - gk20a_mem_wr32(g, mem, ram_fc_signature_w(), - c->g->ops.fifo.get_pbdma_signature(c->g)); - - gk20a_mem_wr32(g, mem, ram_fc_formats_w(), - pbdma_formats_gp_fermi0_f() | - pbdma_formats_pb_fermi1_f() | - pbdma_formats_mp_fermi0_f()); - - gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(), - pbdma_pb_header_priv_user_f() | - pbdma_pb_header_method_zero_f() | - pbdma_pb_header_subchannel_zero_f() | - pbdma_pb_header_level_main_f() | - pbdma_pb_header_first_true_f() | - pbdma_pb_header_type_inc_f()); - - gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(), - pbdma_subdevice_id_f(1) | - pbdma_subdevice_status_active_f() | - pbdma_subdevice_channel_dma_enable_f()); - - gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); - - gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), - channel_gk20a_pbdma_acquire_val(c)); - - gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), - fifo_runlist_timeslice_timeout_128_f() | - fifo_runlist_timeslice_timescale_3_f() | - fifo_runlist_timeslice_enable_true_f()); - - gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(), - fifo_pb_timeslice_timeout_16_f() | - 
fifo_pb_timeslice_timescale_0_f() | - fifo_pb_timeslice_enable_true_f()); - - gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); - - if (c->is_privileged_channel) - gk20a_channel_setup_ramfc_for_privileged_channel(c); - - return channel_gk20a_commit_userd(c); -} - -static int channel_gk20a_setup_userd(struct channel_gk20a *c) -{ - struct gk20a *g = c->g; - struct mem_desc *mem = &g->fifo.userd; - u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32); - - gk20a_dbg_fn(""); - - gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0); - gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); - - return 0; -} - -static void channel_gk20a_bind(struct channel_gk20a *c) -{ - struct gk20a *g = c->g; - u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) - >> ram_in_base_shift_v(); - - gk20a_dbg_info("bind channel %d inst ptr 0x%08x", - c->hw_chid, inst_ptr); - - - gk20a_writel(g, ccsr_channel_r(c->hw_chid), - (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & - ~ccsr_channel_runlist_f(~0)) | - ccsr_channel_runlist_f(c->runlist_id)); - - gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), - ccsr_channel_inst_ptr_f(inst_ptr) | - gk20a_aperture_mask(g, &c->inst_block, - ccsr_channel_inst_target_sys_mem_ncoh_f(), - ccsr_channel_inst_target_vid_mem_f()) | - ccsr_channel_inst_bind_true_f()); - - gk20a_writel(g, ccsr_channel_r(c->hw_chid), - (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & - ~ccsr_channel_enable_set_f(~0)) | - 
ccsr_channel_enable_set_true_f()); - - wmb(); - atomic_set(&c->bound, true); - -} - -void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) -{ - struct gk20a *g = ch_gk20a->g; - - gk20a_dbg_fn(""); - - if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) { - gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), - ccsr_channel_inst_ptr_f(0) | - ccsr_channel_inst_bind_false_f()); - } -} - -int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) -{ - int err; - - gk20a_dbg_fn(""); - - err = gk20a_alloc_inst_block(g, &ch->inst_block); - if (err) - return err; - - gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", - ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); - - gk20a_dbg_fn("done"); - return 0; -} - -void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch) -{ - gk20a_free_inst_block(g, &ch->inst_block); -} - static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) { return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->hw_chid, add, true); } -void channel_gk20a_enable(struct channel_gk20a *ch) -{ - /* enable channel */ - gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), - gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) | - ccsr_channel_enable_set_true_f()); -} - -void channel_gk20a_disable(struct channel_gk20a *ch) -{ - /* disable channel */ - gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), - gk20a_readl(ch->g, - ccsr_channel_r(ch->hw_chid)) | - ccsr_channel_enable_clr_true_f()); -} - int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) { struct tsg_gk20a *tsg; @@ -991,8 +710,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) gk20a_gr_flush_channel_tlb(gr); - memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); - gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); nvgpu_big_free(g, ch->gpfifo.pipe); memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); @@ -1834,6 +1551,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct vm_gk20a 
*ch_vm; u32 gpfifo_size; int err = 0; + unsigned long acquire_timeout; gpfifo_size = args->num_entries; @@ -1852,9 +1570,6 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, } ch_vm = c->vm; - c->ramfc.offset = 0; - c->ramfc.size = ram_in_ramfc_s() / 8; - if (c->gpfifo.mem.size) { gk20a_err(d, "channel %d :" "gpfifo already allocated", c->hw_chid); @@ -1884,7 +1599,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num); - channel_gk20a_setup_userd(c); + g->ops.fifo.setup_userd(c); if (!platform->aggressive_sync_destroy_thresh) { nvgpu_mutex_acquire(&c->sync_lock); @@ -1903,8 +1618,14 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, } } + if (!c->g->timeouts_enabled || !c->wdt_enabled) + acquire_timeout = 0; + else + acquire_timeout = gk20a_get_channel_watchdog_timeout(c); + err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, - c->gpfifo.entry_num, args->flags); + c->gpfifo.entry_num, + acquire_timeout, args->flags); if (err) goto clean_up_sync; @@ -1949,19 +1670,6 @@ clean_up: return err; } -u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) -{ - return gk20a_bar1_readl(g, - c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); -} - -void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c) -{ - gk20a_bar1_writel(g, - c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(), - c->gpfifo.put); -} - /* Update with this periodically to determine how the gpfifo is draining. 
*/ static inline u32 update_gp_get(struct gk20a *g, struct channel_gk20a *c) @@ -2093,7 +1801,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) { - ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch); + ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch); ch->timeout.running = true; nvgpu_timeout_init(ch->g, &ch->timeout.timer, gk20a_get_channel_watchdog_timeout(ch), @@ -2225,7 +1933,7 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) ch->timeout.running = false; nvgpu_raw_spinlock_release(&ch->timeout.lock); - if (gk20a_userd_gp_get(ch->g, ch) != gp_get) { + if (g->ops.fifo.userd_gp_get(ch->g, ch) != gp_get) { /* Channel has advanced, reschedule */ gk20a_channel_timeout_start(ch); return; @@ -3693,55 +3401,6 @@ static int gk20a_channel_event_id_ctrl(struct channel_gk20a *ch, return err; } -int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) -{ - if (gk20a_is_channel_marked_as_tsg(ch)) { - gk20a_err(dev_from_gk20a(ch->g), - "invalid operation for TSG!\n"); - return -EINVAL; - } - - /* set priority of graphics channel */ - switch (priority) { - case NVGPU_PRIORITY_LOW: - ch->timeslice_us = ch->g->timeslice_low_priority_us; - break; - case NVGPU_PRIORITY_MEDIUM: - ch->timeslice_us = ch->g->timeslice_medium_priority_us; - break; - case NVGPU_PRIORITY_HIGH: - ch->timeslice_us = ch->g->timeslice_high_priority_us; - break; - default: - pr_err("Unsupported priority"); - return -EINVAL; - } - - return channel_gk20a_set_schedule_params(ch); -} - -int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) -{ - struct gk20a *g = ch->g; - - if (gk20a_is_channel_marked_as_tsg(ch)) { - gk20a_err(dev_from_gk20a(ch->g), - "invalid operation for TSG!\n"); - return -EINVAL; - } - - if (timeslice < g->min_timeslice_us || - timeslice > g->max_timeslice_us) - return -EINVAL; - - ch->timeslice_us = timeslice; - - gk20a_dbg(gpu_dbg_sched, 
"chid=%u timeslice=%u us", - ch->hw_chid, timeslice); - - return channel_gk20a_set_schedule_params(ch); -} - static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, struct nvgpu_zcull_bind_args *args) { @@ -3924,21 +3583,6 @@ clean_up: return ret; } -void gk20a_init_channel(struct gpu_ops *gops) -{ - gops->fifo.bind_channel = channel_gk20a_bind; - gops->fifo.unbind_channel = channel_gk20a_unbind; - gops->fifo.disable_channel = channel_gk20a_disable; - gops->fifo.enable_channel = channel_gk20a_enable; - gops->fifo.alloc_inst = channel_gk20a_alloc_inst; - gops->fifo.free_inst = channel_gk20a_free_inst; - gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; - gops->fifo.channel_set_priority = gk20a_channel_set_priority; - gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; - gops->fifo.userd_gp_get = gk20a_userd_gp_get; - gops->fifo.userd_gp_put = gk20a_userd_gp_put; -} - long gk20a_channel_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 42550632..d530f47d 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -201,7 +201,6 @@ struct channel_gk20a { struct channel_ctx_gk20a ch_ctx; struct mem_desc inst_block; - struct mem_desc_sub ramfc; u64 userd_iova; u64 userd_gpu_va; @@ -314,8 +313,6 @@ int gk20a_channel_release(struct inode *inode, struct file *filp); struct channel_gk20a *gk20a_get_channel_from_file(int fd); void gk20a_channel_update(struct channel_gk20a *c); -void gk20a_init_channel(struct gpu_ops *gops); - /* returns ch if reference was obtained */ struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch, const char *caller); @@ -336,7 +333,6 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, void *update_fn_data, int runlist_id, bool is_privileged_channel); -void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); int 
gk20a_submit_channel_gpfifo(struct channel_gk20a *c, struct nvgpu_gpfifo *gpfifo, @@ -351,14 +347,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, struct nvgpu_alloc_gpfifo_ex_args *args); -void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); -void channel_gk20a_disable(struct channel_gk20a *ch); -int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); -void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch); -u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c); -int channel_gk20a_setup_ramfc(struct channel_gk20a *c, - u64 gpfifo_base, u32 gpfifo_entries, u32 flags); -void channel_gk20a_enable(struct channel_gk20a *ch); void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c); @@ -369,13 +357,9 @@ bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c); int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, int timeslice_period, int *__timeslice_timeout, int *__timeslice_scale); -int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority); -int gk20a_channel_set_timeslice(struct channel_gk20a *ch, unsigned int timeslice); int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, u32 level); void gk20a_channel_event_id_post_event(struct channel_gk20a *ch, u32 event_id); -void gk20a_channel_setup_ramfc_for_privileged_channel(struct channel_gk20a *c); - #endif /* CHANNEL_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index fdac40de..743bc1f5 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -3852,9 +3852,342 @@ void gk20a_dump_eng_status(struct gk20a *g, gk20a_debug_output(o, "\n"); } +void gk20a_fifo_enable_channel(struct channel_gk20a *ch) +{ + gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), + gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) | + 
ccsr_channel_enable_set_true_f()); +} + +void gk20a_fifo_disable_channel(struct channel_gk20a *ch) +{ + gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), + gk20a_readl(ch->g, + ccsr_channel_r(ch->hw_chid)) | + ccsr_channel_enable_clr_true_f()); +} + +static void gk20a_fifo_channel_bind(struct channel_gk20a *c) +{ + struct gk20a *g = c->g; + u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >> + ram_in_base_shift_v(); + + gk20a_dbg_info("bind channel %d inst ptr 0x%08x", + c->hw_chid, inst_ptr); + + + gk20a_writel(g, ccsr_channel_r(c->hw_chid), + (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & + ~ccsr_channel_runlist_f(~0)) | + ccsr_channel_runlist_f(c->runlist_id)); + + gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), + ccsr_channel_inst_ptr_f(inst_ptr) | + gk20a_aperture_mask(g, &c->inst_block, + ccsr_channel_inst_target_sys_mem_ncoh_f(), + ccsr_channel_inst_target_vid_mem_f()) | + ccsr_channel_inst_bind_true_f()); + + gk20a_writel(g, ccsr_channel_r(c->hw_chid), + (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & + ~ccsr_channel_enable_set_f(~0)) | + ccsr_channel_enable_set_true_f()); + + wmb(); + atomic_set(&c->bound, true); + +} + +void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a) +{ + struct gk20a *g = ch_gk20a->g; + + gk20a_dbg_fn(""); + + if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) { + gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), + ccsr_channel_inst_ptr_f(0) | + ccsr_channel_inst_bind_false_f()); + } +} + +static int gk20a_fifo_commit_userd(struct channel_gk20a *c) +{ + u32 addr_lo; + u32 addr_hi; + struct gk20a *g = c->g; + + gk20a_dbg_fn(""); + + addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); + addr_hi = u64_hi32(c->userd_iova); + + gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", + c->hw_chid, (u64)c->userd_iova); + + gk20a_mem_wr32(g, &c->inst_block, + ram_in_ramfc_w() + ram_fc_userd_w(), + gk20a_aperture_mask(g, &g->fifo.userd, + pbdma_userd_target_sys_mem_ncoh_f(), + pbdma_userd_target_vid_mem_f()) 
| + pbdma_userd_addr_f(addr_lo)); + + gk20a_mem_wr32(g, &c->inst_block, + ram_in_ramfc_w() + ram_fc_userd_hi_w(), + pbdma_userd_hi_addr_f(addr_hi)); + + return 0; +} + +int gk20a_fifo_setup_ramfc(struct channel_gk20a *c, + u64 gpfifo_base, u32 gpfifo_entries, + unsigned long timeout, + u32 flags) +{ + struct gk20a *g = c->g; + struct mem_desc *mem = &c->inst_block; + + gk20a_dbg_fn(""); + + gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v()); + + gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(), + pbdma_gp_base_offset_f( + u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); + + gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), + pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | + pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); + + gk20a_mem_wr32(g, mem, ram_fc_signature_w(), + c->g->ops.fifo.get_pbdma_signature(c->g)); + + gk20a_mem_wr32(g, mem, ram_fc_formats_w(), + pbdma_formats_gp_fermi0_f() | + pbdma_formats_pb_fermi1_f() | + pbdma_formats_mp_fermi0_f()); + + gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(), + pbdma_pb_header_priv_user_f() | + pbdma_pb_header_method_zero_f() | + pbdma_pb_header_subchannel_zero_f() | + pbdma_pb_header_level_main_f() | + pbdma_pb_header_first_true_f() | + pbdma_pb_header_type_inc_f()); + + gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(), + pbdma_subdevice_id_f(1) | + pbdma_subdevice_status_active_f() | + pbdma_subdevice_channel_dma_enable_f()); + + gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); + + gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), + g->ops.fifo.pbdma_acquire_val(timeout)); + + gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), + fifo_runlist_timeslice_timeout_128_f() | + fifo_runlist_timeslice_timescale_3_f() | + fifo_runlist_timeslice_enable_true_f()); + + gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(), + fifo_pb_timeslice_timeout_16_f() | + fifo_pb_timeslice_timescale_0_f() | + fifo_pb_timeslice_enable_true_f()); + + gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); + + if 
(c->is_privileged_channel) + gk20a_fifo_setup_ramfc_for_privileged_channel(c); + + return gk20a_fifo_commit_userd(c); +} + +static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) +{ + int shift = 0, value = 0; + + gk20a_channel_get_timescale_from_timeslice(c->g, + c->timeslice_us, &value, &shift); + + /* disable channel */ + c->g->ops.fifo.disable_channel(c); + + /* preempt the channel */ + WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); + + /* set new timeslice */ + gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(), + value | (shift << 12) | + fifo_runlist_timeslice_enable_true_f()); + + /* enable channel */ + c->g->ops.fifo.enable_channel(c); + + return 0; +} + +int gk20a_fifo_set_timeslice(struct channel_gk20a *ch, u32 timeslice) +{ + struct gk20a *g = ch->g; + + if (gk20a_is_channel_marked_as_tsg(ch)) { + gk20a_err(dev_from_gk20a(ch->g), + "invalid operation for TSG!\n"); + return -EINVAL; + } + + if (timeslice < g->min_timeslice_us || + timeslice > g->max_timeslice_us) + return -EINVAL; + + ch->timeslice_us = timeslice; + + gk20a_dbg(gpu_dbg_sched, "chid=%u timeslice=%u us", + ch->hw_chid, timeslice); + + return channel_gk20a_set_schedule_params(ch); +} + +int gk20a_fifo_set_priority(struct channel_gk20a *ch, u32 priority) +{ + if (gk20a_is_channel_marked_as_tsg(ch)) { + gk20a_err(dev_from_gk20a(ch->g), + "invalid operation for TSG!\n"); + return -EINVAL; + } + + /* set priority of graphics channel */ + switch (priority) { + case NVGPU_PRIORITY_LOW: + ch->timeslice_us = ch->g->timeslice_low_priority_us; + break; + case NVGPU_PRIORITY_MEDIUM: + ch->timeslice_us = ch->g->timeslice_medium_priority_us; + break; + case NVGPU_PRIORITY_HIGH: + ch->timeslice_us = ch->g->timeslice_high_priority_us; + break; + default: + pr_err("Unsupported priority"); + return -EINVAL; + } + + return channel_gk20a_set_schedule_params(ch); +} + +void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c) +{ + struct gk20a *g = 
c->g; + struct mem_desc *mem = &c->inst_block; + + gk20a_dbg_info("channel %d : set ramfc privileged_channel", c->hw_chid); + + /* Enable HCE priv mode for phys mode transfer */ + gk20a_mem_wr32(g, mem, ram_fc_hce_ctrl_w(), + pbdma_hce_ctrl_hce_priv_mode_yes_f()); +} + +int gk20a_fifo_setup_userd(struct channel_gk20a *c) +{ + struct gk20a *g = c->g; + struct mem_desc *mem = &g->fifo.userd; + u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32); + + gk20a_dbg_fn(""); + + gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); + + return 0; +} + +int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) +{ + int err; + + gk20a_dbg_fn(""); + + err = gk20a_alloc_inst_block(g, &ch->inst_block); + if (err) + return err; + + gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", + ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); + + gk20a_dbg_fn("done"); + return 0; +} + +void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch) +{ + gk20a_free_inst_block(g, &ch->inst_block); +} + +u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) +{ + return gk20a_bar1_readl(g, + c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w()); +} + +void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c) +{ + gk20a_bar1_writel(g, + c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(), + c->gpfifo.put); +} + +u32 gk20a_fifo_pbdma_acquire_val(u64 timeout) +{ + 
u32 val, exp, man; + unsigned int val_len; + + val = pbdma_acquire_retry_man_2_f() | + pbdma_acquire_retry_exp_2_f(); + + if (!timeout) + return val; + + timeout *= 80UL; + do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */ + timeout *= 1000000UL; /* ms -> ns */ + do_div(timeout, 1024); /* in unit of 1024ns */ + val_len = fls(timeout >> 32) + 32; + if (val_len == 32) + val_len = fls(timeout); + if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */ + exp = pbdma_acquire_timeout_exp_max_v(); + man = pbdma_acquire_timeout_man_max_v(); + } else if (val_len > 16) { + exp = val_len - 16; + man = timeout >> exp; + } else { + exp = 0; + man = timeout; + } + + val |= pbdma_acquire_timeout_exp_f(exp) | + pbdma_acquire_timeout_man_f(man) | + pbdma_acquire_timeout_en_enable_f(); + + return val; +} + void gk20a_init_fifo(struct gpu_ops *gops) { - gk20a_init_channel(gops); + gops->fifo.disable_channel = gk20a_fifo_disable_channel; + gops->fifo.enable_channel = gk20a_fifo_enable_channel; + gops->fifo.bind_channel = gk20a_fifo_channel_bind; + gops->fifo.unbind_channel = gk20a_fifo_channel_unbind; gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw; gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; @@ -3883,4 +4216,13 @@ void gk20a_init_fifo(struct gpu_ops *gops) gops->fifo.is_preempt_pending = gk20a_fifo_is_preempt_pending; gops->fifo.init_pbdma_intr_descs = gk20a_fifo_init_pbdma_intr_descs; gops->fifo.reset_enable_hw = gk20a_init_fifo_reset_enable_hw; + gops->fifo.setup_ramfc = gk20a_fifo_setup_ramfc; + gops->fifo.channel_set_priority = gk20a_fifo_set_priority; + gops->fifo.channel_set_timeslice = gk20a_fifo_set_timeslice; + gops->fifo.alloc_inst = gk20a_fifo_alloc_inst; + gops->fifo.free_inst = gk20a_fifo_free_inst; + gops->fifo.setup_userd = gk20a_fifo_setup_userd; + gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get; + gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put; + 
gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index a9703385..06269fa5 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -331,8 +331,8 @@ void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist); void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask, u32 runlist_state, int runlist_mutex_state); -u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c); -void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c); +u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c); +void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c); bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid); #ifdef CONFIG_DEBUG_FS @@ -351,8 +351,11 @@ void gk20a_dump_eng_status(struct gk20a *g, struct gk20a_debug_output *o); const char *gk20a_decode_ccsr_chan_status(u32 index); const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index); +void gk20a_fifo_enable_channel(struct channel_gk20a *ch); +void gk20a_fifo_disable_channel(struct channel_gk20a *ch); struct channel_gk20a *gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr); +void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a); u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g); @@ -361,5 +364,16 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, unsigned int id_type, int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg); void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, unsigned int id_type); +int gk20a_fifo_setup_ramfc(struct channel_gk20a *c, + u64 gpfifo_base, u32 gpfifo_entries, + unsigned long timeout, u32 flags); +int gk20a_fifo_set_priority(struct channel_gk20a *ch, u32 priority); +int gk20a_fifo_set_timeslice(struct channel_gk20a *ch, unsigned int timeslice); +void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c); +int 
gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); +void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch); +int gk20a_fifo_setup_userd(struct channel_gk20a *c); +u32 gk20a_fifo_pbdma_acquire_val(u64 timeout); + #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index c8932d63..4f50ae36 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -398,7 +398,9 @@ struct gpu_ops { int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch); void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch); int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base, - u32 gpfifo_entries, u32 flags); + u32 gpfifo_entries, + unsigned long acquire_timeout, + u32 flags); int (*resetup_ramfc)(struct channel_gk20a *c); int (*preempt_channel)(struct gk20a *g, u32 hw_chid); int (*preempt_tsg)(struct gk20a *g, u32 tsgid); @@ -456,6 +458,8 @@ struct gpu_ops { unsigned int id_type, unsigned int timeout_rc_type); void (*init_pbdma_intr_descs)(struct fifo_gk20a *f); int (*reset_enable_hw)(struct gk20a *g); + int (*setup_userd)(struct channel_gk20a *c); + u32 (*pbdma_acquire_val)(u64 timeout); } fifo; struct pmu_v { /*used for change of enum zbc update cmd id from ver 0 to ver1*/ diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index f09da825..fc653357 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -188,16 +188,18 @@ void gm20b_init_fifo(struct gpu_ops *gops) { gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw; gops->fifo.bind_channel = channel_gm20b_bind; - gops->fifo.unbind_channel = channel_gk20a_unbind; - gops->fifo.disable_channel = channel_gk20a_disable; - gops->fifo.enable_channel = channel_gk20a_enable; - gops->fifo.alloc_inst = channel_gk20a_alloc_inst; - gops->fifo.free_inst = channel_gk20a_free_inst; - gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; - 
gops->fifo.channel_set_priority = gk20a_channel_set_priority; - gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; - gops->fifo.userd_gp_get = gk20a_userd_gp_get; - gops->fifo.userd_gp_put = gk20a_userd_gp_put; + gops->fifo.unbind_channel = gk20a_fifo_channel_unbind; + gops->fifo.disable_channel = gk20a_fifo_disable_channel; + gops->fifo.enable_channel = gk20a_fifo_enable_channel; + gops->fifo.alloc_inst = gk20a_fifo_alloc_inst; + gops->fifo.free_inst = gk20a_fifo_free_inst; + gops->fifo.setup_ramfc = gk20a_fifo_setup_ramfc; + gops->fifo.channel_set_priority = gk20a_fifo_set_priority; + gops->fifo.channel_set_timeslice = gk20a_fifo_set_timeslice; + gops->fifo.setup_userd = gk20a_fifo_setup_userd; + gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get; + gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put; + gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val; gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c index 77ea1b47..6f576e3f 100644 --- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c @@ -80,7 +80,8 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c) } static int channel_gp10b_setup_ramfc(struct channel_gk20a *c, - u64 gpfifo_base, u32 gpfifo_entries, u32 flags) + u64 gpfifo_base, u32 gpfifo_entries, + unsigned long acquire_timeout, u32 flags) { struct gk20a *g = c->g; struct mem_desc *mem = &c->inst_block; @@ -121,7 +122,7 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c, gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), - channel_gk20a_pbdma_acquire_val(c)); + g->ops.fifo.pbdma_acquire_val(acquire_timeout)); gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), pbdma_runlist_timeslice_timeout_128_f() | @@ -139,7 +140,7 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c, 
gk20a_mem_wr32(g, mem, ram_fc_config_w(), pbdma_config_auth_level_privileged_f()); - gk20a_channel_setup_ramfc_for_privileged_channel(c); + gk20a_fifo_setup_ramfc_for_privileged_channel(c); } return channel_gp10b_commit_userd(c); diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 0c93a2ed..497f8c91 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -134,7 +134,8 @@ static void vgpu_channel_disable(struct channel_gk20a *ch) } static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base, - u32 gpfifo_entries, u32 flags) + u32 gpfifo_entries, + unsigned long acquire_timeout, u32 flags) { struct device __maybe_unused *d = dev_from_gk20a(ch->g); struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); -- cgit v1.2.2