path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
author    Terje Bergstrom <tbergstrom@nvidia.com>    2017-03-14 16:39:59 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2017-03-28 18:55:48 -0400
commit    f04031e5e8837abb2be3feb0ee30e1af54de7845 (patch)
tree      02a61de9f9a283a2c0fb02d7a204d2dd2176f5ff /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent    3e39798997f0726472e18a17462216094c084074 (diff)
gpu: nvgpu: Move programming of host registers to fifo
Move code that touches host registers and the instance block to the
fifo HAL. This involves adding HAL ops for the fifo HAL functions that
get called from outside fifo. This narrows the responsibility of
channel to managing channels in software and push buffers.

channel had a member ramfc defined, but it was never used, so remove
it.

pbdma_acquire_val consisted of both channel logic and hardware
programming. The channel logic was moved to the caller, and only the
hardware programming was moved to fifo.

Change-Id: Id005787f6cc91276b767e8e86325caf966913de9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1322423
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 344
1 file changed, 343 insertions(+), 1 deletion(-)
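
Before the diff, a minimal caller-side sketch (not part of this patch; the helper name channel_setup_hw_sketch and its error handling are illustrative only) of how channel code outside fifo is expected to dispatch through the HAL ops that gk20a_init_fifo() wires up below, rather than calling the gk20a functions directly:

	static int channel_setup_hw_sketch(struct channel_gk20a *c,
					   u64 gpfifo_base, u32 gpfifo_entries,
					   unsigned long timeout, u32 flags)
	{
		struct gk20a *g = c->g;
		int err;

		/* allocate the instance block for the channel */
		err = g->ops.fifo.alloc_inst(g, c);
		if (err)
			return err;

		/* program RAMFC (includes the USERD commit) */
		err = g->ops.fifo.setup_ramfc(c, gpfifo_base, gpfifo_entries,
					      timeout, flags);
		if (err) {
			g->ops.fifo.free_inst(g, c);
			return err;
		}

		/* zero USERD and bind the channel to the host */
		g->ops.fifo.setup_userd(c);
		g->ops.fifo.bind_channel(c);
		return 0;
	}

With this indirection, another chip's fifo HAL can override individual ops without channel code changing.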
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index fdac40de..743bc1f5 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3852,9 +3852,342 @@ void gk20a_dump_eng_status(struct gk20a *g,
 	gk20a_debug_output(o, "\n");
 }
 
+void gk20a_fifo_enable_channel(struct channel_gk20a *ch)
+{
+	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+		gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
+		ccsr_channel_enable_set_true_f());
+}
+
+void gk20a_fifo_disable_channel(struct channel_gk20a *ch)
+{
+	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+		gk20a_readl(ch->g,
+			ccsr_channel_r(ch->hw_chid)) |
+		ccsr_channel_enable_clr_true_f());
+}
+
+static void gk20a_fifo_channel_bind(struct channel_gk20a *c)
+{
+	struct gk20a *g = c->g;
+	u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >>
+		ram_in_base_shift_v();
+
+	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
+		c->hw_chid, inst_ptr);
+
+
+	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
+		 ~ccsr_channel_runlist_f(~0)) |
+		ccsr_channel_runlist_f(c->runlist_id));
+
+	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
+		ccsr_channel_inst_ptr_f(inst_ptr) |
+		gk20a_aperture_mask(g, &c->inst_block,
+			ccsr_channel_inst_target_sys_mem_ncoh_f(),
+			ccsr_channel_inst_target_vid_mem_f()) |
+		ccsr_channel_inst_bind_true_f());
+
+	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
+		 ~ccsr_channel_enable_set_f(~0)) |
+		ccsr_channel_enable_set_true_f());
+
+	wmb();
+	atomic_set(&c->bound, true);
+
+}
+
+void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
+{
+	struct gk20a *g = ch_gk20a->g;
+
+	gk20a_dbg_fn("");
+
+	if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
+		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+			ccsr_channel_inst_ptr_f(0) |
+			ccsr_channel_inst_bind_false_f());
+	}
+}
+
+static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
+{
+	u32 addr_lo;
+	u32 addr_hi;
+	struct gk20a *g = c->g;
+
+	gk20a_dbg_fn("");
+
+	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
+	addr_hi = u64_hi32(c->userd_iova);
+
+	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
+		c->hw_chid, (u64)c->userd_iova);
+
+	gk20a_mem_wr32(g, &c->inst_block,
+		ram_in_ramfc_w() + ram_fc_userd_w(),
+		gk20a_aperture_mask(g, &g->fifo.userd,
+			pbdma_userd_target_sys_mem_ncoh_f(),
+			pbdma_userd_target_vid_mem_f()) |
+		pbdma_userd_addr_f(addr_lo));
+
+	gk20a_mem_wr32(g, &c->inst_block,
+		ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+		pbdma_userd_hi_addr_f(addr_hi));
+
+	return 0;
+}
+
+int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
+		u64 gpfifo_base, u32 gpfifo_entries,
+		unsigned long timeout,
+		u32 flags)
+{
+	struct gk20a *g = c->g;
+	struct mem_desc *mem = &c->inst_block;
+
+	gk20a_dbg_fn("");
+
+	gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
+
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
+		pbdma_gp_base_offset_f(
+		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
+
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
+		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
+		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
+
+	gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
+		c->g->ops.fifo.get_pbdma_signature(c->g));
+
+	gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
+		pbdma_formats_gp_fermi0_f() |
+		pbdma_formats_pb_fermi1_f() |
+		pbdma_formats_mp_fermi0_f());
+
+	gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
+		pbdma_pb_header_priv_user_f() |
+		pbdma_pb_header_method_zero_f() |
+		pbdma_pb_header_subchannel_zero_f() |
+		pbdma_pb_header_level_main_f() |
+		pbdma_pb_header_first_true_f() |
+		pbdma_pb_header_type_inc_f());
+
+	gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
+		pbdma_subdevice_id_f(1) |
+		pbdma_subdevice_status_active_f() |
+		pbdma_subdevice_channel_dma_enable_f());
+
+	gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
+
+	gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
+		g->ops.fifo.pbdma_acquire_val(timeout));
+
+	gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
+		fifo_runlist_timeslice_timeout_128_f() |
+		fifo_runlist_timeslice_timescale_3_f() |
+		fifo_runlist_timeslice_enable_true_f());
+
+	gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
+		fifo_pb_timeslice_timeout_16_f() |
+		fifo_pb_timeslice_timescale_0_f() |
+		fifo_pb_timeslice_enable_true_f());
+
+	gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+
+	if (c->is_privileged_channel)
+		gk20a_fifo_setup_ramfc_for_privileged_channel(c);
+
+	return gk20a_fifo_commit_userd(c);
+}
+
+static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
+{
+	int shift = 0, value = 0;
+
+	gk20a_channel_get_timescale_from_timeslice(c->g,
+		c->timeslice_us, &value, &shift);
+
+	/* disable channel */
+	c->g->ops.fifo.disable_channel(c);
+
+	/* preempt the channel */
+	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
+
+	/* set new timeslice */
+	gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
+		value | (shift << 12) |
+		fifo_runlist_timeslice_enable_true_f());
+
+	/* enable channel */
+	c->g->ops.fifo.enable_channel(c);
+
+	return 0;
+}
+
+int gk20a_fifo_set_timeslice(struct channel_gk20a *ch, u32 timeslice)
+{
+	struct gk20a *g = ch->g;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		gk20a_err(dev_from_gk20a(ch->g),
+			"invalid operation for TSG!\n");
+		return -EINVAL;
+	}
+
+	if (timeslice < g->min_timeslice_us ||
+		timeslice > g->max_timeslice_us)
+		return -EINVAL;
+
+	ch->timeslice_us = timeslice;
+
+	gk20a_dbg(gpu_dbg_sched, "chid=%u timeslice=%u us",
+		ch->hw_chid, timeslice);
+
+	return channel_gk20a_set_schedule_params(ch);
+}
+
+int gk20a_fifo_set_priority(struct channel_gk20a *ch, u32 priority)
+{
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		gk20a_err(dev_from_gk20a(ch->g),
+			"invalid operation for TSG!\n");
+		return -EINVAL;
+	}
+
+	/* set priority of graphics channel */
+	switch (priority) {
+	case NVGPU_PRIORITY_LOW:
+		ch->timeslice_us = ch->g->timeslice_low_priority_us;
+		break;
+	case NVGPU_PRIORITY_MEDIUM:
+		ch->timeslice_us = ch->g->timeslice_medium_priority_us;
+		break;
+	case NVGPU_PRIORITY_HIGH:
+		ch->timeslice_us = ch->g->timeslice_high_priority_us;
+		break;
+	default:
+		pr_err("Unsupported priority");
+		return -EINVAL;
+	}
+
+	return channel_gk20a_set_schedule_params(ch);
+}
+
+void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
+{
+	struct gk20a *g = c->g;
+	struct mem_desc *mem = &c->inst_block;
+
+	gk20a_dbg_info("channel %d : set ramfc privileged_channel", c->hw_chid);
+
+	/* Enable HCE priv mode for phys mode transfer */
+	gk20a_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
+		pbdma_hce_ctrl_hce_priv_mode_yes_f());
+}
+
+int gk20a_fifo_setup_userd(struct channel_gk20a *c)
+{
+	struct gk20a *g = c->g;
+	struct mem_desc *mem = &g->fifo.userd;
+	u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);
+
+	gk20a_dbg_fn("");
+
+	gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
+	gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
+
+	return 0;
+}
+
+int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gk20a_alloc_inst_block(g, &ch->inst_block);
+	if (err)
+		return err;
+
+	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
+		ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+	gk20a_free_inst_block(g, &ch->inst_block);
+}
+
+u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
+{
+	return gk20a_bar1_readl(g,
+		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+}
+
+void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
+{
+	gk20a_bar1_writel(g,
+		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
+		c->gpfifo.put);
+}
+
+u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
+{
+	u32 val, exp, man;
+	unsigned int val_len;
+
+	val = pbdma_acquire_retry_man_2_f() |
+		pbdma_acquire_retry_exp_2_f();
+
+	if (!timeout)
+		return val;
+
+	timeout *= 80UL;
+	do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
+	timeout *= 1000000UL; /* ms -> ns */
+	do_div(timeout, 1024); /* in unit of 1024ns */
+	val_len = fls(timeout >> 32) + 32;
+	if (val_len == 32)
+		val_len = fls(timeout);
+	if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
+		exp = pbdma_acquire_timeout_exp_max_v();
+		man = pbdma_acquire_timeout_man_max_v();
+	} else if (val_len > 16) {
+		exp = val_len - 16;
+		man = timeout >> exp;
+	} else {
+		exp = 0;
+		man = timeout;
+	}
+
+	val |= pbdma_acquire_timeout_exp_f(exp) |
+		pbdma_acquire_timeout_man_f(man) |
+		pbdma_acquire_timeout_en_enable_f();
+
+	return val;
+}
+
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
-	gk20a_init_channel(gops);
+	gops->fifo.disable_channel = gk20a_fifo_disable_channel;
+	gops->fifo.enable_channel = gk20a_fifo_enable_channel;
+	gops->fifo.bind_channel = gk20a_fifo_channel_bind;
+	gops->fifo.unbind_channel = gk20a_fifo_channel_unbind;
 	gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
 	gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
 	gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
@@ -3883,4 +4216,13 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.is_preempt_pending = gk20a_fifo_is_preempt_pending;
 	gops->fifo.init_pbdma_intr_descs = gk20a_fifo_init_pbdma_intr_descs;
 	gops->fifo.reset_enable_hw = gk20a_init_fifo_reset_enable_hw;
+	gops->fifo.setup_ramfc = gk20a_fifo_setup_ramfc;
+	gops->fifo.channel_set_priority = gk20a_fifo_set_priority;
+	gops->fifo.channel_set_timeslice = gk20a_fifo_set_timeslice;
+	gops->fifo.alloc_inst = gk20a_fifo_alloc_inst;
+	gops->fifo.free_inst = gk20a_fifo_free_inst;
+	gops->fifo.setup_userd = gk20a_fifo_setup_userd;
+	gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get;
+	gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put;
+	gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val;
 }
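
As a reading aid (not part of the patch): gk20a_fifo_pbdma_acquire_val() above encodes 80% of the channel watchdog, in 1024 ns hardware ticks, as a 16-bit mantissa plus exponent. A standalone userspace sketch of that arithmetic, assuming a hypothetical 3000 ms watchdog and omitting the exp_max clamp:

	#include <stdint.h>
	#include <stdio.h>

	/* fls() equivalent: 1-based index of the highest set bit, 0 for 0 */
	static unsigned int fls32(uint32_t x)
	{
		return x ? 32 - __builtin_clz(x) : 0;
	}

	int main(void)
	{
		uint64_t timeout = 3000;	/* ms, hypothetical watchdog value */
		unsigned int val_len, exp, man;

		timeout = timeout * 80 / 100;	/* 80% of the watchdog */
		timeout *= 1000000UL;		/* ms -> ns */
		timeout /= 1024;		/* hardware ticks of 1024 ns */

		val_len = fls32(timeout >> 32) + 32;
		if (val_len == 32)
			val_len = fls32((uint32_t)timeout);

		if (val_len > 16) {		/* mantissa field is 16 bits */
			exp = val_len - 16;
			man = timeout >> exp;
		} else {
			exp = 0;
			man = timeout;
		}

		/* 3000 ms -> 2343750 ticks, val_len = 22, so exp = 6 and
		 * man = 36621; man << exp << 10 ~= 2.4e9 ns, i.e. ~2.4 s */
		printf("exp=%u man=%u (~%llu ns)\n", exp, man,
		       (unsigned long long)man << exp << 10);
		return 0;
	}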