summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c344
1 files changed, 343 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index fdac40de..743bc1f5 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3852,9 +3852,342 @@ void gk20a_dump_eng_status(struct gk20a *g,
3852 gk20a_debug_output(o, "\n"); 3852 gk20a_debug_output(o, "\n");
3853} 3853}
3854 3854
3855void gk20a_fifo_enable_channel(struct channel_gk20a *ch)
3856{
3857 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
3858 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
3859 ccsr_channel_enable_set_true_f());
3860}
3861
3862void gk20a_fifo_disable_channel(struct channel_gk20a *ch)
3863{
3864 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
3865 gk20a_readl(ch->g,
3866 ccsr_channel_r(ch->hw_chid)) |
3867 ccsr_channel_enable_clr_true_f());
3868}
3869
3870static void gk20a_fifo_channel_bind(struct channel_gk20a *c)
3871{
3872 struct gk20a *g = c->g;
3873 u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >>
3874 ram_in_base_shift_v();
3875
3876 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
3877 c->hw_chid, inst_ptr);
3878
3879
3880 gk20a_writel(g, ccsr_channel_r(c->hw_chid),
3881 (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
3882 ~ccsr_channel_runlist_f(~0)) |
3883 ccsr_channel_runlist_f(c->runlist_id));
3884
3885 gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
3886 ccsr_channel_inst_ptr_f(inst_ptr) |
3887 gk20a_aperture_mask(g, &c->inst_block,
3888 ccsr_channel_inst_target_sys_mem_ncoh_f(),
3889 ccsr_channel_inst_target_vid_mem_f()) |
3890 ccsr_channel_inst_bind_true_f());
3891
3892 gk20a_writel(g, ccsr_channel_r(c->hw_chid),
3893 (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
3894 ~ccsr_channel_enable_set_f(~0)) |
3895 ccsr_channel_enable_set_true_f());
3896
3897 wmb();
3898 atomic_set(&c->bound, true);
3899
3900}
3901
3902void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
3903{
3904 struct gk20a *g = ch_gk20a->g;
3905
3906 gk20a_dbg_fn("");
3907
3908 if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
3909 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
3910 ccsr_channel_inst_ptr_f(0) |
3911 ccsr_channel_inst_bind_false_f());
3912 }
3913}
3914
3915static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
3916{
3917 u32 addr_lo;
3918 u32 addr_hi;
3919 struct gk20a *g = c->g;
3920
3921 gk20a_dbg_fn("");
3922
3923 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
3924 addr_hi = u64_hi32(c->userd_iova);
3925
3926 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
3927 c->hw_chid, (u64)c->userd_iova);
3928
3929 gk20a_mem_wr32(g, &c->inst_block,
3930 ram_in_ramfc_w() + ram_fc_userd_w(),
3931 gk20a_aperture_mask(g, &g->fifo.userd,
3932 pbdma_userd_target_sys_mem_ncoh_f(),
3933 pbdma_userd_target_vid_mem_f()) |
3934 pbdma_userd_addr_f(addr_lo));
3935
3936 gk20a_mem_wr32(g, &c->inst_block,
3937 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
3938 pbdma_userd_hi_addr_f(addr_hi));
3939
3940 return 0;
3941}
3942
3943int gk20a_fifo_setup_ramfc(struct channel_gk20a *c,
3944 u64 gpfifo_base, u32 gpfifo_entries,
3945 unsigned long timeout,
3946 u32 flags)
3947{
3948 struct gk20a *g = c->g;
3949 struct mem_desc *mem = &c->inst_block;
3950
3951 gk20a_dbg_fn("");
3952
3953 gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
3954
3955 gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
3956 pbdma_gp_base_offset_f(
3957 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
3958
3959 gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
3960 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
3961 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
3962
3963 gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
3964 c->g->ops.fifo.get_pbdma_signature(c->g));
3965
3966 gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
3967 pbdma_formats_gp_fermi0_f() |
3968 pbdma_formats_pb_fermi1_f() |
3969 pbdma_formats_mp_fermi0_f());
3970
3971 gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
3972 pbdma_pb_header_priv_user_f() |
3973 pbdma_pb_header_method_zero_f() |
3974 pbdma_pb_header_subchannel_zero_f() |
3975 pbdma_pb_header_level_main_f() |
3976 pbdma_pb_header_first_true_f() |
3977 pbdma_pb_header_type_inc_f());
3978
3979 gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
3980 pbdma_subdevice_id_f(1) |
3981 pbdma_subdevice_status_active_f() |
3982 pbdma_subdevice_channel_dma_enable_f());
3983
3984 gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
3985
3986 gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
3987 g->ops.fifo.pbdma_acquire_val(timeout));
3988
3989 gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
3990 fifo_runlist_timeslice_timeout_128_f() |
3991 fifo_runlist_timeslice_timescale_3_f() |
3992 fifo_runlist_timeslice_enable_true_f());
3993
3994 gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
3995 fifo_pb_timeslice_timeout_16_f() |
3996 fifo_pb_timeslice_timescale_0_f() |
3997 fifo_pb_timeslice_enable_true_f());
3998
3999 gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
4000
4001 if (c->is_privileged_channel)
4002 gk20a_fifo_setup_ramfc_for_privileged_channel(c);
4003
4004 return gk20a_fifo_commit_userd(c);
4005}
4006
4007static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
4008{
4009 int shift = 0, value = 0;
4010
4011 gk20a_channel_get_timescale_from_timeslice(c->g,
4012 c->timeslice_us, &value, &shift);
4013
4014 /* disable channel */
4015 c->g->ops.fifo.disable_channel(c);
4016
4017 /* preempt the channel */
4018 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
4019
4020 /* set new timeslice */
4021 gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
4022 value | (shift << 12) |
4023 fifo_runlist_timeslice_enable_true_f());
4024
4025 /* enable channel */
4026 c->g->ops.fifo.enable_channel(c);
4027
4028 return 0;
4029}
4030
4031int gk20a_fifo_set_timeslice(struct channel_gk20a *ch, u32 timeslice)
4032{
4033 struct gk20a *g = ch->g;
4034
4035 if (gk20a_is_channel_marked_as_tsg(ch)) {
4036 gk20a_err(dev_from_gk20a(ch->g),
4037 "invalid operation for TSG!\n");
4038 return -EINVAL;
4039 }
4040
4041 if (timeslice < g->min_timeslice_us ||
4042 timeslice > g->max_timeslice_us)
4043 return -EINVAL;
4044
4045 ch->timeslice_us = timeslice;
4046
4047 gk20a_dbg(gpu_dbg_sched, "chid=%u timeslice=%u us",
4048 ch->hw_chid, timeslice);
4049
4050 return channel_gk20a_set_schedule_params(ch);
4051}
4052
4053int gk20a_fifo_set_priority(struct channel_gk20a *ch, u32 priority)
4054{
4055 if (gk20a_is_channel_marked_as_tsg(ch)) {
4056 gk20a_err(dev_from_gk20a(ch->g),
4057 "invalid operation for TSG!\n");
4058 return -EINVAL;
4059 }
4060
4061 /* set priority of graphics channel */
4062 switch (priority) {
4063 case NVGPU_PRIORITY_LOW:
4064 ch->timeslice_us = ch->g->timeslice_low_priority_us;
4065 break;
4066 case NVGPU_PRIORITY_MEDIUM:
4067 ch->timeslice_us = ch->g->timeslice_medium_priority_us;
4068 break;
4069 case NVGPU_PRIORITY_HIGH:
4070 ch->timeslice_us = ch->g->timeslice_high_priority_us;
4071 break;
4072 default:
4073 pr_err("Unsupported priority");
4074 return -EINVAL;
4075 }
4076
4077 return channel_gk20a_set_schedule_params(ch);
4078}
4079
4080void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
4081{
4082 struct gk20a *g = c->g;
4083 struct mem_desc *mem = &c->inst_block;
4084
4085 gk20a_dbg_info("channel %d : set ramfc privileged_channel", c->hw_chid);
4086
4087 /* Enable HCE priv mode for phys mode transfer */
4088 gk20a_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
4089 pbdma_hce_ctrl_hce_priv_mode_yes_f());
4090}
4091
4092int gk20a_fifo_setup_userd(struct channel_gk20a *c)
4093{
4094 struct gk20a *g = c->g;
4095 struct mem_desc *mem = &g->fifo.userd;
4096 u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);
4097
4098 gk20a_dbg_fn("");
4099
4100 gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
4101 gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
4102 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
4103 gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
4104 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
4105 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
4106 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
4107 gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
4108 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
4109 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
4110
4111 return 0;
4112}
4113
4114int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
4115{
4116 int err;
4117
4118 gk20a_dbg_fn("");
4119
4120 err = gk20a_alloc_inst_block(g, &ch->inst_block);
4121 if (err)
4122 return err;
4123
4124 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
4125 ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));
4126
4127 gk20a_dbg_fn("done");
4128 return 0;
4129}
4130
4131void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
4132{
4133 gk20a_free_inst_block(g, &ch->inst_block);
4134}
4135
4136u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
4137{
4138 return gk20a_bar1_readl(g,
4139 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
4140}
4141
4142void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
4143{
4144 gk20a_bar1_writel(g,
4145 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
4146 c->gpfifo.put);
4147}
4148
4149u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
4150{
4151 u32 val, exp, man;
4152 unsigned int val_len;
4153
4154 val = pbdma_acquire_retry_man_2_f() |
4155 pbdma_acquire_retry_exp_2_f();
4156
4157 if (!timeout)
4158 return val;
4159
4160 timeout *= 80UL;
4161 do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
4162 timeout *= 1000000UL; /* ms -> ns */
4163 do_div(timeout, 1024); /* in unit of 1024ns */
4164 val_len = fls(timeout >> 32) + 32;
4165 if (val_len == 32)
4166 val_len = fls(timeout);
4167 if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
4168 exp = pbdma_acquire_timeout_exp_max_v();
4169 man = pbdma_acquire_timeout_man_max_v();
4170 } else if (val_len > 16) {
4171 exp = val_len - 16;
4172 man = timeout >> exp;
4173 } else {
4174 exp = 0;
4175 man = timeout;
4176 }
4177
4178 val |= pbdma_acquire_timeout_exp_f(exp) |
4179 pbdma_acquire_timeout_man_f(man) |
4180 pbdma_acquire_timeout_en_enable_f();
4181
4182 return val;
4183}
4184
3855void gk20a_init_fifo(struct gpu_ops *gops) 4185void gk20a_init_fifo(struct gpu_ops *gops)
3856{ 4186{
3857 gk20a_init_channel(gops); 4187 gops->fifo.disable_channel = gk20a_fifo_disable_channel;
4188 gops->fifo.enable_channel = gk20a_fifo_enable_channel;
4189 gops->fifo.bind_channel = gk20a_fifo_channel_bind;
4190 gops->fifo.unbind_channel = gk20a_fifo_channel_unbind;
3858 gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw; 4191 gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw;
3859 gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; 4192 gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
3860 gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; 4193 gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg;
@@ -3883,4 +4216,13 @@ void gk20a_init_fifo(struct gpu_ops *gops)
3883 gops->fifo.is_preempt_pending = gk20a_fifo_is_preempt_pending; 4216 gops->fifo.is_preempt_pending = gk20a_fifo_is_preempt_pending;
3884 gops->fifo.init_pbdma_intr_descs = gk20a_fifo_init_pbdma_intr_descs; 4217 gops->fifo.init_pbdma_intr_descs = gk20a_fifo_init_pbdma_intr_descs;
3885 gops->fifo.reset_enable_hw = gk20a_init_fifo_reset_enable_hw; 4218 gops->fifo.reset_enable_hw = gk20a_init_fifo_reset_enable_hw;
4219 gops->fifo.setup_ramfc = gk20a_fifo_setup_ramfc;
4220 gops->fifo.channel_set_priority = gk20a_fifo_set_priority;
4221 gops->fifo.channel_set_timeslice = gk20a_fifo_set_timeslice;
4222 gops->fifo.alloc_inst = gk20a_fifo_alloc_inst;
4223 gops->fifo.free_inst = gk20a_fifo_free_inst;
4224 gops->fifo.setup_userd = gk20a_fifo_setup_userd;
4225 gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get;
4226 gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put;
4227 gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val;
3886} 4228}