diff options
author | Lakshmanan M <lm@nvidia.com> | 2016-06-02 00:04:46 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-06-07 15:31:34 -0400 |
commit | 6299b00beb9dabdd53c211b02658d022827b3232 (patch) | |
tree | 941d8dd8aae8f7f8c73329e182984c36a5a9bf88 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |
parent | 3d7263d3cafdcfc57a6d6b9f829562845d116294 (diff) |
gpu: nvgpu: Add multiple engine and runlist support
This CL covers the following modifications:
1) Added multiple engine_info support
2) Added multiple runlist_info support
3) Initial changes for ASYNC CE support
4) Added ASYNC CE interrupt handling support
for gm206 GPU family
5) Added generic mechanism to identify the
CE engine pri_base address for gm206
(CE0, CE1 and CE2)
6) Removed hard-coded engine_id logic and
made it generic
7) Code cleanup for readability
JIRA DNVGPU-26
Change-Id: I2c3846c40bcc8d10c2dfb225caa4105fc9123b65
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1155963
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 73 |
1 files changed, 56 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 17307b63..fb777948 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -326,9 +326,12 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | |||
326 | bool gr_enabled; | 326 | bool gr_enabled; |
327 | bool ctxsw_active; | 327 | bool ctxsw_active; |
328 | bool gr_busy; | 328 | bool gr_busy; |
329 | u32 gr_engine_id; | ||
329 | 330 | ||
330 | gk20a_dbg_fn(""); | 331 | gk20a_dbg_fn(""); |
331 | 332 | ||
333 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
334 | |||
332 | do { | 335 | do { |
333 | /* fmodel: host gets fifo_engine_status(gr) from gr | 336 | /* fmodel: host gets fifo_engine_status(gr) from gr |
334 | only when gr_status is read */ | 337 | only when gr_status is read */ |
@@ -338,7 +341,7 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | |||
338 | mc_enable_pgraph_enabled_f(); | 341 | mc_enable_pgraph_enabled_f(); |
339 | 342 | ||
340 | ctxsw_active = gk20a_readl(g, | 343 | ctxsw_active = gk20a_readl(g, |
341 | fifo_engine_status_r(ENGINE_GR_GK20A)) & | 344 | fifo_engine_status_r(gr_engine_id)) & |
342 | fifo_engine_status_ctxsw_in_progress_f(); | 345 | fifo_engine_status_ctxsw_in_progress_f(); |
343 | 346 | ||
344 | gr_busy = gk20a_readl(g, gr_engine_status_r()) & | 347 | gr_busy = gk20a_readl(g, gr_engine_status_r()) & |
@@ -3905,11 +3908,14 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, | |||
3905 | void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) | 3908 | void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) |
3906 | { | 3909 | { |
3907 | struct fifo_gk20a *f = &g->fifo; | 3910 | struct fifo_gk20a *f = &g->fifo; |
3908 | struct fifo_engine_info_gk20a *gr_info = | 3911 | struct fifo_engine_info_gk20a *gr_info = NULL; |
3909 | f->engine_info + ENGINE_GR_GK20A; | ||
3910 | unsigned long end_jiffies = jiffies + | 3912 | unsigned long end_jiffies = jiffies + |
3911 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | 3913 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); |
3912 | u32 ret; | 3914 | u32 ret; |
3915 | u32 engine_id; | ||
3916 | |||
3917 | engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
3918 | gr_info = (f->engine_info + engine_id); | ||
3913 | 3919 | ||
3914 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); | 3920 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); |
3915 | if (ret) { | 3921 | if (ret) { |
@@ -4187,9 +4193,13 @@ int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, | |||
4187 | struct zbc_entry *zbc_val) | 4193 | struct zbc_entry *zbc_val) |
4188 | { | 4194 | { |
4189 | struct fifo_gk20a *f = &g->fifo; | 4195 | struct fifo_gk20a *f = &g->fifo; |
4190 | struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; | 4196 | struct fifo_engine_info_gk20a *gr_info = NULL; |
4191 | unsigned long end_jiffies; | 4197 | unsigned long end_jiffies; |
4192 | int ret; | 4198 | int ret; |
4199 | u32 engine_id; | ||
4200 | |||
4201 | engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
4202 | gr_info = (f->engine_info + engine_id); | ||
4193 | 4203 | ||
4194 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); | 4204 | ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); |
4195 | if (ret) { | 4205 | if (ret) { |
@@ -4308,6 +4318,29 @@ void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) | |||
4308 | gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter); | 4318 | gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter); |
4309 | } | 4319 | } |
4310 | 4320 | ||
4321 | void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config) | ||
4322 | { | ||
4323 | u32 engine_idx; | ||
4324 | u32 active_engine_id = 0; | ||
4325 | struct fifo_engine_info_gk20a *engine_info = NULL; | ||
4326 | struct fifo_gk20a *f = &g->fifo; | ||
4327 | |||
4328 | for (engine_idx = 0; engine_idx < f->num_engines; ++engine_idx) { | ||
4329 | active_engine_id = f->active_engines_list[engine_idx]; | ||
4330 | engine_info = &f->engine_info[active_engine_id]; | ||
4331 | |||
4332 | /* gr_engine supports both BLCG and ELCG */ | ||
4333 | if ((cgmode == BLCG_MODE) && | ||
4334 | (engine_info->engine_enum == ENGINE_GR_GK20A)) { | ||
4335 | gr_gk20a_init_blcg_mode(g, mode_config, active_engine_id); | ||
4336 | break; | ||
4337 | } else if (cgmode == ELCG_MODE) | ||
4338 | gr_gk20a_init_elcg_mode(g, mode_config, active_engine_id); | ||
4339 | else | ||
4340 | gk20a_err(dev_from_gk20a(g), "invalid cg mode %d %d", cgmode, mode_config); | ||
4341 | } | ||
4342 | } | ||
4343 | |||
4311 | static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | 4344 | static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) |
4312 | { | 4345 | { |
4313 | u32 gpc_index, gpc_tpc_count, gpc_zcull_count; | 4346 | u32 gpc_index, gpc_tpc_count, gpc_zcull_count; |
@@ -4710,8 +4743,9 @@ static int gk20a_init_gr_prepare(struct gk20a *g) | |||
4710 | { | 4743 | { |
4711 | u32 gpfifo_ctrl, pmc_en; | 4744 | u32 gpfifo_ctrl, pmc_en; |
4712 | u32 err = 0; | 4745 | u32 err = 0; |
4713 | struct fifo_engine_info_gk20a *ce_info = | 4746 | u32 ce_reset_mask; |
4714 | g->fifo.engine_info + ENGINE_CE2_GK20A; | 4747 | |
4748 | ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g); | ||
4715 | 4749 | ||
4716 | /* disable fifo access */ | 4750 | /* disable fifo access */ |
4717 | pmc_en = gk20a_readl(g, mc_enable_r()); | 4751 | pmc_en = gk20a_readl(g, mc_enable_r()); |
@@ -4725,12 +4759,12 @@ static int gk20a_init_gr_prepare(struct gk20a *g) | |||
4725 | gk20a_reset(g, mc_enable_pgraph_enabled_f() | 4759 | gk20a_reset(g, mc_enable_pgraph_enabled_f() |
4726 | | mc_enable_blg_enabled_f() | 4760 | | mc_enable_blg_enabled_f() |
4727 | | mc_enable_perfmon_enabled_f() | 4761 | | mc_enable_perfmon_enabled_f() |
4728 | | ce_info->reset_mask); | 4762 | | ce_reset_mask); |
4729 | 4763 | ||
4730 | gr_gk20a_load_gating_prod(g); | 4764 | gr_gk20a_load_gating_prod(g); |
4765 | |||
4731 | /* Disable elcg until it gets enabled later in the init*/ | 4766 | /* Disable elcg until it gets enabled later in the init*/ |
4732 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); | 4767 | gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); |
4733 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); | ||
4734 | 4768 | ||
4735 | /* enable fifo access */ | 4769 | /* enable fifo access */ |
4736 | gk20a_writel(g, gr_gpfifo_ctl_r(), | 4770 | gk20a_writel(g, gr_gpfifo_ctl_r(), |
@@ -5210,11 +5244,9 @@ int gk20a_enable_gr_hw(struct gk20a *g) | |||
5210 | static void gr_gk20a_enable_elcg(struct gk20a *g) | 5244 | static void gr_gk20a_enable_elcg(struct gk20a *g) |
5211 | { | 5245 | { |
5212 | if (g->elcg_enabled) { | 5246 | if (g->elcg_enabled) { |
5213 | gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); | 5247 | gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); |
5214 | gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); | ||
5215 | } else { | 5248 | } else { |
5216 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); | 5249 | gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); |
5217 | gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); | ||
5218 | } | 5250 | } |
5219 | } | 5251 | } |
5220 | 5252 | ||
@@ -6106,6 +6138,7 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6106 | u32 gr_intr = gk20a_readl(g, gr_intr_r()); | 6138 | u32 gr_intr = gk20a_readl(g, gr_intr_r()); |
6107 | struct channel_gk20a *ch = NULL; | 6139 | struct channel_gk20a *ch = NULL; |
6108 | int tsgid = NVGPU_INVALID_TSG_ID; | 6140 | int tsgid = NVGPU_INVALID_TSG_ID; |
6141 | u32 gr_engine_id; | ||
6109 | 6142 | ||
6110 | gk20a_dbg_fn(""); | 6143 | gk20a_dbg_fn(""); |
6111 | gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); | 6144 | gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); |
@@ -6113,6 +6146,8 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6113 | if (!gr_intr) | 6146 | if (!gr_intr) |
6114 | return 0; | 6147 | return 0; |
6115 | 6148 | ||
6149 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
6150 | |||
6116 | grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); | 6151 | grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); |
6117 | grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); | 6152 | grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); |
6118 | grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1); | 6153 | grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1); |
@@ -6283,13 +6318,13 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6283 | 6318 | ||
6284 | if (need_reset) { | 6319 | if (need_reset) { |
6285 | if (tsgid != NVGPU_INVALID_TSG_ID) | 6320 | if (tsgid != NVGPU_INVALID_TSG_ID) |
6286 | gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), | 6321 | gk20a_fifo_recover(g, BIT(gr_engine_id), |
6287 | tsgid, true, true, true); | 6322 | tsgid, true, true, true); |
6288 | else if (ch) | 6323 | else if (ch) |
6289 | gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), | 6324 | gk20a_fifo_recover(g, BIT(gr_engine_id), |
6290 | ch->hw_chid, false, true, true); | 6325 | ch->hw_chid, false, true, true); |
6291 | else | 6326 | else |
6292 | gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), | 6327 | gk20a_fifo_recover(g, BIT(gr_engine_id), |
6293 | 0, false, false, true); | 6328 | 0, false, false, true); |
6294 | } | 6329 | } |
6295 | 6330 | ||
@@ -8441,6 +8476,10 @@ static u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g) | |||
8441 | static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, | 8476 | static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, |
8442 | struct gk20a_debug_output *o) | 8477 | struct gk20a_debug_output *o) |
8443 | { | 8478 | { |
8479 | u32 gr_engine_id; | ||
8480 | |||
8481 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
8482 | |||
8444 | gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", | 8483 | gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", |
8445 | gk20a_readl(g, gr_status_r())); | 8484 | gk20a_readl(g, gr_status_r())); |
8446 | gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", | 8485 | gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", |
@@ -8460,7 +8499,7 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, | |||
8460 | gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", | 8499 | gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", |
8461 | gk20a_readl(g, gr_fecs_intr_r())); | 8500 | gk20a_readl(g, gr_fecs_intr_r())); |
8462 | gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", | 8501 | gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", |
8463 | gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A))); | 8502 | gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); |
8464 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", | 8503 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", |
8465 | gk20a_readl(g, gr_activity_0_r())); | 8504 | gk20a_readl(g, gr_activity_0_r())); |
8466 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", | 8505 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", |