From 6299b00beb9dabdd53c211b02658d022827b3232 Mon Sep 17 00:00:00 2001
From: Lakshmanan M
Date: Thu, 2 Jun 2016 09:34:46 +0530
Subject: gpu: nvgpu: Add multiple engine and runlist support

This CL covers the following modifications:

1) Added multiple engine_info support
2) Added multiple runlist_info support
3) Initial changes for ASYNC CE support
4) Added ASYNC CE interrupt handling support for gm206 GPU family
5) Added generic mechanism to identify the CE engine pri_base address for
   gm206 (CE0, CE1 and CE2)
6) Removed hard-coded engine_id logic and replaced it with a generic lookup
7) Code cleanup for readability

JIRA DNVGPU-26

Change-Id: I2c3846c40bcc8d10c2dfb225caa4105fc9123b65
Signed-off-by: Lakshmanan M
Reviewed-on: http://git-master/r/1155963
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/Makefile              |   3 +-
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c     |   6 +-
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.h     |   6 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  12 +-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  10 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    | 489 +++++++++++++++++++++++---------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |  19 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c         |   2 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h         |   6 +-
 drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c   |   7 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |  73 +++--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |   8 +
 drivers/gpu/nvgpu/gk20a/hal_gk20a.c     |   3 +
 drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h |   4 +
 drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h  |  16 ++
 drivers/gpu/nvgpu/gk20a/mc_gk20a.c      |  57 +++-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c     |  38 ++-
 drivers/gpu/nvgpu/gm206/ce_gm206.c      | 107 +++++++
 drivers/gpu/nvgpu/gm206/ce_gm206.h      |  26 ++
 drivers/gpu/nvgpu/gm206/fifo_gm206.c    |   2 +
 drivers/gpu/nvgpu/gm206/hal_gm206.c     |   7 +-
 drivers/gpu/nvgpu/gm206/hw_proj_gm206.h |   4 +
 drivers/gpu/nvgpu/gm206/hw_top_gm206.h  |  16 ++
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c    |  15 +-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |   5 +-
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c     |   3 +
 drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h |   4 +
 drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h  |  16 ++
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c      |  64 ++---
 29 files changed, 791 insertions(+), 237 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gm206/ce_gm206.c
 create mode 100644 drivers/gpu/nvgpu/gm206/ce_gm206.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 2857cd5a..9e60e6a5 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -80,7 +80,8 @@ nvgpu-y := \
 	gm206/hal_gm206.o \
 	gm206/gr_gm206.o \
 	gm206/acr_gm206.o \
-	gm206/pmu_gm206.o
+	gm206/pmu_gm206.o \
+	gm206/ce_gm206.o

 nvgpu-$(CONFIG_TEGRA_GK20A) += gk20a/platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 00635c4d..96d38b11 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Graphics Copy Engine (gr host)
  *
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -57,7 +57,7 @@ static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr) return ce2_intr_status_launcherr_pending_f(); } -void gk20a_ce2_isr(struct gk20a *g) +void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r()); u32 clear_intr = 0; @@ -75,7 +75,7 @@ void gk20a_ce2_isr(struct gk20a *g) return; } -void gk20a_ce2_nonstall_isr(struct gk20a *g) +void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r()); diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index bf4e3066..5ceb69e1 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h @@ -3,7 +3,7 @@ * * GK20A graphics copy engine (gr host) * - * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -25,7 +25,7 @@ #include "tsg_gk20a.h" void gk20a_init_ce2(struct gpu_ops *gops); -void gk20a_ce2_isr(struct gk20a *g); -void gk20a_ce2_nonstall_isr(struct gk20a *g); +void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base); +void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); #endif /*__CE2_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 3159f026..a5243834 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -329,11 +329,17 @@ static void channel_gk20a_bind(struct channel_gk20a *c) { struct gk20a *g = c->g; struct fifo_gk20a *f = &g->fifo; - struct fifo_engine_info_gk20a *engine_info = - f->engine_info + ENGINE_GR_GK20A; + u32 engine_id; + struct fifo_engine_info_gk20a *engine_info = NULL; u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v(); + /* TODO:Need to handle non GR engine channel bind path */ + engine_id = gk20a_fifo_get_gr_engine_id(g); + + /* Consider 1st available GR engine */ + engine_info = (f->engine_info + engine_id); + gk20a_dbg_info("bind channel %d inst ptr 0x%08x", c->hw_chid, inst_ptr); @@ -1716,7 +1722,7 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) /* Get failing engine data */ engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg); - if (engine_id >= g->fifo.max_engines) { + if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { /* If no failing engine, abort the channels */ if (gk20a_is_channel_marked_as_tsg(ch)) { struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 6b9238e9..5e57502e 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1102,11 +1102,10 @@ static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, false); g->ops.clock_gating.slcg_ltc_load_gating_prod(g, false); - gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A); + gr_gk20a_init_cg_mode(g, BLCG_MODE, BLCG_RUN); g->elcg_enabled = false; - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); } @@ -1126,9 +1125,8 @@ static int 
dbg_set_powergate(struct dbg_session_gk20a *dbg_s, --g->dbg_powergating_disabled_refcount == 0) { g->elcg_enabled = true; - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); - gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); + gr_gk20a_init_cg_mode(g, BLCG_MODE, BLCG_AUTO); g->ops.clock_gating.slcg_ltc_load_gating_prod(g, g->slcg_enabled); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 17efe5ca..766ea749 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -40,6 +40,131 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, u32 hw_chid, bool add, bool wait_for_finish); +u32 gk20a_fifo_get_engine_ids(struct gk20a *g, + u32 engine_id[], u32 engine_id_sz, + u32 engine_enum) +{ + struct fifo_gk20a *f = NULL; + u32 instance_cnt = 0; + u32 engine_id_idx; + u32 active_engine_id = 0; + struct fifo_engine_info_gk20a *info = NULL; + + if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) { + f = &g->fifo; + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + active_engine_id = f->active_engines_list[engine_id_idx]; + info = &f->engine_info[active_engine_id]; + + if (info->engine_enum == engine_enum) { + if (instance_cnt < engine_id_sz) { + engine_id[instance_cnt] = active_engine_id; + ++instance_cnt; + } else { + gk20a_dbg_info("warning engine_id table sz is small %d", + engine_id_sz); + } + } + } + } + return instance_cnt; +} + +struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id) +{ + struct fifo_gk20a *f = NULL; + u32 engine_id_idx; + struct fifo_engine_info_gk20a *info = NULL; + + if (!g) + return info; + + f = &g->fifo; + + if (engine_id < f->max_engines) { + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + if (engine_id == f->active_engines_list[engine_id_idx]) { + info = &f->engine_info[engine_id]; + break; + } + } + } + + if (!info) + gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id); + + return info; +} + +bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id) +{ + struct fifo_gk20a *f = NULL; + u32 engine_id_idx; + bool valid = false; + + if (!g) + return valid; + + f = &g->fifo; + + if (engine_id < f->max_engines) { + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + if (engine_id == f->active_engines_list[engine_id_idx]) { + valid = true; + break; + } + } + } + + if (!valid) + gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id); + + return valid; +} + +u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g) +{ + u32 gr_engine_cnt = 0; + u32 gr_engine_id = FIFO_INVAL_ENGINE_ID; + + /* Consider 1st available GR engine */ + gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id, + 1, ENGINE_GR_GK20A); + + if (!gr_engine_cnt) { + gk20a_err(dev_from_gk20a(g), "No GR engine available on this device!\n"); + } + + return gr_engine_id; +} + +u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g) +{ + u32 reset_mask = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; + struct fifo_gk20a *f = NULL; + u32 engine_id_idx; + struct fifo_engine_info_gk20a *engine_info; + u32 active_engine_id = 0; + + if (!g) + return reset_mask; + + f = &g->fifo; + + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + active_engine_id = f->active_engines_list[engine_id_idx]; + 
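/* Illustration only, not part of this patch: a minimal sketch of how a
 * caller can use the new enumeration helper introduced above. The
 * function name and the table size of 8 are hypothetical. */
static u32 my_first_async_ce_id(struct gk20a *g)
{
	u32 ce_ids[8];
	u32 count;

	/* Collect active engine IDs whose type is async copy engine */
	count = gk20a_fifo_get_engine_ids(g, ce_ids, 8,
				ENGINE_ASYNC_CE_GK20A);

	/* Take the first instance; fall back to the invalid marker */
	return count ? ce_ids[0] : FIFO_INVAL_ENGINE_ID;
}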
engine_info = &f->engine_info[active_engine_id];
+		engine_enum = engine_info->engine_enum;
+
+		if ((engine_enum == ENGINE_GRCE_GK20A) ||
+			(engine_enum == ENGINE_ASYNC_CE_GK20A))
+			reset_mask |= engine_info->reset_mask;
+	}
+
+	return reset_mask;
+}
+
 /*
  * Link engine IDs to MMU IDs and vice versa.
  */
@@ -47,12 +172,14 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 {
 	u32 fault_id = ~0;
+	struct fifo_engine_info_gk20a *engine_info;

-	if (engine_id < ENGINE_INVAL_GK20A) {
-		struct fifo_engine_info_gk20a *info =
-			&g->fifo.engine_info[engine_id];
+	engine_info = gk20a_fifo_get_engine_info(g, engine_id);

-		fault_id = info->fault_id;
+	if (engine_info) {
+		fault_id = engine_info->fault_id;
+	} else {
+		gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id);
 	}
 	return fault_id;
 }
@@ -60,18 +187,19 @@ static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
 {
 	u32 engine_id;
-	u32 return_engine_id = ~0;
+	u32 active_engine_id;
+	struct fifo_engine_info_gk20a *engine_info;
+	struct fifo_gk20a *f = &g->fifo;

-	for (engine_id = 0; engine_id < ENGINE_INVAL_GK20A; engine_id++) {
-		struct fifo_engine_info_gk20a *info =
-			&g->fifo.engine_info[engine_id];
+	for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
+		active_engine_id = f->active_engines_list[engine_id];
+		engine_info = &g->fifo.engine_info[active_engine_id];

-		if (info->fault_id == fault_id) {
-			return_engine_id = engine_id;
+		if (engine_info->fault_id == fault_id)
 			break;
-		}
+		active_engine_id = FIFO_INVAL_ENGINE_ID;
 	}
-	return return_engine_id;
+	return active_engine_id;
 }

 int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
@@ -82,10 +210,15 @@ int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
 	gk20a_dbg_info("engine type %d", engine_type);
 	if (engine_type == top_device_info_type_enum_graphics_v())
 		ret = ENGINE_GR_GK20A;
-	else if (engine_type == top_device_info_type_enum_copy2_v()) {
-		ret = ENGINE_CE2_GK20A;
+	else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
+		(engine_type <= top_device_info_type_enum_copy2_v())) {
+		/* Let's assume every CE engine has a separate runlist at this point.
+		 * We can identify the ENGINE_GRCE_GK20A type CE using runlist_id
+		 * comparison logic with the GR runlist_id in init_engine_info() */
+		ret = ENGINE_ASYNC_CE_GK20A;
+		/* inst_id starts from CE0 to CE2 */
 		if (inst_id)
-			*inst_id = 0x2;
+			*inst_id = (engine_type - top_device_info_type_enum_copy0_v());
 	} else
 		gk20a_err(g->dev, "unknown engine %d", engine_type);

@@ -108,12 +241,11 @@ static int init_engine_info(struct fifo_gk20a *f)
 	u32 inst_id = 0;
 	u32 pri_base = 0;
 	u32 fault_id = 0;
+	u32 gr_runlist_id = ~0;

 	gk20a_dbg_fn("");

-	/* all we really care about finding is the graphics entry */
-	/* especially early on in sim it probably thinks it has more */
-	f->num_engines = 2;
+	f->num_engines = 0;

 	for (i = 0; i < max_info_entries; i++) {
 		u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
@@ -168,8 +300,7 @@ static int init_engine_info(struct fifo_gk20a *f)
 				g->ops.fifo.engine_enum_from_type(g,
 						engine_type, &inst_id);
 		} else if (entry == top_device_info_entry_data_v()) {
-			/* gk20a don't support device_info_data
-			   packet parsing */
+			/* gk20a doesn't support device_info_data packet parsing */
 			if (g->ops.fifo.device_info_data_parse)
 				g->ops.fifo.device_info_data_parse(g,
table_entry, &inst_id, &pri_base, @@ -179,7 +310,7 @@ static int init_engine_info(struct fifo_gk20a *f) if (!top_device_info_chain_v(table_entry)) { if (engine_enum < ENGINE_INVAL_GK20A) { struct fifo_engine_info_gk20a *info = - &g->fifo.engine_info[engine_enum]; + &g->fifo.engine_info[engine_id]; info->intr_mask |= BIT(intr_id); info->reset_mask |= BIT(reset_id); @@ -188,11 +319,25 @@ static int init_engine_info(struct fifo_gk20a *f) info->inst_id = inst_id; info->pri_base = pri_base; - if (!fault_id && - (engine_enum == ENGINE_CE2_GK20A)) + if (engine_enum == ENGINE_GR_GK20A) + gr_runlist_id = runlist_id; + + /* GR and GR_COPY shares same runlist_id */ + if ((engine_enum == ENGINE_ASYNC_CE_GK20A) && + (gr_runlist_id == runlist_id)) + engine_enum = ENGINE_GRCE_GK20A; + + info->engine_enum = engine_enum; + + if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A)) fault_id = 0x1b; info->fault_id = fault_id; + /* engine_id starts from 0 to NV_HOST_NUM_ENGINES */ + f->active_engines_list[f->num_engines] = engine_id; + + ++f->num_engines; + engine_enum = ENGINE_INVAL_GK20A; } } @@ -204,13 +349,19 @@ static int init_engine_info(struct fifo_gk20a *f) u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g) { u32 eng_intr_mask = 0; - int i = 0; + int i; + u32 active_engine_id = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; - for (i = 0; i < g->fifo.max_engines; i++) { - u32 intr_mask = g->fifo.engine_info[i].intr_mask; - if (i == ENGINE_CE2_GK20A && + for (i = 0; i < g->fifo.num_engines; i++) { + u32 intr_mask; + active_engine_id = g->fifo.active_engines_list[i]; + intr_mask = g->fifo.engine_info[active_engine_id].intr_mask; + engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + if (((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) && (!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall)) - continue; + continue; eng_intr_mask |= intr_mask; } @@ -218,13 +369,44 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g) return eng_intr_mask; } +void gk20a_fifo_delete_runlist(struct fifo_gk20a *f) +{ + u32 i; + u32 runlist_id; + struct fifo_runlist_info_gk20a *runlist; + struct gk20a *g = NULL; + + if (!f || !f->runlist_info) + return; + + g = f->g; + + for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { + runlist = &f->runlist_info[runlist_id]; + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + gk20a_gmmu_free(g, &runlist->mem[i]); + } + + kfree(runlist->active_channels); + runlist->active_channels = NULL; + + kfree(runlist->active_tsgs); + runlist->active_tsgs = NULL; + + mutex_destroy(&runlist->mutex); + + } + memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * + f->max_runlists)); + + kfree(f->runlist_info); + f->runlist_info = NULL; + f->max_runlists = 0; +} + static void gk20a_remove_fifo_support(struct fifo_gk20a *f) { struct gk20a *g = f->g; - struct fifo_engine_info_gk20a *engine_info; - struct fifo_runlist_info_gk20a *runlist; - u32 runlist_id; - u32 i; gk20a_dbg_fn(""); @@ -232,19 +414,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) vfree(f->tsg); gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); - engine_info = f->engine_info + ENGINE_GR_GK20A; - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; - - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) - gk20a_gmmu_free(g, &runlist->mem[i]); + gk20a_fifo_delete_runlist(f); - kfree(runlist->active_channels); - kfree(runlist->active_tsgs); - - kfree(f->runlist_info); kfree(f->pbdma_map); + f->pbdma_map = NULL; kfree(f->engine_info); + 
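/* Illustration only, not part of this patch: gk20a_fifo_delete_runlist()
 * clears each pointer after freeing it and bails out early once
 * f->runlist_info is NULL, so the teardown is idempotent. That is what
 * lets the reworked init_runlist() funnel every failure through the
 * single clean_up_runlist label. The wrapper name is hypothetical. */
static void my_safe_teardown(struct fifo_gk20a *f)
{
	gk20a_fifo_delete_runlist(f);
	/* A second call is harmless: f->runlist_info is already NULL. */
	gk20a_fifo_delete_runlist(f);
}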
f->engine_info = NULL; + kfree(f->active_engines_list); + f->active_engines_list = NULL; } /* reads info from hardware and fills in pbmda exception info record */ @@ -327,69 +504,58 @@ static void fifo_engine_exception_status(struct gk20a *g, static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) { - struct fifo_engine_info_gk20a *engine_info; struct fifo_runlist_info_gk20a *runlist; struct device *d = dev_from_gk20a(g); - u32 runlist_id; + s32 runlist_id = -1; u32 i; u64 runlist_size; gk20a_dbg_fn(""); - f->max_runlists = fifo_eng_runlist_base__size_1_v(); + f->max_runlists = g->ops.fifo.eng_runlist_base_size(); f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) * f->max_runlists, GFP_KERNEL); if (!f->runlist_info) - goto clean_up; + goto clean_up_runlist; - engine_info = f->engine_info + ENGINE_GR_GK20A; - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; + memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * + f->max_runlists)); - runlist->active_channels = - kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), - GFP_KERNEL); - if (!runlist->active_channels) - goto clean_up_runlist_info; - - runlist->active_tsgs = - kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), - GFP_KERNEL); - if (!runlist->active_tsgs) - goto clean_up_runlist_info; - - runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { - int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); - if (err) { - dev_err(d, "memory allocation failed\n"); + for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { + runlist = &f->runlist_info[runlist_id]; + + runlist->active_channels = + kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), + GFP_KERNEL); + if (!runlist->active_channels) goto clean_up_runlist; + + runlist->active_tsgs = + kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), + GFP_KERNEL); + if (!runlist->active_tsgs) + goto clean_up_runlist; + + runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); + if (err) { + dev_err(d, "memory allocation failed\n"); + goto clean_up_runlist; + } } - } - mutex_init(&runlist->mutex); + mutex_init(&runlist->mutex); - /* None of buffers is pinned if this value doesn't change. - Otherwise, one of them (cur_buffer) must have been pinned. */ - runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + /* None of buffers is pinned if this value doesn't change. + Otherwise, one of them (cur_buffer) must have been pinned. 
*/ + runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + } gk20a_dbg_fn("done"); return 0; clean_up_runlist: - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) - gk20a_gmmu_free(g, &runlist->mem[i]); - -clean_up_runlist_info: - kfree(runlist->active_channels); - runlist->active_channels = NULL; - - kfree(runlist->active_tsgs); - runlist->active_tsgs = NULL; - - kfree(f->runlist_info); - f->runlist_info = NULL; - -clean_up: + gk20a_fifo_delete_runlist(f); gk20a_dbg_fn("fail"); return -ENOMEM; } @@ -543,7 +709,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) f->num_channels = g->ops.fifo.get_num_fifos(g); f->num_runlist_entries = fifo_eng_runlist_length_max_v(); f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); - f->max_engines = ENGINE_INVAL_GK20A; + f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); f->userd_entry_size = 1 << ram_userd_base_shift_v(); @@ -563,11 +729,15 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) GFP_KERNEL); f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), GFP_KERNEL); + f->active_engines_list = kzalloc(f->max_engines * sizeof(u32), + GFP_KERNEL); - if (!(f->channel && f->pbdma_map && f->engine_info)) { + if (!(f->channel && f->pbdma_map && f->engine_info && + f->active_engines_list)) { err = -ENOMEM; goto clean_up; } + memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32))); /* pbdma map needs to be in place before calling engine info init */ for (i = 0; i < f->num_pbdma; ++i) @@ -614,6 +784,8 @@ clean_up: f->pbdma_map = NULL; kfree(f->engine_info); f->engine_info = NULL; + kfree(f->active_engines_list); + f->active_engines_list = NULL; return err; } @@ -829,9 +1001,29 @@ static inline void get_exception_mmu_fault_info( void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) { + struct fifo_gk20a *f = NULL; + u32 engine_enum = ENGINE_INVAL_GK20A; + u32 inst_id = 0; + struct fifo_engine_info_gk20a *engine_info; + gk20a_dbg_fn(""); - if (engine_id == ENGINE_GR_GK20A) { + if (!g) + return; + + f = &g->fifo; + + engine_info = gk20a_fifo_get_engine_info(g, engine_id); + + if (engine_info) { + engine_enum = engine_info->engine_enum; + inst_id = engine_info->inst_id; + } + + if (engine_enum == ENGINE_INVAL_GK20A) + gk20a_err(dev_from_gk20a(g), "unsupported engine_id %d", engine_id); + + if (engine_enum == ENGINE_GR_GK20A) { if (support_gk20a_pmu(g->dev) && g->elpg_enabled) gk20a_pmu_disable_elpg(g); /* resetting engine will alter read/write index. 
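/* Illustration only, not part of this patch: the engine-reset path being
 * reworked here keys off engine_info->engine_enum instead of hard-coded
 * engine IDs. A hypothetical helper that resets any copy engine: */
static void my_reset_if_ce(struct gk20a *g, u32 engine_id)
{
	struct fifo_engine_info_gk20a *info =
		gk20a_fifo_get_engine_info(g, engine_id);

	/* Both CE flavors carry a per-engine reset_mask, so no
	 * hard-coded mc_enable_ce2 bit is needed anymore. */
	if (info && ((info->engine_enum == ENGINE_GRCE_GK20A) ||
		     (info->engine_enum == ENGINE_ASYNC_CE_GK20A)))
		gk20a_reset(g, info->reset_mask);
}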
@@ -848,8 +1040,10 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) if (support_gk20a_pmu(g->dev) && g->elpg_enabled) gk20a_pmu_enable_elpg(g); } - if (engine_id == ENGINE_CE2_GK20A) - gk20a_reset(g, mc_enable_ce2_m()); + if ((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) { + gk20a_reset(g, engine_info->reset_mask); + } } static void gk20a_fifo_handle_chsw_fault(struct gk20a *g) @@ -872,6 +1066,24 @@ static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g) static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, struct fifo_mmu_fault_info_gk20a *f, bool fake_fault) { + u32 engine_enum = ENGINE_INVAL_GK20A; + struct fifo_gk20a *fifo = NULL; + struct fifo_engine_info_gk20a *engine_info; + + if (!g || !f) + return false; + + fifo = &g->fifo; + + engine_info = gk20a_fifo_get_engine_info(g, engine_id); + + if (engine_info) { + engine_enum = engine_info->engine_enum; + } + + if (engine_enum == ENGINE_INVAL_GK20A) + return false; + /* channel recovery is only deferred if an sm debugger is attached and has MMU debug mode is enabled */ if (!gk20a_gr_sm_debugger_attached(g) || @@ -882,7 +1094,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, if (fake_fault) return false; - if (engine_id != ENGINE_GR_GK20A || + if (engine_enum != ENGINE_GR_GK20A || f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v()) return false; @@ -1001,8 +1213,8 @@ static bool gk20a_fifo_handle_mmu_fault( false); g->ops.clock_gating.slcg_ltc_load_gating_prod(g, false); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); /* Disable fifo access */ grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); @@ -1219,7 +1431,7 @@ static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g, /* trigger faults for all bad engines */ for_each_set_bit(engine_id, &engine_ids, 32) { - if (engine_id > g->fifo.max_engines) { + if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { WARN_ON(true); break; } @@ -1257,8 +1469,9 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) int i; u32 engines = 0; - for (i = 0; i < g->fifo.max_engines; i++) { - u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; + u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id)); u32 ctx_status = fifo_engine_status_ctx_status_v(status); u32 ctx_id = (ctx_status == @@ -1276,7 +1489,7 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) fifo_engine_status_id_type_tsgid_v()) || (!is_tsg && type == fifo_engine_status_id_type_chid_v())) - engines |= BIT(i); + engines |= BIT(active_engine_id); } } @@ -1382,15 +1595,16 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, ref_id_is_tsg = false; /* Reset *all* engines that use the * same channel as faulty engine */ - for (i = 0; i < g->fifo.max_engines; i++) { + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; u32 type; u32 id; - gk20a_fifo_get_faulty_id_type(g, i, &id, &type); + gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type); if (ref_type == type && ref_id == id) { - engine_ids |= BIT(i); + engine_ids |= BIT(active_engine_id); mmu_fault_engines |= - BIT(gk20a_engine_id_to_mmu_id(g, i)); + BIT(gk20a_engine_id_to_mmu_id(g, active_engine_id)); } } } @@ -1453,16 
+1667,21 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, int *__id, bool *__is_tsg) { - u32 engine_id = -1; + u32 engine_id; int id = -1; bool is_tsg = false; u32 mailbox2; + u32 active_engine_id = FIFO_INVAL_ENGINE_ID; - for (engine_id = 0; engine_id < g->fifo.max_engines; engine_id++) { - u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); - u32 ctx_status = fifo_engine_status_ctx_status_v(status); + for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) { + u32 status; + u32 ctx_status; bool failing_engine; + active_engine_id = g->fifo.active_engines_list[engine_id]; + status = gk20a_readl(g, fifo_engine_status_r(active_engine_id)); + ctx_status = fifo_engine_status_ctx_status_v(status); + /* we are interested in busy engines */ failing_engine = fifo_engine_status_engine_v(status) == fifo_engine_status_engine_busy_v(); @@ -1476,8 +1695,10 @@ u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, || ctx_status == fifo_engine_status_ctx_status_ctxsw_load_v()); - if (!failing_engine) + if (!failing_engine) { + active_engine_id = FIFO_INVAL_ENGINE_ID; continue; + } if (ctx_status == fifo_engine_status_ctx_status_ctxsw_load_v()) { @@ -1500,7 +1721,7 @@ u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, *__id = id; *__is_tsg = is_tsg; - return engine_id; + return active_engine_id; } static bool gk20a_fifo_handle_sched_error(struct gk20a *g) @@ -1517,7 +1738,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg); /* could not find the engine - should never happen */ - if (unlikely(engine_id >= g->fifo.max_engines)) { + if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { gk20a_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, failed to find engine\n", sched_error); ret = false; @@ -1627,14 +1848,16 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) "channel reset initiated from %s; intr=0x%08x", __func__, fifo_intr); for (engine_id = 0; - engine_id < g->fifo.max_engines; + engine_id < g->fifo.num_engines; engine_id++) { - gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_id, - g->fifo.engine_info[engine_id].engine_id); - fifo_pbdma_exception_status(g, - &g->fifo.engine_info[engine_id]); - fifo_engine_exception_status(g, - &g->fifo.engine_info[engine_id]); + u32 active_engine_id = g->fifo.active_engines_list[engine_id]; + u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_enum, + active_engine_id); + fifo_pbdma_exception_status(g, + &g->fifo.engine_info[active_engine_id]); + fifo_engine_exception_status(g, + &g->fifo.engine_info[active_engine_id]); } } @@ -2057,12 +2280,13 @@ int gk20a_fifo_enable_all_engine_activity(struct gk20a *g) int i; int err = 0, ret = 0; - for (i = 0; i < g->fifo.max_engines; i++) { + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; err = gk20a_fifo_enable_engine_activity(g, - &g->fifo.engine_info[i]); + &g->fifo.engine_info[active_engine_id]); if (err) { gk20a_err(dev_from_gk20a(g), - "failed to enable engine %d activity\n", i); + "failed to enable engine %d activity\n", active_engine_id); ret = err; } } @@ -2149,14 +2373,16 @@ int gk20a_fifo_disable_all_engine_activity(struct gk20a *g, { int i; int err = 0, ret = 0; + u32 active_engine_id; - for (i = 0; i < g->fifo.max_engines; i++) { + for (i = 0; i < g->fifo.num_engines; i++) { + active_engine_id = 
g->fifo.active_engines_list[i]; err = gk20a_fifo_disable_engine_activity(g, - &g->fifo.engine_info[i], + &g->fifo.engine_info[active_engine_id], wait_for_idle); if (err) { gk20a_err(dev_from_gk20a(g), - "failed to disable engine %d activity\n", i); + "failed to disable engine %d activity\n", active_engine_id); ret = err; break; } @@ -2164,11 +2390,12 @@ int gk20a_fifo_disable_all_engine_activity(struct gk20a *g, if (err) { while (--i >= 0) { + active_engine_id = g->fifo.active_engines_list[i]; err = gk20a_fifo_enable_engine_activity(g, - &g->fifo.engine_info[i]); + &g->fifo.engine_info[active_engine_id]); if (err) gk20a_err(dev_from_gk20a(g), - "failed to re-enable engine %d activity\n", i); + "failed to re-enable engine %d activity\n", active_engine_id); } } @@ -2181,14 +2408,15 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) u32 engines = 0; int i; - for (i = 0; i < f->max_engines; i++) { - u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + for (i = 0; i < f->num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; + u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id)); bool engine_busy = fifo_engine_status_engine_v(status) == fifo_engine_status_engine_busy_v(); if (engine_busy && - (f->engine_info[i].runlist_id == runlist_id)) - engines |= BIT(i); + (f->engine_info[active_engine_id].runlist_id == runlist_id)) + engines |= BIT(active_engine_id); } if (engines) @@ -2669,8 +2897,10 @@ static int gk20a_fifo_sched_debugfs_seq_show( struct fifo_runlist_info_gk20a *runlist; u32 runlist_id; int ret = SEQ_SKIP; + u32 engine_id; - engine_info = f->engine_info + ENGINE_GR_GK20A; + engine_id = gk20a_fifo_get_gr_engine_id(g); + engine_info = (f->engine_info + engine_id); runlist_id = engine_info->runlist_id; runlist = &f->runlist_info[runlist_id]; @@ -2772,6 +3002,7 @@ void gk20a_init_fifo(struct gpu_ops *gops) gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; gops->fifo.force_reset_ch = gk20a_fifo_force_reset_ch; gops->fifo.engine_enum_from_type = gk20a_fifo_engine_enum_from_type; - /* gk20a don't support device_info_data packet parsing */ + /* gk20a doesn't support device_info_data packet parsing */ gops->fifo.device_info_data_parse = NULL; + gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 5fb5f550..25d2cd9f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -26,6 +26,8 @@ #define MAX_RUNLIST_BUFFERS 2 +#define FIFO_INVAL_ENGINE_ID ~0 + /* generally corresponds to the "pbdma" engine */ struct fifo_runlist_info_gk20a { @@ -40,10 +42,10 @@ struct fifo_runlist_info_gk20a { struct mutex mutex; /* protect channel preempt and runlist upate */ }; -/* so far gk20a has two engines: gr and ce2(gr_copy) */ enum { ENGINE_GR_GK20A = 0, - ENGINE_CE2_GK20A = 1, + ENGINE_GRCE_GK20A = 1, + ENGINE_ASYNC_CE_GK20A = 2, ENGINE_INVAL_GK20A }; @@ -85,6 +87,7 @@ struct fifo_engine_info_gk20a { u32 inst_id; u32 pri_base; u32 fault_id; + u32 engine_enum; struct fifo_pbdma_exception_info_gk20a pbdma_exception_info; struct fifo_engine_exception_info_gk20a engine_exception_info; struct fifo_mmu_fault_info_gk20a mmu_fault_info; @@ -102,6 +105,7 @@ struct fifo_gk20a { struct fifo_engine_info_gk20a *engine_info; u32 max_engines; u32 num_engines; + u32 *active_engines_list; struct fifo_runlist_info_gk20a *runlist_info; u32 max_runlists; @@ -228,4 +232,15 @@ const char 
*gk20a_fifo_interleave_level_name(u32 interleave_level); int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type, u32 *inst_id); +u32 gk20a_fifo_get_engine_ids(struct gk20a *g, u32 engine_id[], u32 engine_id_sz, u32 engine_enum); + +void gk20a_fifo_delete_runlist(struct fifo_gk20a *f); + +struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id); + +bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id); + +u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g); + +u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index a817abcd..d21d0527 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1890,7 +1890,7 @@ void gk20a_enable(struct gk20a *g, u32 units) void gk20a_reset(struct gk20a *g, u32 units) { gk20a_disable(g, units); - if (units & mc_enable_ce2_enabled_f()) + if (units & gk20a_fifo_get_all_ce_engine_reset_mask(g)) udelay(500); else udelay(20); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 3da19cc8..b7f3f6f1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -90,6 +90,7 @@ enum nvgpu_litter_value { GPU_LIT_ROP_BASE, GPU_LIT_ROP_STRIDE, GPU_LIT_ROP_SHARED_BASE, + GPU_LIT_HOST_NUM_ENGINES, GPU_LIT_HOST_NUM_PBDMA, GPU_LIT_LTC_STRIDE, GPU_LIT_LTS_STRIDE, @@ -120,8 +121,8 @@ struct gpu_ops { void (*flush)(struct gk20a *g); } ltc; struct { - void (*isr_stall)(struct gk20a *g); - void (*isr_nonstall)(struct gk20a *g); + void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base); + void (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base); } ce2; struct { int (*init_fs_state)(struct gk20a *g); @@ -350,6 +351,7 @@ struct gpu_ops { int (*tsg_bind_channel)(struct tsg_gk20a *tsg, struct channel_gk20a *ch); int (*tsg_unbind_channel)(struct channel_gk20a *ch); + u32 (*eng_runlist_base_size)(void); } fifo; struct pmu_v { /*used for change of enum zbc update cmd id from ver 0 to ver1*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index 0c94e246..6784cd11 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c @@ -50,13 +50,12 @@ static ssize_t elcg_enable_store(struct device *dev, if (val) { g->elcg_enabled = true; - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); } else { g->elcg_enabled = false; - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); } + gk20a_idle(g->dev); dev_info(dev, "ELCG is %s.\n", g->elcg_enabled ? 
"enabled" : diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 17307b63..fb777948 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -326,9 +326,12 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, bool gr_enabled; bool ctxsw_active; bool gr_busy; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + do { /* fmodel: host gets fifo_engine_status(gr) from gr only when gr_status is read */ @@ -338,7 +341,7 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, mc_enable_pgraph_enabled_f(); ctxsw_active = gk20a_readl(g, - fifo_engine_status_r(ENGINE_GR_GK20A)) & + fifo_engine_status_r(gr_engine_id)) & fifo_engine_status_ctxsw_in_progress_f(); gr_busy = gk20a_readl(g, gr_engine_status_r()) & @@ -3905,11 +3908,14 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) { struct fifo_gk20a *f = &g->fifo; - struct fifo_engine_info_gk20a *gr_info = - f->engine_info + ENGINE_GR_GK20A; + struct fifo_engine_info_gk20a *gr_info = NULL; unsigned long end_jiffies = jiffies + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); u32 ret; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + gr_info = (f->engine_info + engine_id); ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); if (ret) { @@ -4187,9 +4193,13 @@ int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *zbc_val) { struct fifo_gk20a *f = &g->fifo; - struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; + struct fifo_engine_info_gk20a *gr_info = NULL; unsigned long end_jiffies; int ret; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + gr_info = (f->engine_info + engine_id); ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); if (ret) { @@ -4308,6 +4318,29 @@ void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter); } +void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config) +{ + u32 engine_idx; + u32 active_engine_id = 0; + struct fifo_engine_info_gk20a *engine_info = NULL; + struct fifo_gk20a *f = &g->fifo; + + for (engine_idx = 0; engine_idx < f->num_engines; ++engine_idx) { + active_engine_id = f->active_engines_list[engine_idx]; + engine_info = &f->engine_info[active_engine_id]; + + /* gr_engine supports both BLCG and ELCG */ + if ((cgmode == BLCG_MODE) && + (engine_info->engine_enum == ENGINE_GR_GK20A)) { + gr_gk20a_init_blcg_mode(g, mode_config, active_engine_id); + break; + } else if (cgmode == ELCG_MODE) + gr_gk20a_init_elcg_mode(g, mode_config, active_engine_id); + else + gk20a_err(dev_from_gk20a(g), "invalid cg mode %d %d", cgmode, mode_config); + } +} + static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) { u32 gpc_index, gpc_tpc_count, gpc_zcull_count; @@ -4710,8 +4743,9 @@ static int gk20a_init_gr_prepare(struct gk20a *g) { u32 gpfifo_ctrl, pmc_en; u32 err = 0; - struct fifo_engine_info_gk20a *ce_info = - g->fifo.engine_info + ENGINE_CE2_GK20A; + u32 ce_reset_mask; + + ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g); /* disable fifo access */ pmc_en = gk20a_readl(g, mc_enable_r()); @@ -4725,12 +4759,12 @@ static int gk20a_init_gr_prepare(struct gk20a *g) gk20a_reset(g, mc_enable_pgraph_enabled_f() | mc_enable_blg_enabled_f() | mc_enable_perfmon_enabled_f() - | ce_info->reset_mask); + | ce_reset_mask); 
gr_gk20a_load_gating_prod(g); + /* Disable elcg until it gets enabled later in the init*/ - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); /* enable fifo access */ gk20a_writel(g, gr_gpfifo_ctl_r(), @@ -5210,11 +5244,9 @@ int gk20a_enable_gr_hw(struct gk20a *g) static void gr_gk20a_enable_elcg(struct gk20a *g) { if (g->elcg_enabled) { - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); } else { - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); } } @@ -6106,6 +6138,7 @@ int gk20a_gr_isr(struct gk20a *g) u32 gr_intr = gk20a_readl(g, gr_intr_r()); struct channel_gk20a *ch = NULL; int tsgid = NVGPU_INVALID_TSG_ID; + u32 gr_engine_id; gk20a_dbg_fn(""); gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); @@ -6113,6 +6146,8 @@ int gk20a_gr_isr(struct gk20a *g) if (!gr_intr) return 0; + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1); @@ -6283,13 +6318,13 @@ int gk20a_gr_isr(struct gk20a *g) if (need_reset) { if (tsgid != NVGPU_INVALID_TSG_ID) - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + gk20a_fifo_recover(g, BIT(gr_engine_id), tsgid, true, true, true); else if (ch) - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + gk20a_fifo_recover(g, BIT(gr_engine_id), ch->hw_chid, false, true, true); else - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + gk20a_fifo_recover(g, BIT(gr_engine_id), 0, false, false, true); } @@ -8441,6 +8476,10 @@ static u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g) static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { + u32 gr_engine_id; + + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", gk20a_readl(g, gr_status_r())); gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", @@ -8460,7 +8499,7 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", gk20a_readl(g, gr_fecs_intr_r())); gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", - gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A))); + gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_activity_0_r())); gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index b5d97727..189994ef 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -81,6 +81,12 @@ enum { eUcodeHandshakeMethodFinished }; +enum { + ELCG_MODE = (1 << 0), + BLCG_MODE = (1 << 1), + INVALID_MODE = (1 << 2) +}; + enum { ELCG_RUN, /* clk always run, i.e. 
disable elcg */ ELCG_STOP, /* clk is stopped */ @@ -476,6 +482,8 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va); void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine); +void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config); + /* sm */ bool gk20a_gr_sm_debugger_attached(struct gk20a *g); void gk20a_gr_clear_sm_hww(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 5112af55..bf58d298 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -99,6 +99,9 @@ static int gk20a_get_litter_value(struct gk20a *g, case GPU_LIT_ROP_SHARED_BASE: ret = proj_rop_shared_base_v(); break; + case GPU_LIT_HOST_NUM_ENGINES: + ret = proj_host_num_engines_v(); + break; case GPU_LIT_HOST_NUM_PBDMA: ret = proj_host_num_pbdma_v(); break; diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h index ce10db35..7dce9f91 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h @@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void) { return 0x00001800; } +static inline u32 proj_host_num_engines_v(void) +{ + return 0x00000002; +} static inline u32 proj_host_num_pbdma_v(void) { return 0x00000001; diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h index 517cffef..d99e6135 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h @@ -118,6 +118,22 @@ static inline u32 top_device_info_type_enum_graphics_f(void) { return 0x0; } +static inline u32 top_device_info_type_enum_copy0_v(void) +{ + return 0x00000001; +} +static inline u32 top_device_info_type_enum_copy0_f(void) +{ + return 0x4; +} +static inline u32 top_device_info_type_enum_copy1_v(void) +{ + return 0x00000002; +} +static inline u32 top_device_info_type_enum_copy1_f(void) +{ + return 0x8; +} static inline u32 top_device_info_type_enum_copy2_v(void) { return 0x00000003; diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 70da7a02..57368235 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c @@ -74,6 +74,9 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) { u32 mc_intr_0; int hw_irq_count; + u32 engine_id_idx; + u32 active_engine_id = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); @@ -84,11 +87,26 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); - if (mc_intr_0 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask) - gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g)); - if (mc_intr_0 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask - && g->ops.ce2.isr_stall) - g->ops.ce2.isr_stall(g); + for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) { + active_engine_id = g->fifo.active_engines_list[engine_id_idx]; + + if (mc_intr_0 & g->fifo.engine_info[active_engine_id].intr_mask) { + engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + /* GR Engine */ + if (engine_enum == ENGINE_GR_GK20A) { + gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g)); + } + + /* CE Engine */ + if (((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) && + g->ops.ce2.isr_stall){ + g->ops.ce2.isr_stall(g, + 
g->fifo.engine_info[active_engine_id].inst_id, + g->fifo.engine_info[active_engine_id].pri_base); + } + } + } if (mc_intr_0 & mc_intr_0_pfifo_pending_f()) gk20a_fifo_isr(g); if (mc_intr_0 & mc_intr_0_pmu_pending_f()) @@ -120,6 +138,9 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) { u32 mc_intr_1; int hw_irq_count; + u32 engine_id_idx; + u32 active_engine_id = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); @@ -132,11 +153,27 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) gk20a_fifo_nonstall_isr(g); if (mc_intr_1 & mc_intr_0_priv_ring_pending_f()) gk20a_priv_ring_isr(g); - if (mc_intr_1 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask) - gk20a_gr_nonstall_isr(g); - if (mc_intr_1 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask - && g->ops.ce2.isr_nonstall) - g->ops.ce2.isr_nonstall(g); + + for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) { + active_engine_id = g->fifo.active_engines_list[engine_id_idx]; + + if (mc_intr_1 & g->fifo.engine_info[active_engine_id].intr_mask) { + engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + /* GR Engine */ + if (engine_enum == ENGINE_GR_GK20A) { + gk20a_gr_nonstall_isr(g); + } + + /* CE Engine */ + if (((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) && + g->ops.ce2.isr_nonstall) { + g->ops.ce2.isr_nonstall(g, + g->fifo.engine_info[active_engine_id].inst_id, + g->fifo.engine_info[active_engine_id].pri_base); + } + } + } /* sync handled irq counter before re-enabling interrupts */ atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index bca57585..f6e2df00 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -3061,8 +3061,12 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g) struct pmu_cmd cmd; u32 desc; int err = 0; + u32 gr_engine_id; + gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + @@ -3070,7 +3074,7 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g) g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, PMU_PG_CMD_ID_ENG_BUF_LOAD); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, - ENGINE_GR_GK20A); + gr_engine_id); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, PMU_PGENG_GR_BUFFER_IDX_FECS); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, @@ -3095,6 +3099,9 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g) struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 desc; + u32 gr_engine_id; + + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; @@ -3103,7 +3110,7 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g) g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, PMU_PG_CMD_ID_ENG_BUF_LOAD); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, - ENGINE_GR_GK20A); + gr_engine_id); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, PMU_PGENG_GR_BUFFER_IDX_ZBC); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, @@ -3288,20 +3295,23 @@ static int pmu_init_powergating(struct gk20a *g) struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + if (tegra_cpu_is_asim()) { /* TBD: calculate threshold for 
silicon */ - gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_idlefilth_r(gr_engine_id), PMU_PG_IDLE_THRESHOLD_SIM); - gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(gr_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM); } else { /* TBD: calculate threshold for silicon */ - gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_idlefilth_r(gr_engine_id), PMU_PG_IDLE_THRESHOLD); - gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(gr_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD); } @@ -3316,7 +3326,7 @@ static int pmu_init_powergating(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT"); @@ -3329,7 +3339,7 @@ static int pmu_init_powergating(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat); cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT; - cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.stat.engine_id = gr_engine_id; cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM; cmd.cmd.pg.stat.data = 0; @@ -3344,7 +3354,7 @@ static int pmu_init_powergating(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW"); @@ -4429,14 +4439,17 @@ static int gk20a_pmu_enable_elpg_locked(struct gk20a *g) struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq, status; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW; /* no need to wait ack for ELPG enable but set pending to sync @@ -4503,9 +4516,12 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) struct pmu_cmd cmd; u32 seq; int ret = 0; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + if (!support_gk20a_pmu(g->dev)) return ret; @@ -4553,7 +4569,7 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING; diff --git a/drivers/gpu/nvgpu/gm206/ce_gm206.c b/drivers/gpu/nvgpu/gm206/ce_gm206.c new file mode 100644 index 00000000..9ec42831 --- /dev/null +++ b/drivers/gpu/nvgpu/gm206/ce_gm206.c @@ -0,0 +1,107 @@ +/* + * GM206 Copy Engine. + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+
+/* TODO: remove unnecessary includes */
+#include "gk20a/gk20a.h"
+#include "ce_gm206.h"
+
+/* TODO: remove unnecessary includes */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gk20a/debug_gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "hw_ce2_gm206.h"
+#include "hw_pbdma_gm206.h"
+#include "hw_ccsr_gm206.h"
+#include "hw_ram_gm206.h"
+#include "hw_top_gm206.h"
+#include "hw_mc_gm206.h"
+#include "hw_gr_gm206.h"
+
+/* TODO: We need a generic way to query the intr_status register offset.
+ * As of now, there is no way to query this information from dev_ceN_pri.h */
+#define COP_INTR_STATUS_OFFSET 0x908
+
+static u32 ce_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr, u32 inst_id)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce non-blocking pipe interrupt\n");
+
+	return ce2_intr_status_nonblockpipe_pending_f();
+}
+
+static u32 ce_blockpipe_isr(struct gk20a *g, u32 fifo_intr, u32 inst_id)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce blocking pipe interrupt\n");
+
+	return ce2_intr_status_blockpipe_pending_f();
+}
+
+static u32 ce_launcherr_isr(struct gk20a *g, u32 fifo_intr, u32 inst_id)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce launch error interrupt\n");
+
+	return ce2_intr_status_launcherr_pending_f();
+}
+
+void gm206_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+{
+	u32 ce_intr_status_reg = (pri_base + COP_INTR_STATUS_OFFSET);
+	u32 ce_intr = gk20a_readl(g, ce_intr_status_reg);
+	u32 clear_intr = 0;
+
+	gk20a_dbg(gpu_dbg_intr, "ce isr %08x %08x\n", ce_intr, inst_id);
+
+	/* clear blocking interrupts: they exhibit broken behavior */
+	if (ce_intr & ce2_intr_status_blockpipe_pending_f())
+		clear_intr |= ce_blockpipe_isr(g, ce_intr, inst_id);
+
+	if (ce_intr & ce2_intr_status_launcherr_pending_f())
+		clear_intr |= ce_launcherr_isr(g, ce_intr, inst_id);
+
+	gk20a_writel(g, ce_intr_status_reg, clear_intr);
+	return;
+}
+
+void gm206_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+{
+	u32 ce_intr_status_reg = (pri_base + COP_INTR_STATUS_OFFSET);
+	u32 ce_intr = gk20a_readl(g, ce_intr_status_reg);
+
+	gk20a_dbg(gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id);
+
+	if (ce_intr & ce2_intr_status_nonblockpipe_pending_f()) {
+		gk20a_writel(g, ce_intr_status_reg,
+			ce_nonblockpipe_isr(g, ce_intr, inst_id));
+
+		/* wake threads waiting in this channel */
+		gk20a_channel_semaphore_wakeup(g, true);
+	}
+
+	return;
+}
+
+void gm206_init_ce(struct gpu_ops *gops)
+{
+	gops->ce2.isr_stall = gm206_ce_isr;
+	gops->ce2.isr_nonstall = gm206_ce_nonstall_isr;
+}
diff --git a/drivers/gpu/nvgpu/gm206/ce_gm206.h b/drivers/gpu/nvgpu/gm206/ce_gm206.h
new file mode 100644
index 00000000..c2fb1586
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm206/ce_gm206.h
@@ -0,0 +1,26 @@
+/*
+ * GM206 copy engine.
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. + */ +#ifndef __CE_GM206_H__ +#define __CE_GM206_H__ + +#include "gk20a/channel_gk20a.h" +#include "gk20a/tsg_gk20a.h" + +void gm206_init_ce(struct gpu_ops *gops); + +#endif /*__CE2_GM20B_H__*/ diff --git a/drivers/gpu/nvgpu/gm206/fifo_gm206.c b/drivers/gpu/nvgpu/gm206/fifo_gm206.c index c78f256c..6caf30f4 100644 --- a/drivers/gpu/nvgpu/gm206/fifo_gm206.c +++ b/drivers/gpu/nvgpu/gm206/fifo_gm206.c @@ -18,6 +18,7 @@ #include "gm20b/fifo_gm20b.h" #include "fifo_gm206.h" #include "hw_ccsr_gm206.h" +#include "hw_fifo_gm206.h" static u32 gm206_fifo_get_num_fifos(struct gk20a *g) { @@ -28,4 +29,5 @@ void gm206_init_fifo(struct gpu_ops *gops) { gm20b_init_fifo(gops); gops->fifo.get_num_fifos = gm206_fifo_get_num_fifos; + gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v; } diff --git a/drivers/gpu/nvgpu/gm206/hal_gm206.c b/drivers/gpu/nvgpu/gm206/hal_gm206.c index 048a109f..6b5c70e2 100644 --- a/drivers/gpu/nvgpu/gm206/hal_gm206.c +++ b/drivers/gpu/nvgpu/gm206/hal_gm206.c @@ -21,7 +21,7 @@ #include "gm20b/mc_gm20b.h" #include "gm20b/ltc_gm20b.h" #include "gm20b/mm_gm20b.h" -#include "gm20b/ce2_gm20b.h" +#include "ce_gm206.h" #include "gm20b/fb_gm20b.h" #include "gm20b/pmu_gm20b.h" #include "gm20b/gr_gm20b.h" @@ -142,6 +142,9 @@ static int gm206_get_litter_value(struct gk20a *g, case GPU_LIT_ROP_SHARED_BASE: ret = proj_rop_shared_base_v(); break; + case GPU_LIT_HOST_NUM_ENGINES: + ret = proj_host_num_engines_v(); + break; case GPU_LIT_HOST_NUM_PBDMA: ret = proj_host_num_pbdma_v(); break; @@ -183,7 +186,7 @@ int gm206_init_hal(struct gk20a *g) gm20b_init_fb(gops); g->ops.fb.set_use_full_comp_tag_line = NULL; gm206_init_fifo(gops); - gm20b_init_ce2(gops); + gm206_init_ce(gops); gm20b_init_gr_ctx(gops); gm20b_init_mm(gops); gm206_init_pmu_ops(gops); diff --git a/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h b/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h index 2f4187d0..6c21b39a 100644 --- a/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h +++ b/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h @@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void) { return 0x00001800; } +static inline u32 proj_host_num_engines_v(void) +{ + return 0x00000008; +} static inline u32 proj_host_num_pbdma_v(void) { return 0x00000003; diff --git a/drivers/gpu/nvgpu/gm206/hw_top_gm206.h b/drivers/gpu/nvgpu/gm206/hw_top_gm206.h index e6ec1d27..988f24ea 100644 --- a/drivers/gpu/nvgpu/gm206/hw_top_gm206.h +++ b/drivers/gpu/nvgpu/gm206/hw_top_gm206.h @@ -146,6 +146,22 @@ static inline u32 top_device_info_type_enum_copy0_f(void) { return 0x4; } +static inline u32 top_device_info_type_enum_copy1_v(void) +{ + return 0x00000002; +} +static inline u32 top_device_info_type_enum_copy1_f(void) +{ + return 0x8; +} +static inline u32 top_device_info_type_enum_copy2_v(void) +{ + return 0x00000003; +} +static inline u32 top_device_info_type_enum_copy2_f(void) +{ + return 0xc; +} static inline u32 top_device_info_entry_v(u32 r) { return (r >> 0) & 0x3; diff --git 
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index bb873bcc..031c5bae 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -49,12 +49,14 @@ static void channel_gm20b_bind(struct channel_gk20a *c)
 static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 {
 	u32 fault_id = ~0;
+	struct fifo_engine_info_gk20a *engine_info;
 
-	if (engine_id < ENGINE_INVAL_GK20A) {
-		struct fifo_engine_info_gk20a *info =
-			&g->fifo.engine_info[engine_id];
+	engine_info = gk20a_fifo_get_engine_info(g, engine_id);
 
-		fault_id = info->fault_id;
+	if (engine_info) {
+		fault_id = engine_info->fault_id;
+	} else {
+		gk20a_err(g->dev, "invalid engine_id %d (not in active list)", engine_id);
 	}
 	return fault_id;
 }
@@ -72,7 +74,7 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 	for_each_set_bit(engine_id, &engine_ids, 32) {
 		u32 engine_mmu_fault_id;
 
-		if (engine_id > g->fifo.max_engines) {
+		if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
 			gk20a_err(dev_from_gk20a(g),
 				  "faulting unknown engine %ld", engine_id);
 		} else {
@@ -109,7 +111,7 @@ static u32 gm20b_fifo_get_num_fifos(struct gk20a *g)
 	return ccsr_channel__size_1_v();
 }
 
-void gm20b_device_info_data_parse(struct gk20a *g,
+static void gm20b_device_info_data_parse(struct gk20a *g,
 	u32 table_entry, u32 *inst_id,
 	u32 *pri_base, u32 *fault_id)
 {
@@ -152,4 +154,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.force_reset_ch = gk20a_fifo_force_reset_ch;
 	gops->fifo.engine_enum_from_type = gk20a_fifo_engine_enum_from_type;
 	gops->fifo.device_info_data_parse = gm20b_device_info_data_parse;
+	gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d5131b7a..cc709c78 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -855,6 +855,9 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 		struct gk20a_debug_output *o)
 {
 	struct gr_gk20a *gr = &g->gr;
+	u32 gr_engine_id;
+
+	gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
 
 	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
 		gk20a_readl(g, gr_status_r()));
@@ -875,7 +878,7 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
 		gk20a_readl(g, gr_fecs_intr_r()));
 	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
-		gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A)));
+		gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
 	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_activity_0_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index c0113498..cbd3f50b 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -140,6 +140,9 @@ static int gm20b_get_litter_value(struct gk20a *g,
 	case GPU_LIT_ROP_SHARED_BASE:
 		ret = proj_rop_shared_base_v();
 		break;
+	case GPU_LIT_HOST_NUM_ENGINES:
+		ret = proj_host_num_engines_v();
+		break;
 	case GPU_LIT_HOST_NUM_PBDMA:
 		ret = proj_host_num_pbdma_v();
 		break;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
index b837918c..d4d412e1 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
@@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void)
 {
 	return 0x00001800;
 }
+static inline u32 proj_host_num_engines_v(void)
+{
+	return 0x00000002;
+}
 static inline u32 proj_host_num_pbdma_v(void)
 {
 	return 0x00000001;
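/*
 * Assumed shape of gk20a_fifo_is_valid_engine_id() (it is defined in
 * fifo_gk20a.c, which is not part of this excerpt); the field names
 * num_engines and active_engines_list are assumptions. The point of
 * the checks introduced above is that an engine_id is valid only if it
 * was discovered in the device info table, not merely smaller than a
 * hard-coded constant such as ENGINE_INVAL_GK20A.
 */
static inline bool example_is_valid_engine_id(struct gk20a *g, u32 engine_id)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 i;

	/* linear scan is fine: the active list has at most
	 * GPU_LIT_HOST_NUM_ENGINES entries */
	for (i = 0; i < f->num_engines; i++)
		if (f->active_engines_list[i] == engine_id)
			return true;

	return false;
}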
diff --git a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
index c70f388c..cca37294 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
@@ -138,6 +138,22 @@ static inline u32 top_device_info_type_enum_graphics_f(void)
 {
 	return 0x0;
 }
+static inline u32 top_device_info_type_enum_copy0_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_type_enum_copy0_f(void)
+{
+	return 0x4;
+}
+static inline u32 top_device_info_type_enum_copy1_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 top_device_info_type_enum_copy1_f(void)
+{
+	return 0x8;
+}
 static inline u32 top_device_info_type_enum_copy2_v(void)
 {
 	return 0x00000003;
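/*
 * Assumed shape of gk20a_fifo_delete_runlist() (it lives in
 * fifo_gk20a.c, outside this excerpt). The vgpu init_runlist() rework
 * below jumps to it from every failure point, so it must tolerate
 * partially initialized state; the f->g back-pointer and the behavior
 * of gk20a_gmmu_free() on zeroed descriptors are assumptions here.
 */
static void example_delete_runlist(struct fifo_gk20a *f)
{
	struct gk20a *g = f->g;
	u32 i, runlist_id;

	if (!f->runlist_info)
		return;

	for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
		struct fifo_runlist_info_gk20a *runlist =
			&f->runlist_info[runlist_id];

		/* kfree(NULL) is a no-op, so unallocated bitmaps are fine */
		kfree(runlist->active_channels);
		runlist->active_channels = NULL;

		/* mem[] was zeroed by kzalloc; freeing never-allocated
		 * buffers is assumed to be a no-op */
		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
			gk20a_gmmu_free(g, &runlist->mem[i]);
	}

	kfree(f->runlist_info);
	f->runlist_info = NULL;
}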
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 0895b757..3e89e81f 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -163,60 +163,52 @@ static int init_engine_info(struct fifo_gk20a *f)
 
 static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 {
-	struct fifo_engine_info_gk20a *engine_info;
 	struct fifo_runlist_info_gk20a *runlist;
 	struct device *d = dev_from_gk20a(g);
-	u32 runlist_id;
+	s32 runlist_id = -1;
 	u32 i;
 	u64 runlist_size;
 
 	gk20a_dbg_fn("");
 
-	f->max_runlists = fifo_eng_runlist_base__size_1_v();
+	f->max_runlists = g->ops.fifo.eng_runlist_base_size();
 	f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
 				  f->max_runlists, GFP_KERNEL);
 	if (!f->runlist_info)
-		goto clean_up;
+		goto clean_up_runlist;
 
-	engine_info = f->engine_info + ENGINE_GR_GK20A;
-	runlist_id = engine_info->runlist_id;
-	runlist = &f->runlist_info[runlist_id];
+	memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
+		f->max_runlists));
+
+	for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
+		runlist = &f->runlist_info[runlist_id];
 
-	runlist->active_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->active_channels)
-		goto clean_up_runlist_info;
-
-	runlist_size = sizeof(u16) * f->num_channels;
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
-		if (err) {
-			dev_err(d, "memory allocation failed\n");
+		runlist->active_channels =
+			kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
+				GFP_KERNEL);
+		if (!runlist->active_channels)
 			goto clean_up_runlist;
-		}
-	}
-	mutex_init(&runlist->mutex);
 
+		runlist_size = sizeof(u16) * f->num_channels;
+		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+			int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
+			if (err) {
+				dev_err(d, "memory allocation failed\n");
+				goto clean_up_runlist;
+			}
+		}
+		mutex_init(&runlist->mutex);
+
-	/* None of buffers is pinned if this value doesn't change.
-	   Otherwise, one of them (cur_buffer) must have been pinned. */
-	runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+		/* None of the buffers is pinned if this value doesn't change.
+		   Otherwise, one of them (cur_buffer) must have been pinned. */
+		runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+	}
 
 	gk20a_dbg_fn("done");
 	return 0;
 
 clean_up_runlist:
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
-		gk20a_gmmu_free(g, &runlist->mem[i]);
-
-clean_up_runlist_info:
-	kfree(runlist->active_channels);
-	runlist->active_channels = NULL;
-
-	kfree(f->runlist_info);
-	f->runlist_info = NULL;
-
-clean_up:
+	gk20a_fifo_delete_runlist(f);
 	gk20a_dbg_fn("fail");
 	return -ENOMEM;
 }
@@ -243,7 +235,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	if (err)
 		return -ENXIO;
 
-	f->max_engines = ENGINE_INVAL_GK20A;
+	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
 
 	f->userd_entry_size = 1 << ram_userd_base_shift_v();
-- 
cgit v1.2.2