From 0ac3ba2a99b745f577c752ebf9a6b4291730a36d Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu
Date: Tue, 2 Jan 2018 15:48:46 -0800
Subject: gpu: nvgpu: gv11b: fix for gfx preemption

Used the chip-specific attrib_cb_gfxp_default_size and attrib_cb_gfxp_size
buffer sizes when committing the global circular buffer while gfx
preemption is requested. These sizes differ between gv11b and gp10b.
For gp10b, smaller buffer sizes than the values specified in the hw
manuals are used, as per sw requirements.

Also used the gv11b-specific preemption-related functions:
gr_gv11b_set_ctxsw_preemption_mode
gr_gv11b_update_ctxsw_preemption_mode
This is required because the preemption-related buffer sizes for gv11b
differ from gp10b. More optimization will be done as part of NVGPU-484.

Another issue fixed: the gpu va for the preemption buffers still needs
to be 8-bit aligned (the va is programmed right-shifted by 8 bits), even
though 49 bits of va are now available. This is required by the legacy
implementation of the fecs ucode.

Bug 1976694

Change-Id: I2dc923340d34d0dc5fe45419200d0cf4f53cdb23
Signed-off-by: seshendra Gadagottu
Reviewed-on: https://git-master.nvidia.com/r/1635027
GVS: Gerrit_Virtual_Submit
Reviewed-by: Richard Zhao
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h  |   4 +-
 drivers/gpu/nvgpu/gp106/gr_gp106.c  |   6 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c  |  18 ++-
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c  | 248 +++++++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h  |  13 +-
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c |   6 +-
 6 files changed, 281 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index aeb26a6c..b6d5c14b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Graphics Engine
  *
- * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -362,6 +362,8 @@ struct gr_gk20a {
 	u32 bundle_cb_token_limit;
 	u32 attrib_cb_default_size;
 	u32 attrib_cb_size;
+	u32 attrib_cb_gfxp_default_size;
+	u32 attrib_cb_gfxp_size;
 	u32 alpha_cb_default_size;
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index af08aa6c..bedc0b78 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -1,7 +1,7 @@
 /*
  * GP106 GPU GR
  *
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -128,6 +128,10 @@ void gr_gp106_cb_size_default(struct gk20a *g) gr->attrib_cb_default_size = 0x800; gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); + gr->attrib_cb_gfxp_default_size = + gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); + gr->attrib_cb_gfxp_size = + gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); } int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index b5194223..9a7f4f97 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B GPU GR * - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -406,12 +406,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, gk20a_dbg_fn(""); if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { - attrib_size_in_chunk = gr->attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); - cb_attrib_cache_size_init = gr->attrib_cb_default_size + - (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - - gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + attrib_size_in_chunk = gr->attrib_cb_gfxp_size; + cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; } else { attrib_size_in_chunk = gr->attrib_cb_size; cb_attrib_cache_size_init = gr->attrib_cb_default_size; @@ -738,6 +734,14 @@ void gr_gp10b_cb_size_default(struct gk20a *g) gr->attrib_cb_default_size = 0x800; gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); + gr->attrib_cb_gfxp_default_size = + gr->attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + gr->attrib_cb_gfxp_size = + gr->attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); } void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index e00277f0..f369e12e 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1,7 +1,7 @@ /* * GV11b GPU GR * - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -1224,6 +1224,10 @@ void gr_gv11b_cb_size_default(struct gk20a *g) gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); + gr->attrib_cb_gfxp_default_size = + gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); + gr->attrib_cb_gfxp_size = + gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(); } void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) @@ -1368,6 +1372,245 @@ fail_free: return err; } +int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, + struct gr_ctx_desc *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode) +{ + int err = 0; + + if (g->ops.gr.is_valid_gfx_class(g, class) && + g->gr.ctx_vars.force_preemption_gfxp) + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + + if (g->ops.gr.is_valid_compute_class(g, class) && + g->gr.ctx_vars.force_preemption_cilp) + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + + /* check for invalid combinations */ + if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0)) + return -EINVAL; + + if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) && + (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP)) + return -EINVAL; + + /* Do not allow lower preemption modes than current ones */ + if (graphics_preempt_mode && + (graphics_preempt_mode < gr_ctx->graphics_preempt_mode)) + return -EINVAL; + + if (compute_preempt_mode && + (compute_preempt_mode < gr_ctx->compute_preempt_mode)) + return -EINVAL; + + /* set preemption modes */ + switch (graphics_preempt_mode) { + case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: + { + u32 spill_size = + gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() * + gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); + u32 pagepool_size = g->ops.gr.pagepool_default_size(g) * + gr_scc_pagepool_total_pages_byte_granularity_v(); + u32 betacb_size = g->gr.attrib_cb_default_size + + (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() - + gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v()); + u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) * + gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * + g->gr.max_tpc_count; + attrib_cb_size = ALIGN(attrib_cb_size, 128); + + gk20a_dbg_info("gfxp context spill_size=%d", spill_size); + gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size); + gk20a_dbg_info("gfxp context attrib_cb_size=%d", + attrib_cb_size); + + err = gr_gp10b_alloc_buffer(vm, + g->gr.ctx_vars.preempt_image_size, + &gr_ctx->preempt_ctxsw_buffer); + if (err) { + nvgpu_err(g, "cannot allocate preempt buffer"); + goto fail; + } + + err = gr_gp10b_alloc_buffer(vm, + spill_size, + &gr_ctx->spill_ctxsw_buffer); + if (err) { + nvgpu_err(g, "cannot allocate spill buffer"); + goto fail_free_preempt; + } + + err = gr_gp10b_alloc_buffer(vm, + attrib_cb_size, + &gr_ctx->betacb_ctxsw_buffer); + if (err) { + nvgpu_err(g, "cannot allocate beta buffer"); + goto fail_free_spill; + } + + err = gr_gp10b_alloc_buffer(vm, + pagepool_size, + &gr_ctx->pagepool_ctxsw_buffer); + if (err) { + nvgpu_err(g, "cannot allocate page pool"); + goto fail_free_betacb; + } + + gr_ctx->graphics_preempt_mode = graphics_preempt_mode; + break; + } + + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + gr_ctx->graphics_preempt_mode = graphics_preempt_mode; + break; + + default: + break; + } + + if (g->ops.gr.is_valid_compute_class(g, class) || + 
g->ops.gr.is_valid_gfx_class(g, class)) { + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: + gr_ctx->compute_preempt_mode = compute_preempt_mode; + break; + default: + break; + } + } + + return 0; + +fail_free_betacb: + nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); +fail_free_spill: + nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); +fail_free_preempt: + nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); +fail: + return err; +} + +void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_mem *mem) +{ + struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + struct ctx_header_desc *ctx = &ch_ctx->ctx_header; + struct nvgpu_mem *ctxheader = &ctx->mem; + + u32 gfxp_preempt_option = + ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); + u32 cilp_preempt_option = + ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(); + u32 cta_preempt_option = + ctxsw_prog_main_image_compute_preemption_options_control_cta_f(); + int err; + + gk20a_dbg_fn(""); + + if (gr_ctx->graphics_preempt_mode == + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { + gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_graphics_preemption_options_o(), + gfxp_preempt_option); + } + + if (gr_ctx->compute_preempt_mode == + NVGPU_PREEMPTION_MODE_COMPUTE_CILP) { + gk20a_dbg_info("CILP: %x", cilp_preempt_option); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_compute_preemption_options_o(), + cilp_preempt_option); + } + + if (gr_ctx->compute_preempt_mode == + NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { + gk20a_dbg_info("CTA: %x", cta_preempt_option); + nvgpu_mem_wr(g, mem, + ctxsw_prog_main_image_compute_preemption_options_o(), + cta_preempt_option); + } + + if (gr_ctx->preempt_ctxsw_buffer.gpu_va) { + u32 addr; + u32 size; + u32 cbes_reserve; + + if (g->ops.gr.set_preemption_buffer_va) { + if (ctxheader->gpu_va) + g->ops.gr.set_preemption_buffer_va(g, ctxheader, + gr_ctx->preempt_ctxsw_buffer.gpu_va); + else + g->ops.gr.set_preemption_buffer_va(g, mem, + gr_ctx->preempt_ctxsw_buffer.gpu_va); + } + + err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); + if (err) { + nvgpu_err(g, "can't map patch context"); + goto out; + } + + addr = (u64_lo32(gr_ctx->betacb_ctxsw_buffer.gpu_va) >> + gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | + (u64_hi32(gr_ctx->betacb_ctxsw_buffer.gpu_va) << + (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); + + gk20a_dbg_info("attrib cb addr : 0x%016x", addr); + g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); + + addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> + gr_scc_pagepool_base_addr_39_8_align_bits_v()) | + (u64_hi32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) << + (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); + size = gr_ctx->pagepool_ctxsw_buffer.size; + + if (size == g->ops.gr.pagepool_default_size(g)) + size = gr_scc_pagepool_total_pages_hwmax_v(); + + g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); + + addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> + gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | + (u64_hi32(gr_ctx->spill_ctxsw_buffer.gpu_va) << + (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v())); + size = gr_ctx->spill_ctxsw_buffer.size / + gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); + + gr_gk20a_ctx_patch_write(g, ch_ctx, + 
gr_gpc0_swdx_rm_spill_buffer_addr_r(), + gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), + true); + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpc0_swdx_rm_spill_buffer_size_r(), + gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), + true); + + cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpcs_swdx_beta_cb_ctrl_r(), + gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( + cbes_reserve), + true); + gr_gk20a_ctx_patch_write(g, ch_ctx, + gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), + gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( + cbes_reserve), + true); + + gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); + } + +out: + gk20a_dbg_fn("done"); +} static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g, struct gk20a_debug_output *o, u32 gpc, u32 tpc, u32 sm, u32 offset) @@ -2382,6 +2625,9 @@ void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, { u32 addr_lo, addr_hi; + /* gpu va still needs to be 8 bit aligned */ + gpu_va = gpu_va >> 8; + addr_lo = u64_lo32(gpu_va); addr_hi = u64_hi32(gpu_va); diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 39d12b3f..17e5e9e3 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -1,7 +1,7 @@ /* * GV11B GPU GR * - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -46,6 +46,7 @@ struct zbc_query_params; struct channel_ctx_gk20a; struct nvgpu_warpstate; struct nvgpu_gr_sm_error_state; +struct gr_ctx_desc; struct gr_gk20a_isr_data; struct gk20a_debug_output; @@ -218,4 +219,14 @@ void gr_gv11b_init_gfxp_wfi_timeout_count(struct gk20a *g); unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g); void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g); +int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, + struct gr_ctx_desc *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode); + +void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + struct nvgpu_mem *mem); + #endif diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index ff26f694..9156d9b8 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -1,7 +1,7 @@ /* * GV11B Tegra HAL interface * - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -290,7 +290,7 @@ static const struct gpu_ops gv11b_ops = { .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx, .free_gr_ctx = gr_gp10b_free_gr_ctx, .update_ctxsw_preemption_mode = - gr_gp10b_update_ctxsw_preemption_mode, + gr_gv11b_update_ctxsw_preemption_mode, .dump_gr_regs = gr_gv11b_dump_gr_status_regs, .update_pc_sampling = gr_gm20b_update_pc_sampling, .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, @@ -365,7 +365,7 @@ static const struct gpu_ops gv11b_ops = { .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .create_gr_sysfs = gr_gv11b_create_sysfs, - .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, + .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, .handle_tpc_mpc_exception = gr_gv11b_handle_tpc_mpc_exception, -- cgit v1.2.2
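
For reference, the standalone C sketch below mirrors two pieces of this change:
(a) how gr_gv11b_set_ctxsw_preemption_mode() sizes the GfxP beta/attrib
circular-buffer backing store, and (b) the 8-bit right shift applied to the
preemption buffer gpu va in gr_gv11b_set_preemption_buffer_va(). It is not part
of the patch; every *_EXAMPLE constant is a hypothetical placeholder standing
in for a generated gr_gpc0_*_v() hw-header value or a gr struct field, not the
real gv11b number.

/*
 * Illustrative sketch only, with placeholder values.
 */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

/* Hypothetical stand-ins for hw-header accessors and gr fields. */
#define ATTRIB_CB_DEFAULT_SIZE_EXAMPLE  0x800u   /* gr->attrib_cb_default_size */
#define BETA_CB_DEFAULT_V_EXAMPLE       0x800u   /* cbm_beta_cb_size default   */
#define BETA_CB_GFXP_V_EXAMPLE          0x1000u  /* cbm_beta_cb_size gfxp      */
#define ALPHA_CB_SIZE_EXAMPLE           0x800u   /* gr->alpha_cb_size          */
#define CB_SIZE_GRANULARITY_EXAMPLE     128u     /* bytes per cb size unit     */
#define MAX_TPC_COUNT_EXAMPLE           4u       /* gr->max_tpc_count          */

int main(void)
{
	/* GfxP beta cb: default size plus the gfxp/default delta. */
	uint32_t betacb_size = ATTRIB_CB_DEFAULT_SIZE_EXAMPLE +
		(BETA_CB_GFXP_V_EXAMPLE - BETA_CB_DEFAULT_V_EXAMPLE);

	/* Backing store for all TPCs, rounded up to a 128-byte multiple. */
	uint32_t attrib_cb_size = (betacb_size + ALPHA_CB_SIZE_EXAMPLE) *
		CB_SIZE_GRANULARITY_EXAMPLE * MAX_TPC_COUNT_EXAMPLE;
	attrib_cb_size = ALIGN_UP(attrib_cb_size, 128u);

	/*
	 * The fecs ucode still expects the preemption buffer address with
	 * the low 8 bits dropped, so the 49-bit gpu va is shifted right by
	 * 8 before being split into the lo/hi context-header fields.
	 */
	uint64_t gpu_va = 0x0001234567890000ULL;  /* example 49-bit va */
	uint64_t shifted = gpu_va >> 8;
	uint32_t addr_lo = (uint32_t)(shifted & 0xffffffffULL);
	uint32_t addr_hi = (uint32_t)(shifted >> 32);

	printf("gfxp attrib_cb_size = 0x%" PRIx32 " bytes\n", attrib_cb_size);
	printf("preempt buffer va lo/hi = 0x%08" PRIx32 " / 0x%08" PRIx32 "\n",
	       addr_lo, addr_hi);
	return 0;
}

With these placeholder values the computed backing store comes out to 0x300000
bytes; the real driver derives every operand from the gv11b hw headers and from
gr_gv11b_cb_size_default(), which is why the patch makes the gfxp sizes
chip-specific fields instead of recomputing the gp10b formula.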