From e14fdcd8f1f4125da697433b1744b1e4e4f15b09 Mon Sep 17 00:00:00 2001
From: Vaibhav Kachore
Date: Fri, 6 Jul 2018 15:10:03 +0530
Subject: gpu: nvgpu: enable HWPM Mode-E context switch

- Write the new pm mode to the context buffer header. Ucode uses this
  mode to enable Mode-E context switch. This is a Mode-B context switch
  of the PMs with Mode-E streamout enabled on one context. If this mode
  is set, Ucode makes sure that the Mode-E pipe (perfmons, routers, PMA)
  is idle before it context switches the PMs.
- This allows counters to be collected in a secure way (i.e. on a
  per-context basis) with stream out.

Bug 2106999

Change-Id: I5a7435f09d1bf053ca428e538b0a57f3a175ac37
Signed-off-by: Vaibhav Kachore
Reviewed-on: https://git-master.nvidia.com/r/1760366
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h            |  9 +++++
 drivers/gpu/nvgpu/gk20a/gk20a.h                    |  3 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c                 | 46 ++++++++++++++++------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h                 |  2 +-
 drivers/gpu/nvgpu/gv100/gr_gv100.c                 |  6 +++
 drivers/gpu/nvgpu/gv100/gr_gv100.h                 |  1 +
 drivers/gpu/nvgpu/gv100/hal_gv100.c                |  2 +
 .../include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h   |  6 ++-
 drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h  |  1 +
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c             | 24 ++++++++++-
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c                   | 43 ++++++++++++++------
 drivers/gpu/nvgpu/vgpu/gr_vgpu.h                   |  2 +-
 12 files changed, 116 insertions(+), 29 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
index d9d07844..50002557 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -135,4 +135,13 @@ int gk20a_perfbuf_disable_locked(struct gk20a *g);
 void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s);
 u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
 		bool mode);
+
+ /* PM Context Switch Mode */
+/*This mode says that the pms are not to be context switched. */
+#define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
+/* This mode says that the pms in Mode-B are to be context switched */
+#define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001)
+/* This mode says that the pms in Mode-E (stream out) are to be context switched.
+ */
+#define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
+
 #endif /* DBG_GPU_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 23ed2f15..ce0a6563 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -307,10 +307,11 @@ struct gpu_ops {
 		int (*update_smpc_ctxsw_mode)(struct gk20a *g,
 					      struct channel_gk20a *c,
 					      bool enable);
+		u32 (*get_hw_accessor_stream_out_mode)(void);
 		int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
 					      struct channel_gk20a *c,
 					      u64 gpu_va,
-					      bool enable);
+					      u32 mode);
 		int (*dump_gr_regs)(struct gk20a *g,
 				    struct gk20a_debug_output *o);
 		int (*update_pc_sampling)(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 7a65f353..3f49fbf7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1684,14 +1684,14 @@ out:
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				  struct channel_gk20a *c,
 				  u64 gpu_va,
-				  bool enable_hwpm_ctxsw)
+				  u32 mode)
 {
 	struct tsg_gk20a *tsg;
 	struct nvgpu_mem *gr_mem = NULL;
 	struct nvgpu_gr_ctx *gr_ctx;
 	struct pm_ctx_desc *pm_ctx;
 	u32 data;
-	u64 virt_addr;
+	u64 virt_addr = 0;
 	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
 	int ret;
@@ -1710,12 +1710,31 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		return -EFAULT;
 	}
 
-	if (enable_hwpm_ctxsw) {
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
+	if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
+		(!g->ops.gr.get_hw_accessor_stream_out_mode)) {
+		nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
+		return -EINVAL;
+	}
+
+	switch (mode) {
+	case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
 			return 0;
-	} else {
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
+		}
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
+			return 0;
+		}
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
+		if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
 			return 0;
+		}
+		break;
+	default:
+		nvgpu_err(g, "invalid hwpm context switch mode");
+		return -EINVAL;
 	}
 
 	ret = gk20a_disable_channel_tsg(g, c);
@@ -1735,7 +1754,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (enable_hwpm_ctxsw) {
+	if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
 		/* Allocate buffer if necessary */
 		if (pm_ctx->mem.gpu_va == 0) {
 			ret = nvgpu_dma_alloc_sys(g,
@@ -1768,11 +1787,16 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_mode_m();
 
-	if (enable_hwpm_ctxsw) {
+	switch (mode) {
+	case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
 		pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
-		virt_addr = pm_ctx->mem.gpu_va;
-	} else {
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
+		pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
+		virt_addr = pm_ctx->mem.gpu_va;
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
 		pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
 		virt_addr = 0;
 	}
@@ -1892,7 +1916,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	 * for PM context switching, including mode and possibly a pointer to
 	 * the PM backing store.
 	 */
-	if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
+	if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
 		if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
 			nvgpu_err(g,
 				"context switched pm with no pm buffer!");
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index a77136a6..92e1dff5 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -655,7 +655,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				  struct channel_gk20a *c,
 				  u64 gpu_va,
-				  bool enable_hwpm_ctxsw);
+				  u32 mode);
 
 struct nvgpu_gr_ctx;
 void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 680bcba3..b1b1cee6 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
 
 /*
@@ -453,3 +454,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
 		}
 	}
 }
+
+u32 gr_gv100_get_hw_accessor_stream_out_mode()
+{
+	return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
+}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 821659aa..457bd701 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -47,4 +47,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
 	u32 num_fbpas,
 	u32 *priv_addr_table, u32 *t);
 void gr_gv100_init_gpc_mmu(struct gk20a *g);
+u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 07c71a04..c84778dd 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -358,6 +358,8 @@ static const struct gpu_ops gv100_ops = {
 		.enable_exceptions = gr_gv11b_enable_exceptions,
 		.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
 		.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
+		.get_hw_accessor_stream_out_mode =
+			gr_gv100_get_hw_accessor_stream_out_mode,
 		.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
 		.record_sm_error_state = gv11b_gr_record_sm_error_state,
 		.update_sm_error_state = gv11b_gr_update_sm_error_state,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h
index cd792835..b7f3df24 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -144,6 +144,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
 {
 	return 0x0U;
 }
+static inline u32 ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(void)
+{
+	return 0x2U;
+}
 static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
 {
 	return 0x7U << 3U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
index 6d05e44c..1eb26303 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
@@ -356,6 +356,7 @@ struct tegra_vgpu_fecs_trace_filter {
 enum {
 	TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0,
 	TEGRA_VGPU_CTXSW_MODE_CTXSW,
+	TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW,
 };
 
 enum {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 9c7608e9..d530492a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1039,12 +1039,33 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	return err;
 }
 
+/*
+ * Convert linux hwpm ctxsw mode type of the form of NVGPU_DBG_GPU_HWPM_CTXSW_MODE_*
+ * into common hwpm ctxsw mode type of the form of NVGPU_DBG_HWPM_CTXSW_MODE_*
+ */
+
+static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
+{
+	switch (mode){
+	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
+		return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
+	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW:
+		return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW;
+	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
+		return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW;
+	}
+
+	return mode;
+}
+
+
 static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			       struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
 {
 	int err;
 	struct gk20a *g = dbg_s->g;
 	struct channel_gk20a *ch_gk20a;
+	u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode);
 
 	nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
 
@@ -1080,7 +1101,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 	err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0,
-				args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
+				mode);
+
 	if (err)
 		nvgpu_err(g,
 			"error (%d) during pm ctxsw mode update", err);
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 83d27f17..0077c537 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -1066,7 +1066,7 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 }
 
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
-	struct channel_gk20a *ch, u64 gpu_va, bool enable)
+	struct channel_gk20a *ch, u64 gpu_va, u32 mode)
 {
 	struct tsg_gk20a *tsg;
 	struct nvgpu_gr_ctx *ch_ctx;
@@ -1089,16 +1089,33 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	ch_ctx = &tsg->gr_ctx;
 	pm_ctx = &ch_ctx->pm_ctx;
 
-	if (enable) {
+	if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
 		/*
 		 * send command to enable HWPM only once - otherwise server
 		 * will return an error due to using the same GPU VA twice.
 		 */
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
-			return 0;
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
+			return 0;
+		}
 
 		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+	} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
+			return 0;
+		}
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+	} else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
+		(g->ops.gr.get_hw_accessor_stream_out_mode)){
+		if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
+			return 0;
+		}
+		p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW;
+	} else {
+		nvgpu_err(g, "invalid hwpm context switch mode");
+		return -EINVAL;
+	}
 
+	if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
 		/* Allocate buffer if necessary */
 		if (pm_ctx->mem.gpu_va == 0) {
 			pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm,
@@ -1109,11 +1126,6 @@
 				return -ENOMEM;
 			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
 		}
-	} else {
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
-			return 0;
-
-		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
 	}
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
@@ -1124,10 +1136,15 @@
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 	err = err ? err : msg.ret;
-	if (!err)
-		pm_ctx->pm_mode = enable ?
-			ctxsw_prog_main_image_pm_mode_ctxsw_f() :
-			ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+	if (!err) {
+		if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
+			pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
+		} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
+			pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+		} else {
+			pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
+		}
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h
index c4b3944e..149bd17a 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.h
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.h
@@ -58,7 +58,7 @@ int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
 int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 	struct channel_gk20a *ch, bool enable);
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
-	struct channel_gk20a *ch, u64 gpu_va, bool enable);
+	struct channel_gk20a *ch, u64 gpu_va, u32 mode);
 int vgpu_gr_clear_sm_error_state(struct gk20a *g,
 	struct channel_gk20a *ch, u32 sm_id);
 int vgpu_gr_suspend_contexts(struct gk20a *g,
--
cgit v1.2.2
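
For reference, the new mode is requested from userspace through the nvgpu debugger
ioctl handled by nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode() above. The sketch below shows
how a profiling tool might ask for Mode-E stream-out context switching on a channel it
has already bound to its dbg session; it is a minimal illustration, not part of this
patch, and it assumes the nvgpu uapi names NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE,
struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args and the NVGPU_DBG_GPU_HWPM_CTXSW_MODE_*
values exported by <linux/nvgpu.h> (the uapi side of this change is outside the
drivers/gpu-limited diff shown here).

/*
 * Hypothetical userspace caller (assumed uapi names; the numeric values mirror
 * the NVGPU_DBG_HWPM_CTXSW_MODE_* defines added to dbg_gpu_gk20a.h above).
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed install location of the nvgpu dbg uapi header */

static int request_mode_e_streamout(int dbg_fd)
{
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args args;

	memset(&args, 0, sizeof(args));
	/*
	 * The driver converts this to NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW
	 * and, on gv100, writes ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f()
	 * into the context buffer header so ucode idles the Mode-E pipe before
	 * context switching the PMs.
	 */
	args.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW;

	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &args) != 0) {
		perror("NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE");
		return -1;
	}
	return 0;
}

On chips that do not install the get_hw_accessor_stream_out_mode HAL op (only gv100
provides it in this patch), gr_gk20a_update_hwpm_ctxsw_mode() and the vgpu variant
reject the request with -EINVAL, so a caller should be prepared to fall back to plain
Mode-B context switching (NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW).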