From 39a9e251da0fb4da8512593d3ce4f6eba47d5e0c Mon Sep 17 00:00:00 2001 From: Peter Boonstoppel Date: Tue, 2 May 2017 12:09:40 -0700 Subject: gpu: nvgpu: Add czf_bypass sysfs node for gp10b This change adds a new sysfs node to allow configuring CZF_BYPASS, to enable platforms with low context-switching latency requirements. /sys/devices/17000000.gp10b/czf_bypass Values: 0 - always 1 - lateZ (default) 2 - single pass 3 - never The specified value will apply only to newly allocated contexts. Bug 1914014 Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f Signed-off-by: Peter Boonstoppel Reviewed-on: http://git-master/r/1478567 (cherry picked from commit 3bc022cb385b53f698b04f218db535e8162e8c94) Reviewed-on: http://git-master/r/1473820 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.h | 2 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 70 +++++++++++++--------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 5 ++ drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c | 36 ++++++++++- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 18 ++++++ .../gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | 16 +++++ 6 files changed, 118 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index c36049b9..b5d0572e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -366,6 +366,8 @@ struct gpu_ops { int (*resume_from_pause)(struct gk20a *g); int (*clear_sm_errors)(struct gk20a *g); u32 (*tpc_enabled_exceptions)(struct gk20a *g); + int (*set_czf_bypass)(struct gk20a *g, + struct channel_gk20a *ch); } gr; struct { void (*init_hw)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 04d494fc..25636bbd 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, c->first_init = true; } + if (g->ops.gr.set_czf_bypass) + 
g->ops.gr.set_czf_bypass(g, c); + gk20a_dbg_fn("done"); return 0; out: @@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) return ret; } -int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, - struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, - u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) +int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, + struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, + u32 num_ctx_wr_ops, u32 num_ctx_rd_ops, + bool ch_is_curr_ctx) { struct gk20a *g = ch->g; struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; bool gr_ctx_ready = false; bool pm_ctx_ready = false; struct nvgpu_mem *current_mem = NULL; - bool ch_is_curr_ctx, restart_gr_ctxsw = false; u32 i, j, offset, v; struct gr_gk20a *gr = &g->gr; u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count; u32 *offsets = NULL; u32 *offset_addrs = NULL; u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; - int err, pass; + int err = 0, pass; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", num_ctx_wr_ops, num_ctx_rd_ops); - /* disable channel switching. - * at that point the hardware state can be inspected to - * determine if the context we're interested in is current. - */ - err = gr_gk20a_disable_ctxsw(g); - if (err) { - nvgpu_err(g, "unable to stop gr ctxsw"); - /* this should probably be ctx-fatal... 
*/ - goto cleanup; - } - - restart_gr_ctxsw = true; - - ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch); - - gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx); - if (ch_is_curr_ctx) { for (pass = 0; pass < 2; pass++) { ctx_op_nr = 0; @@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, if (pm_ctx_ready) nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); - if (restart_gr_ctxsw) { - int tmp_err = gr_gk20a_enable_ctxsw(g); - if (tmp_err) { - nvgpu_err(g, "unable to restart ctxsw!\n"); - err = tmp_err; - } + return err; +} + +int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, + struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, + u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) +{ + struct gk20a *g = ch->g; + int err, tmp_err; + bool ch_is_curr_ctx; + + /* disable channel switching. + * at that point the hardware state can be inspected to + * determine if the context we're interested in is current. + */ + err = gr_gk20a_disable_ctxsw(g); + if (err) { + nvgpu_err(g, "unable to stop gr ctxsw"); + /* this should probably be ctx-fatal... 
*/ + return err; + } + + ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch); + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", + ch_is_curr_ctx); + + err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops, + num_ctx_rd_ops, ch_is_curr_ctx); + + tmp_err = gr_gk20a_enable_ctxsw(g); + if (tmp_err) { + nvgpu_err(g, "unable to restart ctxsw!\n"); + err = tmp_err; } return err; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 06ce96e7..ee528c31 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -320,6 +320,7 @@ struct gr_gk20a { u32 alpha_cb_default_size; u32 alpha_cb_size; u32 timeslice_mode; + u32 czf_bypass; struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; @@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op; int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, u32 num_ctx_wr_ops, u32 num_ctx_rd_ops); +int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, + struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, + u32 num_ctx_wr_ops, u32 num_ctx_rd_ops, + bool ch_is_curr_ctx); int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, u32 addr, u32 max_offsets, diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c index d42afb4c..ee14d00c 100644 --- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c +++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c @@ -1,7 +1,7 @@ /* * GP10B specific sysfs files * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -18,6 +18,8 @@ #include "gk20a/gk20a.h" #include "gp10b_sysfs.h" +#include <nvgpu/hw/gp10b/hw_gr_gp10b.h> + #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) static ssize_t ecc_enable_store(struct device *dev, @@ -49,12 +51,43 @@ static ssize_t ecc_enable_read(struct device *dev, static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store); + +static ssize_t czf_bypass_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val >= 4) + return -EINVAL; + + g->gr.czf_bypass = val; + + return count; +} + +static ssize_t czf_bypass_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return sprintf(buf, "%d\n", g->gr.czf_bypass); +} + +static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); + void gp10b_create_sysfs(struct device *dev) { struct gk20a *g = get_gk20a(dev); int error = 0; + g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v(); + error |= device_create_file(dev, &dev_attr_ecc_enable); + error |= device_create_file(dev, &dev_attr_czf_bypass); if (error) nvgpu_err(g, "Failed to create sysfs attributes!\n"); } @@ -62,4 +95,5 @@ void gp10b_create_sysfs(struct device *dev) void gp10b_remove_sysfs(struct device *dev) { device_remove_file(dev, &dev_attr_ecc_enable); + device_remove_file(dev, &dev_attr_czf_bypass); } diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index a43252de..1853aaec 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -27,6 +27,7 @@ #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/regops_gk20a.h" #include "gm20b/gr_gm20b.h" #include "gp10b/gr_gp10b.h" @@ 
-2304,6 +2305,22 @@ static void gr_gp10b_write_preemption_ptr(struct gk20a *g, } +int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_dbg_gpu_reg_op ops; + + ops.op = REGOP(WRITE_32); + ops.type = REGOP(TYPE_GR_CTX); + ops.status = REGOP(STATUS_SUCCESS); + ops.value_hi = 0; + ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m(); + ops.and_n_mask_hi = 0; + ops.offset = gr_gpc0_prop_debug1_r(); + ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f( + g->gr.czf_bypass); + + return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false); +} void gp10b_init_gr(struct gpu_ops *gops) { @@ -2355,4 +2372,5 @@ void gp10b_init_gr(struct gpu_ops *gops) gops->gr.load_smid_config = gr_gp10b_load_smid_config; gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx; gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx; + gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h index 12ba42a9..43591166 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h @@ -4270,4 +4270,20 @@ static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void) { return 0xff << 0; } +static inline u32 gr_gpc0_prop_debug1_r(void) +{ + return 0x00500400; +} +static inline u32 gr_gpc0_prop_debug1_czf_bypass_f(u32 v) +{ + return (v & 0x3) << 14; +} +static inline u32 gr_gpc0_prop_debug1_czf_bypass_m(void) +{ + return 0x3 << 14; +} +static inline u32 gr_gpc0_prop_debug1_czf_bypass_init_v(void) +{ + return 0x00000001; +} #endif -- cgit v1.2.2