summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author Peter Boonstoppel <pboonstoppel@nvidia.com> 2017-05-02 15:09:40 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com> 2017-05-17 13:24:20 -0400
commit 39a9e251da0fb4da8512593d3ce4f6eba47d5e0c (patch)
tree 4b0cc5a4c196ba815aff2856034ffbf115cc2fa6 /drivers/gpu
parent 65de2a2d65a2d7f748580cbc646438a7b4e99d13 (diff)
gpu: nvgpu: Add czf_bypass sysfs node for gp10b
This change adds a new sysfs node to allow configuring CZF_BYPASS, to enable platforms with low context-switching latency requirements.

/sys/devices/17000000.gp10b/czf_bypass

Values:
0 - always
1 - lateZ (default)
2 - single pass
3 - never

The specified value will apply only to newly allocated contexts.

Bug 1914014
Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb385b53f698b04f218db535e8162e8c94)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 70
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.h 5
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c 36
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gr_gp10b.c 18
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h 16
6 files changed, 118 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c36049b9..b5d0572e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -366,6 +366,8 @@ struct gpu_ops {
366 int (*resume_from_pause)(struct gk20a *g); 366 int (*resume_from_pause)(struct gk20a *g);
367 int (*clear_sm_errors)(struct gk20a *g); 367 int (*clear_sm_errors)(struct gk20a *g);
368 u32 (*tpc_enabled_exceptions)(struct gk20a *g); 368 u32 (*tpc_enabled_exceptions)(struct gk20a *g);
369 int (*set_czf_bypass)(struct gk20a *g,
370 struct channel_gk20a *ch);
369 } gr; 371 } gr;
370 struct { 372 struct {
371 void (*init_hw)(struct gk20a *g); 373 void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 04d494fc..25636bbd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
3312 c->first_init = true; 3312 c->first_init = true;
3313 } 3313 }
3314 3314
3315 if (g->ops.gr.set_czf_bypass)
3316 g->ops.gr.set_czf_bypass(g, c);
3317
3315 gk20a_dbg_fn("done"); 3318 gk20a_dbg_fn("done");
3316 return 0; 3319 return 0;
3317out: 3320out:
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
8236 return ret; 8239 return ret;
8237} 8240}
8238 8241
8239int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, 8242int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8240 struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, 8243 struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
8241 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) 8244 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
8245 bool ch_is_curr_ctx)
8242{ 8246{
8243 struct gk20a *g = ch->g; 8247 struct gk20a *g = ch->g;
8244 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 8248 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
8245 bool gr_ctx_ready = false; 8249 bool gr_ctx_ready = false;
8246 bool pm_ctx_ready = false; 8250 bool pm_ctx_ready = false;
8247 struct nvgpu_mem *current_mem = NULL; 8251 struct nvgpu_mem *current_mem = NULL;
8248 bool ch_is_curr_ctx, restart_gr_ctxsw = false;
8249 u32 i, j, offset, v; 8252 u32 i, j, offset, v;
8250 struct gr_gk20a *gr = &g->gr; 8253 struct gr_gk20a *gr = &g->gr;
8251 u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count; 8254 u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
8252 u32 *offsets = NULL; 8255 u32 *offsets = NULL;
8253 u32 *offset_addrs = NULL; 8256 u32 *offset_addrs = NULL;
8254 u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; 8257 u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
8255 int err, pass; 8258 int err = 0, pass;
8256 8259
8257 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", 8260 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
8258 num_ctx_wr_ops, num_ctx_rd_ops); 8261 num_ctx_wr_ops, num_ctx_rd_ops);
8259 8262
8260 /* disable channel switching.
8261 * at that point the hardware state can be inspected to
8262 * determine if the context we're interested in is current.
8263 */
8264 err = gr_gk20a_disable_ctxsw(g);
8265 if (err) {
8266 nvgpu_err(g, "unable to stop gr ctxsw");
8267 /* this should probably be ctx-fatal... */
8268 goto cleanup;
8269 }
8270
8271 restart_gr_ctxsw = true;
8272
8273 ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
8274
8275 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
8276
8277 if (ch_is_curr_ctx) { 8263 if (ch_is_curr_ctx) {
8278 for (pass = 0; pass < 2; pass++) { 8264 for (pass = 0; pass < 2; pass++) {
8279 ctx_op_nr = 0; 8265 ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8497 if (pm_ctx_ready) 8483 if (pm_ctx_ready)
8498 nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); 8484 nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
8499 8485
8500 if (restart_gr_ctxsw) { 8486 return err;
8501 int tmp_err = gr_gk20a_enable_ctxsw(g); 8487}
8502 if (tmp_err) { 8488
8503 nvgpu_err(g, "unable to restart ctxsw!\n"); 8489int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
8504 err = tmp_err; 8490 struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
8505 } 8491 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
8492{
8493 struct gk20a *g = ch->g;
8494 int err, tmp_err;
8495 bool ch_is_curr_ctx;
8496
8497 /* disable channel switching.
8498 * at that point the hardware state can be inspected to
8499 * determine if the context we're interested in is current.
8500 */
8501 err = gr_gk20a_disable_ctxsw(g);
8502 if (err) {
8503 nvgpu_err(g, "unable to stop gr ctxsw");
8504 /* this should probably be ctx-fatal... */
8505 return err;
8506 }
8507
8508 ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
8509
8510 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
8511 ch_is_curr_ctx);
8512
8513 err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
8514 num_ctx_rd_ops, ch_is_curr_ctx);
8515
8516 tmp_err = gr_gk20a_enable_ctxsw(g);
8517 if (tmp_err) {
8518 nvgpu_err(g, "unable to restart ctxsw!\n");
8519 err = tmp_err;
8506 } 8520 }
8507 8521
8508 return err; 8522 return err;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 06ce96e7..ee528c31 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -320,6 +320,7 @@ struct gr_gk20a {
320 u32 alpha_cb_default_size; 320 u32 alpha_cb_default_size;
321 u32 alpha_cb_size; 321 u32 alpha_cb_size;
322 u32 timeslice_mode; 322 u32 timeslice_mode;
323 u32 czf_bypass;
323 324
324 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; 325 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
325 326
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op;
563int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, 564int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
564 struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, 565 struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
565 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops); 566 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
567int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
568 struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
569 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
570 bool ch_is_curr_ctx);
566int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, 571int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
567 u32 addr, 572 u32 addr,
568 u32 max_offsets, 573 u32 max_offsets,
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
index d42afb4c..ee14d00c 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GP10B specific sysfs files 2 * GP10B specific sysfs files
3 * 3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,8 @@
18#include "gk20a/gk20a.h" 18#include "gk20a/gk20a.h"
19#include "gp10b_sysfs.h" 19#include "gp10b_sysfs.h"
20 20
21#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
22
21#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) 23#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
22 24
23static ssize_t ecc_enable_store(struct device *dev, 25static ssize_t ecc_enable_store(struct device *dev,
@@ -49,12 +51,43 @@ static ssize_t ecc_enable_read(struct device *dev,
49 51
50static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store); 52static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store);
51 53
54
55static ssize_t czf_bypass_store(struct device *dev,
56 struct device_attribute *attr, const char *buf, size_t count)
57{
58 struct gk20a *g = get_gk20a(dev);
59 unsigned long val;
60
61 if (kstrtoul(buf, 10, &val) < 0)
62 return -EINVAL;
63
64 if (val >= 4)
65 return -EINVAL;
66
67 g->gr.czf_bypass = val;
68
69 return count;
70}
71
72static ssize_t czf_bypass_read(struct device *dev,
73 struct device_attribute *attr, char *buf)
74{
75 struct gk20a *g = get_gk20a(dev);
76
77 return sprintf(buf, "%d\n", g->gr.czf_bypass);
78}
79
80static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
81
/*
 * Create the GP10B-specific sysfs attribute files for dev.
 *
 * The new image first seeds g->gr.czf_bypass with the value returned by
 * gr_gpc0_prop_debug1_czf_bypass_init_v(), then creates the ecc_enable and
 * czf_bypass files. The results of both device_create_file() calls are
 * OR-ed together, so a failure is only logged via nvgpu_err(); nothing is
 * returned to the caller.
 */
52void gp10b_create_sysfs(struct device *dev) 82void gp10b_create_sysfs(struct device *dev)
53{ 83{
54 struct gk20a *g = get_gk20a(dev); 84 struct gk20a *g = get_gk20a(dev);
55 int error = 0; 85 int error = 0;
56 86
87 g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v();
88
57 error |= device_create_file(dev, &dev_attr_ecc_enable); 89 error |= device_create_file(dev, &dev_attr_ecc_enable);
90 error |= device_create_file(dev, &dev_attr_czf_bypass);
58 if (error) 91 if (error)
59 nvgpu_err(g, "Failed to create sysfs attributes!\n"); 92 nvgpu_err(g, "Failed to create sysfs attributes!\n");
60} 93}
@@ -62,4 +95,5 @@ void gp10b_create_sysfs(struct device *dev)
/*
 * Remove the GP10B-specific sysfs attribute files from dev: ecc_enable
 * and, in the new image, czf_bypass as well.
 */
62void gp10b_remove_sysfs(struct device *dev) 95void gp10b_remove_sysfs(struct device *dev)
63{ 96{
64 device_remove_file(dev, &dev_attr_ecc_enable); 97 device_remove_file(dev, &dev_attr_ecc_enable);
98 device_remove_file(dev, &dev_attr_czf_bypass);
65} 99}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a43252de..1853aaec 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -27,6 +27,7 @@
27#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
28#include "gk20a/gr_gk20a.h" 28#include "gk20a/gr_gk20a.h"
29#include "gk20a/dbg_gpu_gk20a.h" 29#include "gk20a/dbg_gpu_gk20a.h"
30#include "gk20a/regops_gk20a.h"
30 31
31#include "gm20b/gr_gm20b.h" 32#include "gm20b/gr_gm20b.h"
32#include "gp10b/gr_gp10b.h" 33#include "gp10b/gr_gp10b.h"
@@ -2304,6 +2305,22 @@ static void gr_gp10b_write_preemption_ptr(struct gk20a *g,
2304 2305
2305} 2306}
2306 2307
2308int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
2309{
2310 struct nvgpu_dbg_gpu_reg_op ops;
2311
2312 ops.op = REGOP(WRITE_32);
2313 ops.type = REGOP(TYPE_GR_CTX);
2314 ops.status = REGOP(STATUS_SUCCESS);
2315 ops.value_hi = 0;
2316 ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m();
2317 ops.and_n_mask_hi = 0;
2318 ops.offset = gr_gpc0_prop_debug1_r();
2319 ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f(
2320 g->gr.czf_bypass);
2321
2322 return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
2323}
2307 2324
2308void gp10b_init_gr(struct gpu_ops *gops) 2325void gp10b_init_gr(struct gpu_ops *gops)
2309{ 2326{
@@ -2355,4 +2372,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
2355 gops->gr.load_smid_config = gr_gp10b_load_smid_config; 2372 gops->gr.load_smid_config = gr_gp10b_load_smid_config;
2356 gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx; 2373 gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
2357 gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx; 2374 gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
2375 gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
2358} 2376}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
index 12ba42a9..43591166 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
@@ -4270,4 +4270,20 @@ static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void)
4270{ 4270{
4271 return 0xff << 0; 4271 return 0xff << 0;
4272} 4272}
4273static inline u32 gr_gpc0_prop_debug1_r(void)
4274{
4275 return 0x00500400;
4276}
4277static inline u32 gr_gpc0_prop_debug1_czf_bypass_f(u32 v)
4278{
4279 return (v & 0x3) << 14;
4280}
4281static inline u32 gr_gpc0_prop_debug1_czf_bypass_m(void)
4282{
4283 return 0x3 << 14;
4284}
4285static inline u32 gr_gpc0_prop_debug1_czf_bypass_init_v(void)
4286{
4287 return 0x00000001;
4288}
4273#endif 4289#endif