diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 70 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c | 36 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | 16 |
6 files changed, 118 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index c36049b9..b5d0572e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -366,6 +366,8 @@ struct gpu_ops { | |||
366 | int (*resume_from_pause)(struct gk20a *g); | 366 | int (*resume_from_pause)(struct gk20a *g); |
367 | int (*clear_sm_errors)(struct gk20a *g); | 367 | int (*clear_sm_errors)(struct gk20a *g); |
368 | u32 (*tpc_enabled_exceptions)(struct gk20a *g); | 368 | u32 (*tpc_enabled_exceptions)(struct gk20a *g); |
369 | int (*set_czf_bypass)(struct gk20a *g, | ||
370 | struct channel_gk20a *ch); | ||
369 | } gr; | 371 | } gr; |
370 | struct { | 372 | struct { |
371 | void (*init_hw)(struct gk20a *g); | 373 | void (*init_hw)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 04d494fc..25636bbd 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
3312 | c->first_init = true; | 3312 | c->first_init = true; |
3313 | } | 3313 | } |
3314 | 3314 | ||
3315 | if (g->ops.gr.set_czf_bypass) | ||
3316 | g->ops.gr.set_czf_bypass(g, c); | ||
3317 | |||
3315 | gk20a_dbg_fn("done"); | 3318 | gk20a_dbg_fn("done"); |
3316 | return 0; | 3319 | return 0; |
3317 | out: | 3320 | out: |
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) | |||
8236 | return ret; | 8239 | return ret; |
8237 | } | 8240 | } |
8238 | 8241 | ||
8239 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | 8242 | int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, |
8240 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, | 8243 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, |
8241 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) | 8244 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops, |
8245 | bool ch_is_curr_ctx) | ||
8242 | { | 8246 | { |
8243 | struct gk20a *g = ch->g; | 8247 | struct gk20a *g = ch->g; |
8244 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 8248 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; |
8245 | bool gr_ctx_ready = false; | 8249 | bool gr_ctx_ready = false; |
8246 | bool pm_ctx_ready = false; | 8250 | bool pm_ctx_ready = false; |
8247 | struct nvgpu_mem *current_mem = NULL; | 8251 | struct nvgpu_mem *current_mem = NULL; |
8248 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; | ||
8249 | u32 i, j, offset, v; | 8252 | u32 i, j, offset, v; |
8250 | struct gr_gk20a *gr = &g->gr; | 8253 | struct gr_gk20a *gr = &g->gr; |
8251 | u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count; | 8254 | u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count; |
8252 | u32 *offsets = NULL; | 8255 | u32 *offsets = NULL; |
8253 | u32 *offset_addrs = NULL; | 8256 | u32 *offset_addrs = NULL; |
8254 | u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; | 8257 | u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops}; |
8255 | int err, pass; | 8258 | int err = 0, pass; |
8256 | 8259 | ||
8257 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", | 8260 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", |
8258 | num_ctx_wr_ops, num_ctx_rd_ops); | 8261 | num_ctx_wr_ops, num_ctx_rd_ops); |
8259 | 8262 | ||
8260 | /* disable channel switching. | ||
8261 | * at that point the hardware state can be inspected to | ||
8262 | * determine if the context we're interested in is current. | ||
8263 | */ | ||
8264 | err = gr_gk20a_disable_ctxsw(g); | ||
8265 | if (err) { | ||
8266 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
8267 | /* this should probably be ctx-fatal... */ | ||
8268 | goto cleanup; | ||
8269 | } | ||
8270 | |||
8271 | restart_gr_ctxsw = true; | ||
8272 | |||
8273 | ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch); | ||
8274 | |||
8275 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx); | ||
8276 | |||
8277 | if (ch_is_curr_ctx) { | 8263 | if (ch_is_curr_ctx) { |
8278 | for (pass = 0; pass < 2; pass++) { | 8264 | for (pass = 0; pass < 2; pass++) { |
8279 | ctx_op_nr = 0; | 8265 | ctx_op_nr = 0; |
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
8497 | if (pm_ctx_ready) | 8483 | if (pm_ctx_ready) |
8498 | nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); | 8484 | nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); |
8499 | 8485 | ||
8500 | if (restart_gr_ctxsw) { | 8486 | return err; |
8501 | int tmp_err = gr_gk20a_enable_ctxsw(g); | 8487 | } |
8502 | if (tmp_err) { | 8488 | |
8503 | nvgpu_err(g, "unable to restart ctxsw!\n"); | 8489 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, |
8504 | err = tmp_err; | 8490 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, |
8505 | } | 8491 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops) |
8492 | { | ||
8493 | struct gk20a *g = ch->g; | ||
8494 | int err, tmp_err; | ||
8495 | bool ch_is_curr_ctx; | ||
8496 | |||
8497 | /* disable channel switching. | ||
8498 | * at that point the hardware state can be inspected to | ||
8499 | * determine if the context we're interested in is current. | ||
8500 | */ | ||
8501 | err = gr_gk20a_disable_ctxsw(g); | ||
8502 | if (err) { | ||
8503 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
8504 | /* this should probably be ctx-fatal... */ | ||
8505 | return err; | ||
8506 | } | ||
8507 | |||
8508 | ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch); | ||
8509 | |||
8510 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", | ||
8511 | ch_is_curr_ctx); | ||
8512 | |||
8513 | err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops, | ||
8514 | num_ctx_rd_ops, ch_is_curr_ctx); | ||
8515 | |||
8516 | tmp_err = gr_gk20a_enable_ctxsw(g); | ||
8517 | if (tmp_err) { | ||
8518 | nvgpu_err(g, "unable to restart ctxsw!\n"); | ||
8519 | err = tmp_err; | ||
8506 | } | 8520 | } |
8507 | 8521 | ||
8508 | return err; | 8522 | return err; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 06ce96e7..ee528c31 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -320,6 +320,7 @@ struct gr_gk20a { | |||
320 | u32 alpha_cb_default_size; | 320 | u32 alpha_cb_default_size; |
321 | u32 alpha_cb_size; | 321 | u32 alpha_cb_size; |
322 | u32 timeslice_mode; | 322 | u32 timeslice_mode; |
323 | u32 czf_bypass; | ||
323 | 324 | ||
324 | struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; | 325 | struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; |
325 | 326 | ||
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op; | |||
563 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | 564 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, |
564 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, | 565 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, |
565 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops); | 566 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops); |
567 | int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | ||
568 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, | ||
569 | u32 num_ctx_wr_ops, u32 num_ctx_rd_ops, | ||
570 | bool ch_is_curr_ctx); | ||
566 | int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | 571 | int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, |
567 | u32 addr, | 572 | u32 addr, |
568 | u32 max_offsets, | 573 | u32 max_offsets, |
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c index d42afb4c..ee14d00c 100644 --- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c +++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GP10B specific sysfs files | 2 | * GP10B specific sysfs files |
3 | * | 3 | * |
4 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -18,6 +18,8 @@ | |||
18 | #include "gk20a/gk20a.h" | 18 | #include "gk20a/gk20a.h" |
19 | #include "gp10b_sysfs.h" | 19 | #include "gp10b_sysfs.h" |
20 | 20 | ||
21 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> | ||
22 | |||
21 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) | 23 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) |
22 | 24 | ||
23 | static ssize_t ecc_enable_store(struct device *dev, | 25 | static ssize_t ecc_enable_store(struct device *dev, |
@@ -49,12 +51,43 @@ static ssize_t ecc_enable_read(struct device *dev, | |||
49 | 51 | ||
50 | static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store); | 52 | static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store); |
51 | 53 | ||
54 | |||
55 | static ssize_t czf_bypass_store(struct device *dev, | ||
56 | struct device_attribute *attr, const char *buf, size_t count) | ||
57 | { | ||
58 | struct gk20a *g = get_gk20a(dev); | ||
59 | unsigned long val; | ||
60 | |||
61 | if (kstrtoul(buf, 10, &val) < 0) | ||
62 | return -EINVAL; | ||
63 | |||
64 | if (val >= 4) | ||
65 | return -EINVAL; | ||
66 | |||
67 | g->gr.czf_bypass = val; | ||
68 | |||
69 | return count; | ||
70 | } | ||
71 | |||
72 | static ssize_t czf_bypass_read(struct device *dev, | ||
73 | struct device_attribute *attr, char *buf) | ||
74 | { | ||
75 | struct gk20a *g = get_gk20a(dev); | ||
76 | |||
77 | return sprintf(buf, "%d\n", g->gr.czf_bypass); | ||
78 | } | ||
79 | |||
80 | static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); | ||
81 | |||
52 | void gp10b_create_sysfs(struct device *dev) | 82 | void gp10b_create_sysfs(struct device *dev) |
53 | { | 83 | { |
54 | struct gk20a *g = get_gk20a(dev); | 84 | struct gk20a *g = get_gk20a(dev); |
55 | int error = 0; | 85 | int error = 0; |
56 | 86 | ||
87 | g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v(); | ||
88 | |||
57 | error |= device_create_file(dev, &dev_attr_ecc_enable); | 89 | error |= device_create_file(dev, &dev_attr_ecc_enable); |
90 | error |= device_create_file(dev, &dev_attr_czf_bypass); | ||
58 | if (error) | 91 | if (error) |
59 | nvgpu_err(g, "Failed to create sysfs attributes!\n"); | 92 | nvgpu_err(g, "Failed to create sysfs attributes!\n"); |
60 | } | 93 | } |
@@ -62,4 +95,5 @@ void gp10b_create_sysfs(struct device *dev) | |||
62 | void gp10b_remove_sysfs(struct device *dev) | 95 | void gp10b_remove_sysfs(struct device *dev) |
63 | { | 96 | { |
64 | device_remove_file(dev, &dev_attr_ecc_enable); | 97 | device_remove_file(dev, &dev_attr_ecc_enable); |
98 | device_remove_file(dev, &dev_attr_czf_bypass); | ||
65 | } | 99 | } |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index a43252de..1853aaec 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "gk20a/gk20a.h" | 27 | #include "gk20a/gk20a.h" |
28 | #include "gk20a/gr_gk20a.h" | 28 | #include "gk20a/gr_gk20a.h" |
29 | #include "gk20a/dbg_gpu_gk20a.h" | 29 | #include "gk20a/dbg_gpu_gk20a.h" |
30 | #include "gk20a/regops_gk20a.h" | ||
30 | 31 | ||
31 | #include "gm20b/gr_gm20b.h" | 32 | #include "gm20b/gr_gm20b.h" |
32 | #include "gp10b/gr_gp10b.h" | 33 | #include "gp10b/gr_gp10b.h" |
@@ -2304,6 +2305,22 @@ static void gr_gp10b_write_preemption_ptr(struct gk20a *g, | |||
2304 | 2305 | ||
2305 | } | 2306 | } |
2306 | 2307 | ||
2308 | int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch) | ||
2309 | { | ||
2310 | struct nvgpu_dbg_gpu_reg_op ops; | ||
2311 | |||
2312 | ops.op = REGOP(WRITE_32); | ||
2313 | ops.type = REGOP(TYPE_GR_CTX); | ||
2314 | ops.status = REGOP(STATUS_SUCCESS); | ||
2315 | ops.value_hi = 0; | ||
2316 | ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m(); | ||
2317 | ops.and_n_mask_hi = 0; | ||
2318 | ops.offset = gr_gpc0_prop_debug1_r(); | ||
2319 | ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f( | ||
2320 | g->gr.czf_bypass); | ||
2321 | |||
2322 | return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false); | ||
2323 | } | ||
2307 | 2324 | ||
2308 | void gp10b_init_gr(struct gpu_ops *gops) | 2325 | void gp10b_init_gr(struct gpu_ops *gops) |
2309 | { | 2326 | { |
@@ -2355,4 +2372,5 @@ void gp10b_init_gr(struct gpu_ops *gops) | |||
2355 | gops->gr.load_smid_config = gr_gp10b_load_smid_config; | 2372 | gops->gr.load_smid_config = gr_gp10b_load_smid_config; |
2356 | gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx; | 2373 | gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx; |
2357 | gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx; | 2374 | gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx; |
2375 | gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass; | ||
2358 | } | 2376 | } |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h index 12ba42a9..43591166 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | |||
@@ -4270,4 +4270,20 @@ static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void) | |||
4270 | { | 4270 | { |
4271 | return 0xff << 0; | 4271 | return 0xff << 0; |
4272 | } | 4272 | } |
4273 | static inline u32 gr_gpc0_prop_debug1_r(void) | ||
4274 | { | ||
4275 | return 0x00500400; | ||
4276 | } | ||
4277 | static inline u32 gr_gpc0_prop_debug1_czf_bypass_f(u32 v) | ||
4278 | { | ||
4279 | return (v & 0x3) << 14; | ||
4280 | } | ||
4281 | static inline u32 gr_gpc0_prop_debug1_czf_bypass_m(void) | ||
4282 | { | ||
4283 | return 0x3 << 14; | ||
4284 | } | ||
4285 | static inline u32 gr_gpc0_prop_debug1_czf_bypass_init_v(void) | ||
4286 | { | ||
4287 | return 0x00000001; | ||
4288 | } | ||
4273 | #endif | 4289 | #endif |