author     Ranjanikar Nikhil Prabhakarrao <rprabhakarra@nvidia.com>   2018-12-13 06:59:20 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>        2020-06-30 13:07:26 -0400
commit     f56874aec2ec61f2c341b813cc76de5acc51ea12 (patch)
tree       efd3d6a3921c930a76bf0cb7011ca6b9809ed5f3
parent     bbef4c6927a13a24821c43cb2b6af72f859f7deb (diff)
gpu: nvgpu: add speculative barrier
Data can be speculatively stored and code flow can be hijacked. To
mitigate this problem, insert a speculation barrier.

Bug 200447167

Change-Id: Ia865ff2add8b30de49aa970715625b13e8f71c08
Signed-off-by: Ranjanikar Nikhil Prabhakarrao <rprabhakarra@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1972221
(cherry picked from commit f0762ed4831b3fe6cc953a4a4ec26c2537dcb69f)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/1996052
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Deepak Nibade <dnibade@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
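The pattern hardened throughout this change is the classic bounds-check-then-use
sequence on untrusted ioctl input. Below is a minimal sketch of that pattern, not
code from the patch itself; it assumes nvgpu_speculation_barrier(), declared by the
nvgpu OS layer, expands to an architectural speculation barrier (e.g. CSDB on arm64,
LFENCE on x86), and the names table, table_size and user_index are illustrative only.

/*
 * Sketch of the Spectre-v1 style pattern this change mitigates.
 */
#include <nvgpu/types.h>

static int lookup_checked(const u32 *table, u32 table_size,
			  u32 user_index, u32 *out)
{
	if (user_index >= table_size)
		return -EINVAL;

	/*
	 * Without the barrier, the CPU may speculatively execute the load
	 * below with an out-of-bounds user_index before the bounds check
	 * retires, leaving a cache footprint an attacker can measure.
	 */
	nvgpu_speculation_barrier();

	*out = table[user_index];
	return 0;
}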
-rw-r--r--  drivers/gpu/nvgpu/common/fifo/submit.c         1
-rw-r--r--  drivers/gpu/nvgpu/common/fifo/tsg.c            1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c             2
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c             1
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.c             3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c     1
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_as.c          2
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_channel.c     5
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       11
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_dbg.c        10
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_tsg.c         2
-rw-r--r--  drivers/gpu/nvgpu/os/linux/sched.c             1
-rw-r--r--  drivers/gpu/nvgpu/tu104/gr_tu104.c           549
13 files changed, 589 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index d518fbfb..b0f38ff1 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -212,6 +212,7 @@ static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
 	u32 end = start + len; /* exclusive */
 	int err;
 
+	nvgpu_speculation_barrier();
 	if (end > gpfifo_size) {
 		/* wrap-around */
 		int length0 = gpfifo_size - start;
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index f6c718f0..841dd465 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -219,6 +219,7 @@ int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 
 	nvgpu_log(g, gpu_dbg_sched, "tsgid=%u interleave=%u", tsg->tsgid, level);
 
+	nvgpu_speculation_barrier();
 	switch (level) {
 	case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
 	case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 989ee5c9..636d5714 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3943,6 +3943,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
 	/* no endian swap ? */
 
 	nvgpu_mutex_acquire(&gr->zbc_lock);
+	nvgpu_speculation_barrier();
 	switch (zbc_val->type) {
 	case GK20A_ZBC_TYPE_COLOR:
 		/* search existing tables */
@@ -4047,6 +4048,7 @@ int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
 	u32 index = query_params->index_size;
 	u32 i;
 
+	nvgpu_speculation_barrier();
 	switch (query_params->type) {
 	case GK20A_ZBC_TYPE_INVALID:
 		query_params->index_size = GK20A_ZBC_TABLE_SIZE;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 4b42678f..2bcb08a4 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -52,6 +52,7 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case PASCAL_COMPUTE_A:
 	case PASCAL_A:
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 1dfecfc1..5820a695 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -77,6 +77,7 @@ bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_COMPUTE_A:
 	case VOLTA_A:
@@ -106,6 +107,7 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_A:
 	case PASCAL_A:
@@ -140,6 +142,7 @@ bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_COMPUTE_A:
 	case PASCAL_COMPUTE_A:
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
index 8b38a9e1..bada5dc7 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
@@ -244,6 +244,7 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	vidmem_buf = dmabuf->priv;
 	mem = vidmem_buf->mem;
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
 		nvgpu_mem_rd_n(g, mem, offset, buffer, size);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 3fa8aa2c..f0cec178 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -170,6 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 		nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
 	}
 
+	nvgpu_speculation_barrier();
 	if (err) {
 		nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
 
@@ -355,6 +356,7 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_AS_IOCTL_BIND_CHANNEL:
 		trace_gk20a_as_ioctl_bind_channel(g->name);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 22177171..3c844491 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -290,6 +290,7 @@ static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
 	if (!args->dmabuf_fd)
 		return -EINVAL;
 
+	nvgpu_speculation_barrier();
 	/* handle the command (most frequent cases first) */
 	switch (args->cmd) {
 	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
@@ -874,6 +875,7 @@ clean_up:
  */
 u32 nvgpu_get_common_runlist_level(u32 level)
 {
+	nvgpu_speculation_barrier();
 	switch (level) {
 	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
 		return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
@@ -982,6 +984,7 @@ u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
  */
 static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
 {
+	nvgpu_speculation_barrier();
 	switch (graphics_preempt_mode) {
 	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
 		return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
@@ -998,6 +1001,7 @@ static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
  */
 static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
 {
+	nvgpu_speculation_barrier();
 	switch (compute_preempt_mode) {
 	case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
 		return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
@@ -1121,6 +1125,7 @@ long gk20a_channel_ioctl(struct file *filp,
 	/* this ioctl call keeps a ref to the file which keeps a ref to the
 	 * channel */
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_IOCTL_CHANNEL_OPEN:
 		err = gk20a_channel_open_ioctl(ch->g,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 271c5d92..954b08b5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -366,6 +366,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 	if (request->gpu_characteristics_buf_size > 0) {
 		size_t write_size = sizeof(gpu);
 
+		nvgpu_speculation_barrier();
 		if (write_size > request->gpu_characteristics_buf_size)
 			write_size = request->gpu_characteristics_buf_size;
 
@@ -556,6 +557,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	if (args->mask_buf_size > 0) {
 		size_t write_size = gpc_tpc_mask_size;
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->mask_buf_size)
 			write_size = args->mask_buf_size;
 
@@ -580,6 +582,7 @@ static int gk20a_ctrl_get_fbp_l2_masks(
 	if (args->mask_buf_size > 0) {
 		size_t write_size = fbp_l2_mask_size;
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->mask_buf_size)
 			write_size = args->mask_buf_size;
 
@@ -1245,6 +1248,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
 				nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
 			return -EINVAL;
 	}
+	nvgpu_speculation_barrier();
 
 	entry = (struct nvgpu_gpu_clk_info __user *)
 		(uintptr_t)args->clk_info_entries;
@@ -1264,6 +1268,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
 			nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
 	}
 
+	nvgpu_speculation_barrier();
 	ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
 	if (ret < 0)
 		return ret;
@@ -1333,6 +1338,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
 		clk_info.clk_type = args->clk_type;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (clk_info.clk_type) {
 	case NVGPU_GPU_CLK_TYPE_TARGET:
 		err = nvgpu_clk_arb_get_session_target_mhz(session,
@@ -1366,6 +1372,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
 			return -EFAULT;
 	}
 
+	nvgpu_speculation_barrier();
 	args->num_entries = num_entries;
 
 	return 0;
@@ -1403,6 +1410,7 @@ static int nvgpu_gpu_get_voltage(struct gk20a *g,
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (args->which) {
 	case NVGPU_GPU_VOLTAGE_CORE:
 		err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
@@ -1625,6 +1633,7 @@ static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
 			break;
 	}
 
+	nvgpu_speculation_barrier();
 	nvgpu_rwsem_up_read(&g->deterministic_busy);
 
 out:
@@ -1668,6 +1677,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		gk20a_idle(g);
 	}
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
 		get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
@@ -1713,6 +1723,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		zbc_val->format = set_table_args->format;
 		zbc_val->type = set_table_args->type;
 
+		nvgpu_speculation_barrier();
 		switch (zbc_val->type) {
 		case GK20A_ZBC_TYPE_COLOR:
 			for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index dc732dc5..0c9b10b5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -314,6 +314,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 	if (args->sm_error_state_record_size > 0) {
 		size_t write_size = sizeof(*sm_error_state);
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->sm_error_state_record_size)
 			write_size = args->sm_error_state_record_size;
 
@@ -361,6 +362,7 @@ static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
 	nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
 			timeout_mode);
 
+	nvgpu_speculation_barrier();
 	switch (timeout_mode) {
 	case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
 		if (dbg_s->is_timeout_disabled == true)
@@ -917,6 +919,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ops_offset += num_ops;
 	}
 
+	nvgpu_speculation_barrier();
 	nvgpu_kfree(g, linux_fragment);
 
 	/* enable powergate, if previously disabled */
@@ -1007,6 +1010,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 
 static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
 {
+	nvgpu_speculation_barrier();
 	switch (mode){
 	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
 		return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
@@ -1153,6 +1157,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		goto clean_up;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
 		gr_gk20a_suspend_context(ch);
@@ -1366,6 +1371,7 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (args->cmd) {
 	case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
 		gk20a_dbg_gpu_events_enable(dbg_s);
@@ -1536,6 +1542,7 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (args->action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
 		err = g->ops.gr.suspend_contexts(g, dbg_s,
@@ -1627,6 +1634,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 		size -= access_size;
 		offset += access_size;
 	}
+	nvgpu_speculation_barrier();
 
 fail_idle:
 	gk20a_idle(g);
@@ -1899,6 +1907,7 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
 	struct gk20a *g = dbg_s->g;
 	u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
 
+	nvgpu_speculation_barrier();
 	switch (args->exception_type_mask) {
 	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
 		sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
@@ -1970,6 +1979,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	/* protect from threaded user space calls */
 	nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
index a26559f5..2f8cb3ae 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -361,6 +361,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
 	if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
 		return -EINVAL;
 
+	nvgpu_speculation_barrier();
 	switch (args->cmd) {
 	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
 		err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
@@ -572,6 +573,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
 	if (args->record_size > 0) {
 		size_t write_size = sizeof(*sm_error_state);
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->record_size)
 			write_size = args->record_size;
 
diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c
index 15cbf1ec..30c58a19 100644
--- a/drivers/gpu/nvgpu/os/linux/sched.c
+++ b/drivers/gpu/nvgpu/os/linux/sched.c
@@ -447,6 +447,7 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
 			return -EFAULT;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_SCHED_IOCTL_GET_TSGS:
 		err = gk20a_sched_dev_ioctl_get_tsgs(g,
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c
new file mode 100644
index 00000000..fa6995ac
--- /dev/null
+++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c
@@ -0,0 +1,549 @@
1<<<<<<< HEAD (bbef4c gpu: nvgpu: initialize masks for the perfmon counters 3)
2=======
3/*
4 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/types.h>
26#include <nvgpu/soc.h>
27#include <nvgpu/io.h>
28#include <nvgpu/utils.h>
29#include <nvgpu/gk20a.h>
30#include <nvgpu/channel.h>
31#include <nvgpu/netlist.h>
32
33#include "gk20a/gr_gk20a.h"
34#include "gk20a/gr_pri_gk20a.h"
35
36#include "gp10b/gr_gp10b.h"
37
38#include "gv11b/gr_gv11b.h"
39
40#include "tu104/gr_tu104.h"
41
42#include <nvgpu/hw/tu104/hw_gr_tu104.h>
43
44bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num)
45{
46 nvgpu_speculation_barrier();
47 switch (class_num) {
48 case TURING_CHANNEL_GPFIFO_A:
49 case TURING_A:
50 case TURING_COMPUTE_A:
51 case TURING_DMA_COPY_A:
52 return true;
53 default:
54 break;
55 }
56
57 return gr_gv11b_is_valid_class(g, class_num);
58};
59
60bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num)
61{
62 nvgpu_speculation_barrier();
63 switch (class_num) {
64 case TURING_A:
65 return true;
66 default:
67 break;
68 }
69
70 return gr_gv11b_is_valid_gfx_class(g, class_num);
71}
72
73bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num)
74{
75 nvgpu_speculation_barrier();
76 switch (class_num) {
77 case TURING_COMPUTE_A:
78 return true;
79 default:
80 break;
81 }
82
83 return gr_gv11b_is_valid_compute_class(g, class_num);
84}
85
86int gr_tu104_init_sw_bundle64(struct gk20a *g)
87{
88 u32 i;
89 u32 last_bundle_data_lo = 0;
90 u32 last_bundle_data_hi = 0;
91 int err = 0;
92 struct netlist_av64_list *sw_bundle64_init =
93 &g->netlist_vars->sw_bundle64_init;
94
95 for (i = 0U; i < sw_bundle64_init->count; i++) {
96 if (i == 0U ||
97 (last_bundle_data_lo != sw_bundle64_init->l[i].value_lo) ||
98 (last_bundle_data_hi != sw_bundle64_init->l[i].value_hi)) {
99 nvgpu_writel(g, gr_pipe_bundle_data_r(),
100 sw_bundle64_init->l[i].value_lo);
101 nvgpu_writel(g, gr_pipe_bundle_data_hi_r(),
102 sw_bundle64_init->l[i].value_hi);
103
104 last_bundle_data_lo = sw_bundle64_init->l[i].value_lo;
105 last_bundle_data_hi = sw_bundle64_init->l[i].value_hi;
106 }
107
108 nvgpu_writel(g, gr_pipe_bundle_address_r(),
109 sw_bundle64_init->l[i].addr);
110
111 if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr)
112 == GR_GO_IDLE_BUNDLE) {
113 err = gr_gk20a_wait_idle(g,
114 gk20a_get_gr_idle_timeout(g),
115 GR_IDLE_CHECK_DEFAULT);
116 } else if (nvgpu_platform_is_silicon(g)) {
117 err = gr_gk20a_wait_fe_idle(g,
118 gk20a_get_gr_idle_timeout(g),
119 GR_IDLE_CHECK_DEFAULT);
120 }
121 if (err != 0) {
122 break;
123 }
124 }
125
126 return err;
127}
128
129int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g)
130{
131 int err;
132 struct gr_gk20a *gr = &g->gr;
133 u32 rtv_circular_buffer_size;
134
135 nvgpu_log_fn(g, " ");
136
137 rtv_circular_buffer_size =
138 (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
139 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) *
140 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
141 nvgpu_log_info(g, "rtv_circular_buffer_size : %u",
142 rtv_circular_buffer_size);
143
144 err = gk20a_gr_alloc_ctx_buffer(g,
145 &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER],
146 rtv_circular_buffer_size);
147 if (err != 0) {
148 return err;
149 }
150
151 err = gr_gk20a_alloc_global_ctx_buffers(g);
152 if (err != 0) {
153 goto clean_up;
154 }
155
156 return 0;
157
158clean_up:
159 nvgpu_err(g, "fail");
160 gk20a_gr_destroy_ctx_buffer(g,
161 &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER]);
162
163 return err;
164}
165
166int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
167 struct nvgpu_gr_ctx *gr_ctx, bool vpr)
168{
169 int err;
170 u64 *g_bfr_va;
171 u64 *g_bfr_size;
172 int *g_bfr_index;
173 struct gr_gk20a *gr = &g->gr;
174 struct nvgpu_mem *mem;
175 u64 gpu_va;
176
177 nvgpu_log_fn(g, " ");
178
179 g_bfr_va = gr_ctx->global_ctx_buffer_va;
180 g_bfr_size = gr_ctx->global_ctx_buffer_size;
181 g_bfr_index = gr_ctx->global_ctx_buffer_index;
182
183 /* RTV circular buffer */
184 mem = &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER].mem;
185 gpu_va = nvgpu_gmmu_map(vm, mem, mem->size, 0,
186 gk20a_mem_flag_none, true, mem->aperture);
187 if (gpu_va == 0ULL) {
188 return -ENOMEM;
189 }
190
191 g_bfr_va[RTV_CIRCULAR_BUFFER_VA] = gpu_va;
192 g_bfr_size[RTV_CIRCULAR_BUFFER_VA] = mem->size;
193 g_bfr_index[RTV_CIRCULAR_BUFFER_VA] = RTV_CIRCULAR_BUFFER;
194
195 err = gr_gk20a_map_global_ctx_buffers(g, vm, gr_ctx, vpr);
196 if (err != 0) {
197 goto clean_up;
198 }
199
200 return 0;
201
202clean_up:
203 nvgpu_err(g, "fail");
204 nvgpu_gmmu_unmap(vm, mem, gpu_va);
205
206 return err;
207}
208
209static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
210 struct nvgpu_gr_ctx *gr_ctx,
211 u64 addr, u32 size, u32 gfxpAddSize, bool patch)
212{
213 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(),
214 gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
215 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(),
216 gr_scc_rm_rtv_cb_size_div_256b_f(size), patch);
217 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(),
218 gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
219 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(),
220 gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize),
221 patch);
222}
223
224int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
225 struct nvgpu_gr_ctx *gr_ctx, bool patch)
226{
227 int err;
228 u64 addr;
229 u32 size;
230 u32 gfxpaddsize = 0;
231
232 nvgpu_log_fn(g, " ");
233
234 err = gr_gk20a_commit_global_ctx_buffers(g, gr_ctx, patch);
235 if (err != 0) {
236 return err;
237 }
238
239 if (patch) {
240 int err;
241 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
242 if (err != 0) {
243 return err;
244 }
245 }
246
247 /* RTV circular buffer */
248 addr = gr_ctx->global_ctx_buffer_va[RTV_CIRCULAR_BUFFER_VA] >>
249 U64(gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f());
250
251 size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
252 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());
253
254 gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size,
255 gfxpaddsize, patch);
256
257 if (patch) {
258 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
259 }
260
261 return 0;
262}
263
264int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g,
265 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
266{
267 int err;
268 u32 rtv_cb_size;
269
270 nvgpu_log_fn(g, " ");
271
272 rtv_cb_size =
273 (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
274 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
275 gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
276 gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
277
278 err = gr_gp10b_alloc_buffer(vm,
279 rtv_cb_size,
280 &gr_ctx->gfxp_rtvcb_ctxsw_buffer);
281
282 return err;
283}
284
285void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g,
286 struct nvgpu_gr_ctx *gr_ctx, bool patch)
287{
288 u64 addr;
289 u32 rtv_cb_size;
290 u32 gfxp_addr_size;
291
292 nvgpu_log_fn(g, " ");
293
294 rtv_cb_size =
295 (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
296 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
297 gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
298 gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
299
300 /* GFXP RTV circular buffer */
301 addr = (u64)(u64_lo32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) >>
302 gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()) |
303 (u64)(u64_hi32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) <<
304 (32U - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()));
305
306
307 gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr,
308 rtv_cb_size,
309 gfxp_addr_size,
310 patch);
311}
312
313void gr_tu104_bundle_cb_defaults(struct gk20a *g)
314{
315 struct gr_gk20a *gr = &g->gr;
316
317 gr->bundle_cb_default_size =
318 gr_scc_bundle_cb_size_div_256b__prod_v();
319 gr->min_gpm_fifo_depth =
320 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
321 gr->bundle_cb_token_limit =
322 gr_pd_ab_dist_cfg2_token_limit_init_v();
323}
324
325void gr_tu104_cb_size_default(struct gk20a *g)
326{
327 struct gr_gk20a *gr = &g->gr;
328
329 if (gr->attrib_cb_default_size == 0U) {
330 gr->attrib_cb_default_size =
331 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
332 }
333 gr->alpha_cb_default_size =
334 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
335 gr->attrib_cb_gfxp_default_size =
336 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
337 gr->attrib_cb_gfxp_size =
338 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
339}
340
341void gr_tu104_free_gr_ctx(struct gk20a *g,
342 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
343{
344 nvgpu_log_fn(g, " ");
345
346 if (gr_ctx != NULL) {
347 nvgpu_dma_unmap_free(vm, &gr_ctx->gfxp_rtvcb_ctxsw_buffer);
348 }
349
350 gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
351}
352
353void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
354{
355 struct gr_gk20a *gr = &g->gr;
356 u32 tpc_mask;
357
358 gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
359 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
360
361 tpc_mask =
362 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1);
363
364 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
365 (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
366 gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) |
367 gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1)));
368}
369
370int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
371 enum ctxsw_addr_type addr_type,
372 u32 num_tpcs,
373 u32 num_ppcs,
374 u32 reg_list_ppc_count,
375 u32 *__offset_in_segment)
376{
377 u32 offset_in_segment = 0;
378 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
379 GPU_LIT_NUM_PES_PER_GPC);
380
381 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
382 /*
383 * reg = g->netlist_vars->ctxsw_regs.tpc.l;
384 * offset_in_segment = 0;
385 */
386 } else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
387 /*
388 * The ucode stores TPC data before PPC data.
389 * Advance offset past TPC data to PPC data.
390 */
391 offset_in_segment =
392 ((g->netlist_vars->ctxsw_regs.tpc.count *
393 num_tpcs) << 2);
394 } else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
395 /*
396 * The ucode stores TPC/PPC data before GPC data.
397 * Advance offset past TPC/PPC data to GPC data.
398 *
399 * Note 1 PES_PER_GPC case
400 */
401 if (num_pes_per_gpc > 1U) {
402 offset_in_segment =
403 (((g->netlist_vars->ctxsw_regs.tpc.count *
404 num_tpcs) << 2) +
405 ((reg_list_ppc_count * num_ppcs) << 2));
406 } else {
407 offset_in_segment =
408 ((g->netlist_vars->ctxsw_regs.tpc.count *
409 num_tpcs) << 2);
410 }
411 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
412 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
413 if (num_pes_per_gpc > 1U) {
414 offset_in_segment =
415 ((g->netlist_vars->ctxsw_regs.tpc.count *
416 num_tpcs) << 2) +
417 ((reg_list_ppc_count * num_ppcs) << 2) +
418 (g->netlist_vars->ctxsw_regs.gpc.count << 2);
419 } else {
420 offset_in_segment =
421 ((g->netlist_vars->ctxsw_regs.tpc.count *
422 num_tpcs) << 2) +
423 (g->netlist_vars->ctxsw_regs.gpc.count << 2);
424 }
425
426 /* aligned to next 256 byte */
427 offset_in_segment = ALIGN(offset_in_segment, 256);
428
429 nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
430 "egpc etpc offset_in_segment 0x%#08x",
431 offset_in_segment);
432 } else {
433 nvgpu_log_fn(g, "Unknown address type.");
434 return -EINVAL;
435 }
436
437 *__offset_in_segment = offset_in_segment;
438 return 0;
439}
440
441static void gr_tu104_set_sm_disp_ctrl(struct gk20a *g, u32 data)
442{
443 u32 reg_val;
444
445 nvgpu_log_fn(g, " ");
446
447 reg_val = nvgpu_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
448
449 if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK)
450 == NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_DISABLE) {
451 reg_val = set_field(reg_val,
452 gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
453 gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_disable_f()
454 );
455 } else if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK)
456 == NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_ENABLE) {
457 reg_val = set_field(reg_val,
458 gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
459 gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_enable_f()
460 );
461 }
462
463 nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), reg_val);
464}
465
466int gr_tu104_handle_sw_method(struct gk20a *g, u32 addr,
467 u32 class_num, u32 offset, u32 data)
468{
469 nvgpu_log_fn(g, " ");
470
471 if (class_num == TURING_COMPUTE_A) {
472 switch (offset << 2) {
473 case NVC5C0_SET_SHADER_EXCEPTIONS:
474 gv11b_gr_set_shader_exceptions(g, data);
475 break;
476 case NVC5C0_SET_SKEDCHECK:
477 gr_gv11b_set_skedcheck(g, data);
478 break;
479 case NVC5C0_SET_SM_DISP_CTRL:
480 gr_tu104_set_sm_disp_ctrl(g, data);
481 break;
482 case NVC5C0_SET_SHADER_CUT_COLLECTOR:
483 gr_gv11b_set_shader_cut_collector(g, data);
484 break;
485 default:
486 goto fail;
487 }
488 }
489
490 if (class_num == TURING_A) {
491 switch (offset << 2) {
492 case NVC597_SET_SHADER_EXCEPTIONS:
493 gv11b_gr_set_shader_exceptions(g, data);
494 break;
495 case NVC597_SET_CIRCULAR_BUFFER_SIZE:
496 g->ops.gr.set_circular_buffer_size(g, data);
497 break;
498 case NVC597_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
499 g->ops.gr.set_alpha_circular_buffer_size(g, data);
500 break;
501 case NVC597_SET_GO_IDLE_TIMEOUT:
502 gr_gv11b_set_go_idle_timeout(g, data);
503 break;
504 case NVC097_SET_COALESCE_BUFFER_SIZE:
505 gr_gv11b_set_coalesce_buffer_size(g, data);
506 break;
507 case NVC597_SET_TEX_IN_DBG:
508 gr_gv11b_set_tex_in_dbg(g, data);
509 break;
510 case NVC597_SET_SKEDCHECK:
511 gr_gv11b_set_skedcheck(g, data);
512 break;
513 case NVC597_SET_BES_CROP_DEBUG3:
514 g->ops.gr.set_bes_crop_debug3(g, data);
515 break;
516 case NVC597_SET_BES_CROP_DEBUG4:
517 g->ops.gr.set_bes_crop_debug4(g, data);
518 break;
519 case NVC597_SET_SM_DISP_CTRL:
520 gr_tu104_set_sm_disp_ctrl(g, data);
521 break;
522 case NVC597_SET_SHADER_CUT_COLLECTOR:
523 gr_gv11b_set_shader_cut_collector(g, data);
524 break;
525 default:
526 goto fail;
527 }
528 }
529 return 0;
530
531fail:
532 return -EINVAL;
533}
534
535void gr_tu104_init_sm_dsm_reg_info(void)
536{
537 return;
538}
539
540void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
541 u32 *num_sm_dsm_perf_ctrl_regs,
542 u32 **sm_dsm_perf_ctrl_regs,
543 u32 *ctrl_register_stride)
544{
545 *num_sm_dsm_perf_ctrl_regs = 0;
546 *sm_dsm_perf_ctrl_regs = NULL;
547 *ctrl_register_stride = 0;
548}
549>>>>>>> CHANGE (f0762e gpu: nvgpu: add speculative barrier)