gpu: nvgpu: add speculative barrier

 drivers/gpu/nvgpu/common/fifo/submit.c     |   1 +
 drivers/gpu/nvgpu/common/fifo/tsg.c        |   1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c         |   2 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c         |   1 +
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c         |   3 +
 drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c |   1 +
 drivers/gpu/nvgpu/os/linux/ioctl_as.c      |   2 +
 drivers/gpu/nvgpu/os/linux/ioctl_channel.c |   5 +
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c    |  11 +
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c     |  10 +
 drivers/gpu/nvgpu/os/linux/ioctl_tsg.c     |   2 +
 drivers/gpu/nvgpu/os/linux/sched.c         |   1 +
 drivers/gpu/nvgpu/tu104/gr_tu104.c         | 546 +
 13 files changed, 586 insertions(+), 0 deletions(-)
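This is change f0762e, "gpu: nvgpu: add speculative barrier". It inserts nvgpu_speculation_barrier() wherever a value copied in from user space (an ioctl command number, an index, a buffer size, or a mode enum) has just passed a bounds or validity check and is about to be used to index driver state or drive a switch. An architectural speculation barrier between the check and the dependent use is the standard mitigation for Spectre variant 1 (bounds-check bypass, CVE-2017-5753): it prevents the CPU from speculatively executing the out-of-bounds side of the branch before the check resolves. The diff also records the addition of the new Turing graphics file tu104/gr_tu104.c, whose class-validation helpers open with the same barrier.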
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index d518fbfb..b0f38ff1 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -212,6 +212,7 @@ static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
 	u32 end = start + len; /* exclusive */
 	int err;
 
+	nvgpu_speculation_barrier();
 	if (end > gpfifo_size) {
 		/* wrap-around */
 		int length0 = gpfifo_size - start;
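This first hunk is the canonical shape of the fix: start and len come from the user's submit ioctl, and the end > gpfifo_size test is exactly the kind of bounds check a CPU can execute speculatively in the wrong direction. Below is a minimal, self-contained C sketch of the pattern; the barrier body is an assumption for illustration only, since nvgpu's actual nvgpu_speculation_barrier() is a portability wrapper whose definition is outside this diff.

    /* Sketch of the bounds-check-then-barrier pattern this commit applies.
     * The speculation_barrier() implementation below is illustrative,
     * NOT nvgpu's. */
    #include <stddef.h>

    #define TABLE_SIZE 16
    static int table[TABLE_SIZE];

    static inline void speculation_barrier(void)
    {
    #if defined(__x86_64__)
            /* serializes the dependent load against the branch */
            __asm__ __volatile__("lfence" ::: "memory");
    #else
            /* compiler-only placeholder; a real port needs an
             * architectural fence (e.g. a CSDB sequence on arm64) */
            __asm__ __volatile__("" ::: "memory");
    #endif
    }

    int read_entry(size_t user_idx)
    {
            if (user_idx >= TABLE_SIZE)
                    return -1;             /* architectural reject */
            speculation_barrier();         /* stop speculative OOB load */
            return table[user_idx];
    }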
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index f6c718f0..841dd465 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -219,6 +219,7 @@ int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 
 	nvgpu_log(g, gpu_dbg_sched, "tsgid=%u interleave=%u", tsg->tsgid, level);
 
+	nvgpu_speculation_barrier();
 	switch (level) {
 	case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
 	case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
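The tsg.c hunk shows the second recurring shape: a switch on a user-supplied enumerator. Even though bad values fall through to a rejecting default arm, the case dispatch itself can be predicted and speculated, so the barrier goes immediately before the switch. Nearly every hunk below is one of these two shapes.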
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 989ee5c9..636d5714 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3943,6 +3943,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
 	/* no endian swap ? */
 
 	nvgpu_mutex_acquire(&gr->zbc_lock);
+	nvgpu_speculation_barrier();
 	switch (zbc_val->type) {
 	case GK20A_ZBC_TYPE_COLOR:
 		/* search existing tables */
@@ -4047,6 +4048,7 @@ int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
 	u32 index = query_params->index_size;
 	u32 i;
 
+	nvgpu_speculation_barrier();
 	switch (query_params->type) {
 	case GK20A_ZBC_TYPE_INVALID:
 		query_params->index_size = GK20A_ZBC_TABLE_SIZE;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 4b42678f..2bcb08a4 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -52,6 +52,7 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case PASCAL_COMPUTE_A:
 	case PASCAL_A:
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 1dfecfc1..5820a695 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -77,6 +77,7 @@ bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_COMPUTE_A:
 	case VOLTA_A:
@@ -106,6 +107,7 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_A:
 	case PASCAL_A:
@@ -140,6 +142,7 @@ bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_COMPUTE_A:
 	case PASCAL_COMPUTE_A:
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
index 8b38a9e1..bada5dc7 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
@@ -244,6 +244,7 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	vidmem_buf = dmabuf->priv;
 	mem = vidmem_buf->mem;
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
 		nvgpu_mem_rd_n(g, mem, offset, buffer, size);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 3fa8aa2c..f0cec178 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -170,6 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 		nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
 	}
 
+	nvgpu_speculation_barrier();
 	if (err) {
 		nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
 
@@ -355,6 +356,7 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_AS_IOCTL_BIND_CHANNEL:
 		trace_gk20a_as_ioctl_bind_channel(g->name);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 22177171..3c844491 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -290,6 +290,7 @@ static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
 	if (!args->dmabuf_fd)
 		return -EINVAL;
 
+	nvgpu_speculation_barrier();
 	/* handle the command (most frequent cases first) */
 	switch (args->cmd) {
 	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
@@ -874,6 +875,7 @@ clean_up:
  */
 u32 nvgpu_get_common_runlist_level(u32 level)
 {
+	nvgpu_speculation_barrier();
 	switch (level) {
 	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
 		return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
@@ -982,6 +984,7 @@ u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
  */
 static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
 {
+	nvgpu_speculation_barrier();
 	switch (graphics_preempt_mode) {
 	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
 		return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
@@ -998,6 +1001,7 @@ static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
  */
 static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
 {
+	nvgpu_speculation_barrier();
 	switch (compute_preempt_mode) {
 	case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
 		return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
@@ -1121,6 +1125,7 @@ long gk20a_channel_ioctl(struct file *filp,
 	/* this ioctl call keeps a ref to the file which keeps a ref to the
 	 * channel */
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_IOCTL_CHANNEL_OPEN:
 		err = gk20a_channel_open_ioctl(ch->g,
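All five ioctl_channel.c hunks guard a switch whose selector (a command number, runlist level, or preemption mode) arrives from user space. Mainline kernel code often handles the table-lookup flavor of this with array_index_nospec() from <linux/nospec.h> (in mainline since v4.16), which clamps the index under speculation instead of paying for a full barrier. A hedged sketch of that alternative idiom, not what this commit does; the dispatch() helper and its handler table are hypothetical:

    #include <linux/errno.h>
    #include <linux/nospec.h>

    typedef long (*ioctl_handler_t)(void *priv, unsigned long arg);

    static long dispatch(unsigned int nr, const ioctl_handler_t *handlers,
                         size_t count, void *priv, unsigned long arg)
    {
            if (nr >= count)
                    return -EINVAL;
            /* nr stays below count even on mispredicted paths */
            nr = array_index_nospec(nr, count);
            return handlers[nr](priv, arg);
    }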
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 271c5d92..954b08b5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -366,6 +366,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 	if (request->gpu_characteristics_buf_size > 0) {
 		size_t write_size = sizeof(gpu);
 
+		nvgpu_speculation_barrier();
 		if (write_size > request->gpu_characteristics_buf_size)
 			write_size = request->gpu_characteristics_buf_size;
 
@@ -556,6 +557,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	if (args->mask_buf_size > 0) {
 		size_t write_size = gpc_tpc_mask_size;
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->mask_buf_size)
 			write_size = args->mask_buf_size;
 
@@ -580,6 +582,7 @@ static int gk20a_ctrl_get_fbp_l2_masks(
 	if (args->mask_buf_size > 0) {
 		size_t write_size = fbp_l2_mask_size;
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->mask_buf_size)
 			write_size = args->mask_buf_size;
 
@@ -1245,6 +1248,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
 			nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
 			return -EINVAL;
 	}
+	nvgpu_speculation_barrier();
 
 	entry = (struct nvgpu_gpu_clk_info __user *)
 			(uintptr_t)args->clk_info_entries;
@@ -1264,6 +1268,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
 			nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
 	}
 
+	nvgpu_speculation_barrier();
 	ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
 	if (ret < 0)
 		return ret;
@@ -1333,6 +1338,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
 		clk_info.clk_type = args->clk_type;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (clk_info.clk_type) {
 	case NVGPU_GPU_CLK_TYPE_TARGET:
 		err = nvgpu_clk_arb_get_session_target_mhz(session,
@@ -1366,6 +1372,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
 			return -EFAULT;
 	}
 
+	nvgpu_speculation_barrier();
 	args->num_entries = num_entries;
 
 	return 0;
@@ -1403,6 +1410,7 @@ static int nvgpu_gpu_get_voltage(struct gk20a *g,
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (args->which) {
 	case NVGPU_GPU_VOLTAGE_CORE:
 		err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
@@ -1625,6 +1633,7 @@ static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
 			break;
 	}
 
+	nvgpu_speculation_barrier();
 	nvgpu_rwsem_up_read(&g->deterministic_busy);
 
 out:
@@ -1668,6 +1677,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		gk20a_idle(g);
 	}
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
 		get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
@@ -1713,6 +1723,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		zbc_val->format = set_table_args->format;
 		zbc_val->type = set_table_args->type;
 
+		nvgpu_speculation_barrier();
 		switch (zbc_val->type) {
 		case GK20A_ZBC_TYPE_COLOR:
 			for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
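In the first three ioctl_ctrl.c hunks the barrier sits inside the if (...buf_size > 0) block, just before write_size is clamped to the user-supplied buffer size; the length of the user copy that follows derives from that value, which is the speculative window being closed. The clk_set_info and clk_get_info hunks instead place the barrier after loops whose trip count comes from the user-provided entry count.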
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index dc732dc5..0c9b10b5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -314,6 +314,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 	if (args->sm_error_state_record_size > 0) {
 		size_t write_size = sizeof(*sm_error_state);
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->sm_error_state_record_size)
 			write_size = args->sm_error_state_record_size;
 
@@ -361,6 +362,7 @@ static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
 	nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
 			timeout_mode);
 
+	nvgpu_speculation_barrier();
 	switch (timeout_mode) {
 	case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
 		if (dbg_s->is_timeout_disabled == true)
@@ -917,6 +919,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ops_offset += num_ops;
 	}
 
+	nvgpu_speculation_barrier();
 	nvgpu_kfree(g, linux_fragment);
 
 	/* enable powergate, if previously disabled */
@@ -1007,6 +1010,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 
 static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
 {
+	nvgpu_speculation_barrier();
 	switch (mode){
 	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
 		return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
@@ -1153,6 +1157,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		goto clean_up;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
 		gr_gk20a_suspend_context(ch);
@@ -1366,6 +1371,7 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (args->cmd) {
 	case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
 		gk20a_dbg_gpu_events_enable(dbg_s);
@@ -1536,6 +1542,7 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (args->action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
 		err = g->ops.gr.suspend_contexts(g, dbg_s,
@@ -1627,6 +1634,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 		size -= access_size;
 		offset += access_size;
 	}
+	nvgpu_speculation_barrier();
 
 fail_idle:
 	gk20a_idle(g);
@@ -1899,6 +1907,7 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
 	struct gk20a *g = dbg_s->g;
 	u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
 
+	nvgpu_speculation_barrier();
 	switch (args->exception_type_mask) {
 	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
 		sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
@@ -1970,6 +1979,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	/* protect from threaded user space calls */
 	nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
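ioctl_dbg.c mixes both shapes and adds a third placement: after loops bounded by user-supplied counts (the reg_ops fragment loop and the FB-memory access loop), plus one at the top of the converter helper nvgpu_hwpm_ctxsw_mode_to_common_mode(), which switches on a mode value passed straight through from the ioctl argument.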
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
index a26559f5..2f8cb3ae 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -361,6 +361,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
 	if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
 		return -EINVAL;
 
+	nvgpu_speculation_barrier();
 	switch (args->cmd) {
 	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
 		err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
@@ -572,6 +573,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
 	if (args->record_size > 0) {
 		size_t write_size = sizeof(*sm_error_state);
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->record_size)
 			write_size = args->record_size;
 
diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c
index 15cbf1ec..30c58a19 100644
--- a/drivers/gpu/nvgpu/os/linux/sched.c
+++ b/drivers/gpu/nvgpu/os/linux/sched.c
@@ -447,6 +447,7 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
 		return -EFAULT;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_SCHED_IOCTL_GET_TSGS:
 		err = gk20a_sched_dev_ioctl_get_tsgs(g,
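The last entry in the diffstat is a new file. Beyond repeating the barrier-before-switch pattern in its three class-validation helpers, tu104/gr_tu104.c carries initial Turing (TU104) graphics support: RTV circular-buffer allocation and commit, the GfxP RTV buffer, global context-buffer management, GPC exception enables, and software-method handling for the TURING_A and TURING_COMPUTE_A classes.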
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c
new file mode 100644
index 00000000..fa6995ac
--- /dev/null
+++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/types.h>
+#include <nvgpu/soc.h>
+#include <nvgpu/io.h>
+#include <nvgpu/utils.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/netlist.h>
+
+#include "gk20a/gr_gk20a.h"
+#include "gk20a/gr_pri_gk20a.h"
+
+#include "gp10b/gr_gp10b.h"
+
+#include "gv11b/gr_gv11b.h"
+
+#include "tu104/gr_tu104.h"
+
+#include <nvgpu/hw/tu104/hw_gr_tu104.h>
+
+bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num)
+{
+	nvgpu_speculation_barrier();
+	switch (class_num) {
+	case TURING_CHANNEL_GPFIFO_A:
+	case TURING_A:
+	case TURING_COMPUTE_A:
+	case TURING_DMA_COPY_A:
+		return true;
+	default:
+		break;
+	}
+
+	return gr_gv11b_is_valid_class(g, class_num);
+};
+
+bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+{
+	nvgpu_speculation_barrier();
+	switch (class_num) {
+	case TURING_A:
+		return true;
+	default:
+		break;
+	}
+
+	return gr_gv11b_is_valid_gfx_class(g, class_num);
+}
+
+bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num)
+{
+	nvgpu_speculation_barrier();
+	switch (class_num) {
+	case TURING_COMPUTE_A:
+		return true;
+	default:
+		break;
+	}
+
+	return gr_gv11b_is_valid_compute_class(g, class_num);
+}
+
+int gr_tu104_init_sw_bundle64(struct gk20a *g)
+{
+	u32 i;
+	u32 last_bundle_data_lo = 0;
+	u32 last_bundle_data_hi = 0;
+	int err = 0;
+	struct netlist_av64_list *sw_bundle64_init =
+			&g->netlist_vars->sw_bundle64_init;
+
+	for (i = 0U; i < sw_bundle64_init->count; i++) {
+		if (i == 0U ||
+		    (last_bundle_data_lo != sw_bundle64_init->l[i].value_lo) ||
+		    (last_bundle_data_hi != sw_bundle64_init->l[i].value_hi)) {
+			nvgpu_writel(g, gr_pipe_bundle_data_r(),
+				sw_bundle64_init->l[i].value_lo);
+			nvgpu_writel(g, gr_pipe_bundle_data_hi_r(),
+				sw_bundle64_init->l[i].value_hi);
+
+			last_bundle_data_lo = sw_bundle64_init->l[i].value_lo;
+			last_bundle_data_hi = sw_bundle64_init->l[i].value_hi;
+		}
+
+		nvgpu_writel(g, gr_pipe_bundle_address_r(),
+			sw_bundle64_init->l[i].addr);
+
+		if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr)
+				== GR_GO_IDLE_BUNDLE) {
+			err = gr_gk20a_wait_idle(g,
+				gk20a_get_gr_idle_timeout(g),
+				GR_IDLE_CHECK_DEFAULT);
+		} else if (nvgpu_platform_is_silicon(g)) {
+			err = gr_gk20a_wait_fe_idle(g,
+				gk20a_get_gr_idle_timeout(g),
+				GR_IDLE_CHECK_DEFAULT);
+		}
+		if (err != 0) {
+			break;
+		}
+	}
+
+	return err;
+}
+
+int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g)
+{
+	int err;
+	struct gr_gk20a *gr = &g->gr;
+	u32 rtv_circular_buffer_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_circular_buffer_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) *
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+	nvgpu_log_info(g, "rtv_circular_buffer_size : %u",
+		rtv_circular_buffer_size);
+
+	err = gk20a_gr_alloc_ctx_buffer(g,
+			&gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER],
+			rtv_circular_buffer_size);
+	if (err != 0) {
+		return err;
+	}
+
+	err = gr_gk20a_alloc_global_ctx_buffers(g);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	return 0;
+
+clean_up:
+	nvgpu_err(g, "fail");
+	gk20a_gr_destroy_ctx_buffer(g,
+		&gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER]);
+
+	return err;
+}
+
+int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
+		struct nvgpu_gr_ctx *gr_ctx, bool vpr)
+{
+	int err;
+	u64 *g_bfr_va;
+	u64 *g_bfr_size;
+	int *g_bfr_index;
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_mem *mem;
+	u64 gpu_va;
+
+	nvgpu_log_fn(g, " ");
+
+	g_bfr_va = gr_ctx->global_ctx_buffer_va;
+	g_bfr_size = gr_ctx->global_ctx_buffer_size;
+	g_bfr_index = gr_ctx->global_ctx_buffer_index;
+
+	/* RTV circular buffer */
+	mem = &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER].mem;
+	gpu_va = nvgpu_gmmu_map(vm, mem, mem->size, 0,
+			gk20a_mem_flag_none, true, mem->aperture);
+	if (gpu_va == 0ULL) {
+		return -ENOMEM;
+	}
+
+	g_bfr_va[RTV_CIRCULAR_BUFFER_VA] = gpu_va;
+	g_bfr_size[RTV_CIRCULAR_BUFFER_VA] = mem->size;
+	g_bfr_index[RTV_CIRCULAR_BUFFER_VA] = RTV_CIRCULAR_BUFFER;
+
+	err = gr_gk20a_map_global_ctx_buffers(g, vm, gr_ctx, vpr);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	return 0;
+
+clean_up:
+	nvgpu_err(g, "fail");
+	nvgpu_gmmu_unmap(vm, mem, gpu_va);
+
+	return err;
+}
+
+static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	u64 addr, u32 size, u32 gfxpAddSize, bool patch)
+{
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(),
+		gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(),
+		gr_scc_rm_rtv_cb_size_div_256b_f(size), patch);
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(),
+		gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(),
+		gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize),
+		patch);
+}
+
+int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, bool patch)
+{
+	int err;
+	u64 addr;
+	u32 size;
+	u32 gfxpaddsize = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	err = gr_gk20a_commit_global_ctx_buffers(g, gr_ctx, patch);
+	if (err != 0) {
+		return err;
+	}
+
+	if (patch) {
+		int err;
+		err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	/* RTV circular buffer */
+	addr = gr_ctx->global_ctx_buffer_va[RTV_CIRCULAR_BUFFER_VA] >>
+		U64(gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f());
+
+	size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());
+
+	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size,
+		gfxpaddsize, patch);
+
+	if (patch) {
+		gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
+	}
+
+	return 0;
+}
+
+int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
+{
+	int err;
+	u32 rtv_cb_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_cb_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+		 gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
+		gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
+
+	err = gr_gp10b_alloc_buffer(vm,
+			rtv_cb_size,
+			&gr_ctx->gfxp_rtvcb_ctxsw_buffer);
+
+	return err;
+}
+
+void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, bool patch)
+{
+	u64 addr;
+	u32 rtv_cb_size;
+	u32 gfxp_addr_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_cb_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+		 gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
+	gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
+
+	/* GFXP RTV circular buffer */
+	addr = (u64)(u64_lo32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) >>
+			gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()) |
+		(u64)(u64_hi32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) <<
+			(32U - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()));
+
+
+	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr,
+		rtv_cb_size,
+		gfxp_addr_size,
+		patch);
+}
+
+void gr_tu104_bundle_cb_defaults(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	gr->bundle_cb_default_size =
+		gr_scc_bundle_cb_size_div_256b__prod_v();
+	gr->min_gpm_fifo_depth =
+		gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
+	gr->bundle_cb_token_limit =
+		gr_pd_ab_dist_cfg2_token_limit_init_v();
+}
+
+void gr_tu104_cb_size_default(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	if (gr->attrib_cb_default_size == 0U) {
+		gr->attrib_cb_default_size =
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
+	}
+	gr->alpha_cb_default_size =
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+	gr->attrib_cb_gfxp_default_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+	gr->attrib_cb_gfxp_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+}
+
+void gr_tu104_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (gr_ctx != NULL) {
+		nvgpu_dma_unmap_free(vm, &gr_ctx->gfxp_rtvcb_ctxsw_buffer);
+	}
+
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+}
+
+void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_mask;
+
+	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
+		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
+
+	tpc_mask =
+		gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1);
+
+	gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
+		(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
+		 gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) |
+		 gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1)));
+}
+
+int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
+	enum ctxsw_addr_type addr_type,
+	u32 num_tpcs,
+	u32 num_ppcs,
+	u32 reg_list_ppc_count,
+	u32 *__offset_in_segment)
+{
+	u32 offset_in_segment = 0;
+	u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
+				GPU_LIT_NUM_PES_PER_GPC);
+
+	if (addr_type == CTXSW_ADDR_TYPE_TPC) {
+		/*
+		 * reg = g->netlist_vars->ctxsw_regs.tpc.l;
+		 * offset_in_segment = 0;
+		 */
+	} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
+		/*
+		 * The ucode stores TPC data before PPC data.
+		 * Advance offset past TPC data to PPC data.
+		 */
+		offset_in_segment =
+			((g->netlist_vars->ctxsw_regs.tpc.count *
+				num_tpcs) << 2);
+	} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
+		/*
+		 * The ucode stores TPC/PPC data before GPC data.
+		 * Advance offset past TPC/PPC data to GPC data.
+		 *
+		 * Note 1 PES_PER_GPC case
+		 */
+		if (num_pes_per_gpc > 1U) {
+			offset_in_segment =
+				(((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2) +
+				((reg_list_ppc_count * num_ppcs) << 2));
+		} else {
+			offset_in_segment =
+				((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2);
+		}
+	} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
+			(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
+		if (num_pes_per_gpc > 1U) {
+			offset_in_segment =
+				((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2) +
+				((reg_list_ppc_count * num_ppcs) << 2) +
+				(g->netlist_vars->ctxsw_regs.gpc.count << 2);
+		} else {
+			offset_in_segment =
+				((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2) +
+				(g->netlist_vars->ctxsw_regs.gpc.count << 2);
+		}
+
+		/* aligned to next 256 byte */
+		offset_in_segment = ALIGN(offset_in_segment, 256);
+
+		nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
+			"egpc etpc offset_in_segment 0x%#08x",
+			offset_in_segment);
+	} else {
+		nvgpu_log_fn(g, "Unknown address type.");
+		return -EINVAL;
+	}
+
+	*__offset_in_segment = offset_in_segment;
+	return 0;
+}
+
+static void gr_tu104_set_sm_disp_ctrl(struct gk20a *g, u32 data)
+{
+	u32 reg_val;
+
+	nvgpu_log_fn(g, " ");
+
+	reg_val = nvgpu_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
+
+	if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK)
+			== NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_DISABLE) {
+		reg_val = set_field(reg_val,
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_disable_f()
+			);
+	} else if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK)
+			== NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_ENABLE) {
+		reg_val = set_field(reg_val,
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_enable_f()
+			);
+	}
+
+	nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), reg_val);
+}
+
+int gr_tu104_handle_sw_method(struct gk20a *g, u32 addr,
+	u32 class_num, u32 offset, u32 data)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (class_num == TURING_COMPUTE_A) {
+		switch (offset << 2) {
+		case NVC5C0_SET_SHADER_EXCEPTIONS:
+			gv11b_gr_set_shader_exceptions(g, data);
+			break;
+		case NVC5C0_SET_SKEDCHECK:
+			gr_gv11b_set_skedcheck(g, data);
+			break;
+		case NVC5C0_SET_SM_DISP_CTRL:
+			gr_tu104_set_sm_disp_ctrl(g, data);
+			break;
+		case NVC5C0_SET_SHADER_CUT_COLLECTOR:
+			gr_gv11b_set_shader_cut_collector(g, data);
+			break;
+		default:
+			goto fail;
+		}
+	}
+
+	if (class_num == TURING_A) {
+		switch (offset << 2) {
+		case NVC597_SET_SHADER_EXCEPTIONS:
+			gv11b_gr_set_shader_exceptions(g, data);
+			break;
+		case NVC597_SET_CIRCULAR_BUFFER_SIZE:
+			g->ops.gr.set_circular_buffer_size(g, data);
+			break;
+		case NVC597_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
+			g->ops.gr.set_alpha_circular_buffer_size(g, data);
+			break;
+		case NVC597_SET_GO_IDLE_TIMEOUT:
+			gr_gv11b_set_go_idle_timeout(g, data);
+			break;
+		case NVC097_SET_COALESCE_BUFFER_SIZE:
+			gr_gv11b_set_coalesce_buffer_size(g, data);
+			break;
+		case NVC597_SET_TEX_IN_DBG:
+			gr_gv11b_set_tex_in_dbg(g, data);
+			break;
+		case NVC597_SET_SKEDCHECK:
+			gr_gv11b_set_skedcheck(g, data);
+			break;
+		case NVC597_SET_BES_CROP_DEBUG3:
+			g->ops.gr.set_bes_crop_debug3(g, data);
+			break;
+		case NVC597_SET_BES_CROP_DEBUG4:
+			g->ops.gr.set_bes_crop_debug4(g, data);
+			break;
+		case NVC597_SET_SM_DISP_CTRL:
+			gr_tu104_set_sm_disp_ctrl(g, data);
+			break;
+		case NVC597_SET_SHADER_CUT_COLLECTOR:
+			gr_gv11b_set_shader_cut_collector(g, data);
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return 0;
+
+fail:
+	return -EINVAL;
+}
+
+void gr_tu104_init_sm_dsm_reg_info(void)
+{
+	return;
+}
+
+void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
+	u32 *num_sm_dsm_perf_ctrl_regs,
+	u32 **sm_dsm_perf_ctrl_regs,
+	u32 *ctrl_register_stride)
+{
+	*num_sm_dsm_perf_ctrl_regs = 0;
+	*sm_dsm_perf_ctrl_regs = NULL;
+	*ctrl_register_stride = 0;
+}